Creating and sourcing python virtual environment
# Create a virtual environment in the directory NAME.
virtualenv NAME
# Activate it in the current shell session.
source NAME/bin/activate
# Show which python executable is now resolved first on PATH.
which python
To deactivate
# Leave the currently active virtual environment.
deactivate
Equivalent ways of filtering a list
# Both expressions evaluate to [0, 2, 4, 6, 8].
list(filter(lambda n: n % 2 == 0, range(10)))  # functional form
[n for n in range(10) if n % 2 == 0]           # list comprehension
Useful dependencies stack:
# Installs all listed packages in a single pip invocation.
pip install numpy pandas pillow opencv-python dominate torch pickle-mixin scipy
Install a pip dependency directly from GitHub
# -U upgrades the package if it is already installed; the git+https:// URL
# makes pip fetch and build the package from the repository.
pip install -U git+https://github.com/pytorch/vision
Install with a version constraint
# The quotes keep the shell from interpreting "<" as input redirection.
pip install "pillow<7"
import cv2

# imread returns a numpy.ndarray; it returns None (no exception) when the
# file is missing or cannot be decoded, so check the result before use.
img = cv2.imread("image.png")
Progress bar for iterations
from tqdm import tqdm             # terminal / script progress bar
# from tqdm.notebook import tqdm  # notebook variant (e.g. Google Colab); use INSTEAD of the line above

# Wrapping any iterable in tqdm() displays a progress bar while iterating.
for i in tqdm(range(100)):
    pass
Spreading dictionary (useful in passing arguments to functions or augmenting dictionaries/arrays)
d = {'one': 1, 'two': 2}
{*d}   # iterable unpacking yields the keys: {'one', 'two'}
{**d}  # mapping unpacking yields a shallow copy: {'one': 1, 'two': 2}
Dictionaries can be built from arrays with a comprehension, analogously to lists
# Dict comprehension: maps every element of some_array to True
# (elements must be hashable; duplicates collapse to a single key).
boolValues = {i: True for i in some_array}
Iterating dictionary over keys and values
# items() yields (key, value) pairs; unpack them directly in the loop header.
for key, val in db.items():
    print(key, val)
Equivalent ways of appending data to list
# Each entry is a one-item dict {key: median}. (A bare `{key, value}` would
# build an unordered *set*, losing which element is the key.)

# Variant 1: rebuild the list with unpacking (copies `res` on every iteration).
res = []
for key, elem in hmap.items():
    res = [*res, {key: np.median(elem)}]

# Variant 2: append in place (no copying; preferred for long lists).
res = []
for key, elem in hmap.items():
    res.append({key: np.median(elem)})
Reading/writing file
# Reading: the default mode is 'r'; the handle is closed when the block exits.
with open('old_request.txt') as file:
    contents = file.read()

# Writing needs a handle opened for writing. 'w' truncates the file first;
# use 'a' to append instead.
with open('old_request.txt', 'w') as file:
    file.write("text")
Display an image from the matrix x using imshow or matshow
import matplotlib.pyplot as plt

# Two alternatives for displaying the 2-D array x:
plt.imshow(x)   # draws onto the current axes
plt.matshow(x)  # opens a new figure and draws x as a matrix
Show a grid of images (2 x 4)
fig = plt.figure()
# A 2 x 4 grid has 8 cells; show at most that many images.
for i, img in enumerate(imgs[:8]):
    ax = plt.subplot(2, 4, i + 1)  # subplot indices are 1-based
    ax.imshow(img)                 # draw the image (plt.show() takes no image)
    ax.set_title(f"Sample {i}")    # titles are set on the Axes, not on pyplot
    ax.axis("off")
plt.tight_layout()                 # one layout pass after all axes exist
plt.show()
Useful options
fig = plt.figure(figsize=(10,6))  # figure size in inches (width, height)
ax = fig.gca()                    # axes the options below are applied to
plt.grid(True)
ax.legend(['AU', 'GB', 'US'])     # labels are matched to plotted lines in order
ax.set_xlabel("Check Out hour")
ax.set_xticks(range(0,24))        # one tick per hour, 0..23
import pandas as pd

df = pd.read_csv("./file.csv")
df.head()  # first rows (5 by default)
df.keys()  # column labels (same as df.columns)
Generic
# Rename columns / name the index; both return a new DataFrame.
df = df.rename(columns={"id": "paper_sha", "paragraph": "text"})
df = df.rename_axis("_id")

# dropna() also returns a new DataFrame; assign the result to keep it.
df.dropna()                          # drop rows with any missing value
df.dropna(subset=['name', 'born'])   # only consider these columns

# index=True writes the index as the first CSV column.
df.to_csv('csv2sql2.csv',index=True)
Take certain columns
# .loc slices by label and includes BOTH endpoints.
df.loc[:, cols[4]:cols[10]]     # columns cols[4] through cols[10]
df.loc[:, cols[4]:]             # columns from cols[4] to the end
# Selecting specific columns takes a list of labels; passing them as
# separate indexers (df.loc[:, a, b]) raises "Too many indexers".
df.loc[:, [cols[4], cols[-1]]]
Delete column
# Removes the column from df in place (assumes df is a pandas DataFrame).
del df['column_name']
Get unique values
# Distinct values of the 'source_x' column
# (assumes metadata is a pandas DataFrame).
metadata['source_x'].unique()
Filter data
# Boolean-mask selection: keeps the rows whose 'table' value compares equal to True.
df.loc[df['table'] == True]
Sum column entries
# One sum per column, in column order.
some_partial_sum = [df[col].sum() for col in df.keys()]
Reshaping data for ML models
# reshape(-1, 1) turns a flat sequence into a single-column 2-D array
# (shape (n, 1)); -1 lets NumPy infer the row count.
days = np.arange(len(dates)).reshape(-1, 1)
some_partial_sum = np.array(some_partial_sum).reshape(-1, 1)
Find the total number of missing entries (NaN cells) in the DataFrame
# isnull() flags every missing cell; the first sum() counts them per column,
# the second adds the column counts up. The result is the number of missing
# CELLS, not rows. For rows with at least one missing value use
# df.isnull().any(axis=1).sum().
df.isnull().sum().sum()
Constructing datetime structure from a given start date
import datetime

start = '1/22/2020'
start_date = datetime.datetime.strptime(start, '%m/%d/%Y')

# One formatted date string per forecast entry: start_date, start_date + 1 day, ...
future_forcast_dates = [
    (start_date + datetime.timedelta(days=i)).strftime('%m/%d/%Y')
    for i in range(len(future_forcast))
]

# Everything except the last 5 dates.
adjusted_dates = future_forcast_dates[:-5]
Train-test split
from sklearn.model_selection import train_test_split

# test_size=0.1 holds out 10% of the samples; shuffle=False keeps the original
# order, so the test set is the trailing 10%.
X_train, X_test, y_train, y_test = train_test_split(
    x_vals, y_vals, test_size=0.1, shuffle=False
)
Iterating over files in folder
import os

# listdir returns bare entry names; join them with the folder to get usable
# paths. os.path.join inserts the separator itself, so no trailing "/" is needed.
for f in os.listdir("folder"):
    path = os.path.join("folder", f)
One-hot encodings for N categories can be obtained as the rows of the N x N identity matrix
import numpy as np

# Row i of the N x N identity matrix is the one-hot vector for category i.
encoding = np.eye(N)
Counting True elements in the list
# list.count returns the number of elements equal to the argument; here 3.
[True, False, True, True].count(True)
Move color channel of input_tensor from 0 to 2 index
# 'ijk->jki' reorders the axes of a 3-D array so that the first axis (i)
# becomes the last one; no summation is performed.
np.einsum('ijk->jki', input_tensor)
Alternatively tensor can be “transposed” to move color channel as
# New axis order (1, 2, 0): the original axis 0 ends up last
# (assumes t is a 3-D NumPy array — TODO confirm).
t.transpose((1,2,0))
Valentine's plot
import base64

import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure()
ax = fig.gca()

# Parametric heart curve sampled at 100 points over one full turn.
t = np.linspace(0, 2 * np.pi, 100)
x = 16*np.sin(t)**3
y = 13*np.cos(t) - 5*np.cos(2*t) - 2*np.cos(3*t) - np.cos(4*t)

ax.plot(x, y)
plt.axis('off')
# The caption is stored base64-encoded and decoded to UTF-8 text at draw time.
plt.text(min(x)/2, 0, base64.b64decode(b'SGFwcHkgVmFsZW50aW5lcyBicm8h').decode('utf-8'), color="red", fontsize=14)
plt.show()