====== Python ======
===== Virtual environment =====
Create and activate a Python virtual environment
virtualenv NAME
source NAME/bin/activate
which python
To deactivate
deactivate
===== Functional =====
Equivalent ways of filtering a list
list(filter(lambda n: n % 2 == 0, range(10)))
[n for n in range(10) if n % 2 == 0]
===== pip =====
Useful dependencies stack:
pip install numpy pandas pillow opencv-python dominate torch pickle-mixin scipy
Install a package directly from a GitHub repository
pip install -U git+https://github.com/pytorch/vision
Install with a version constraint
pip install "pillow<7"
===== Images =====
import cv2
img = cv2.imread("image.png") #numpy.ndarray
===== Useful =====
Progress bar for iterations
from tqdm import tqdm
from tqdm.notebook import tqdm #for Jupyter / Google Colab (replaces the deprecated tqdm_notebook)
for i in tqdm(range(100)):
    pass
Unpacking (spreading) a dictionary — useful for passing keyword arguments to functions or for extending dictionaries/lists
d = {'one': 1, 'two': 2}
{*d} # {'one', 'two'}
{**d} #{'one': 1, 'two': 2}
Dictionaries can be built with comprehensions, analogously to lists
boolValues = {i: True for i in some_array}
Iterating over a dictionary's keys and values
for (key, val) in db.items():
    print(key, val)
Equivalent ways of appending data to a list
res = []
for key, elem in hmap.items():
    res = [*res, {key, np.median(elem)}]
# or, equivalently, in place:
for key, elem in hmap.items():
    res.append({key, np.median(elem)})
Reading/writing file
with open('old_request.txt') as file:
    contents = file.read()
with open('new_request.txt', 'w') as file:
    file.write("text")
===== matplotlib =====
Display an image from the matrix ''x'' using ''imshow'' or ''matshow''
import matplotlib.pyplot as plt
plt.imshow(x)
import matplotlib.pyplot as plt
plt.matshow(x)
Show a grid of images (2 x 4)
fig = plt.figure()
for i in range(7):
    ax = plt.subplot(2, 4, i + 1)
    plt.imshow(imgs[i])
    plt.tight_layout()
    ax.set_title(f"Sample {i}")
    ax.axis("off")
Useful options
fig = plt.figure(figsize=(10,6))
plt.grid(True)
ax.legend(['AU', 'GB', 'US'])
ax.set_xlabel("Check Out hour");
ax.set_xticks(range(0,24));
===== pandas =====
import pandas as pd
df = pd.read_csv("./file.csv")
df.head()
df.keys()
Generic
df = df.rename(columns={"id": "paper_sha", "paragraph": "text"})
df = df.rename_axis("_id")
df.dropna()
df.dropna(subset=['name', 'born'])
df.to_csv('csv2sql2.csv',index=True)
Take certain columns
df.loc[:, cols[4]:cols[10]]
df.loc[:, cols[4]:]
df.loc[:, [cols[4], cols[-1]]]
Delete column
del df['column_name']
Get unique values
metadata['source_x'].unique()
Filter data
df.loc[df['table'] == True]
Sum column entries
some_partial_sum = []
for i in df.keys():
    col_sum = df[i].sum()
    some_partial_sum.append(col_sum)
Reshaping data for ML models
days = np.array([i for i in range(len(dates))]).reshape(-1, 1)
some_partial_sum = np.array(some_partial_sum).reshape(-1, 1)
Find the total number of missing entries (NaN cells) across all columns
df.isnull().sum().sum()
Constructing ''datetime'' structure from a given start date
import datetime
start = '1/22/2020'
start_date = datetime.datetime.strptime(start, '%m/%d/%Y')
future_forcast_dates = []
for i in range(len(future_forcast)):
    future_forcast_dates.append((start_date + datetime.timedelta(days=i)).strftime('%m/%d/%Y'))
adjusted_dates = future_forcast_dates[:-5]
===== Useful in ML =====
Train-test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x_vals, y_vals, test_size=0.1, shuffle=False)
Iterating over files in ''folder''
import os
for f in os.listdir("folder"):
    path = os.path.join("folder/", f)
One-hot encodings for N categories can be obtained as the rows of the N x N identity matrix
import numpy as np
encoding = np.eye(N)
Counting ''True'' elements in the list
[True, False, True, True].count(True)
Move the color channel of ''input_tensor'' from axis 0 to axis 2
np.einsum('ijk->jki', input_tensor)
Alternatively, the tensor can be transposed to move the color channel:
t.transpose((1,2,0))
===== Fun =====
Valentine's Day plot
import numpy as np
import matplotlib.pyplot as plt
import base64
fig = plt.figure()
ax = fig.gca()
t = np.linspace(0, 2 * np.pi, 100)
x = 16*np.sin(t)**3
y = 13*np.cos(t) - 5*np.cos(2*t) - 2*np.cos(3*t) - np.cos(4*t)
ax.plot(x, y)
plt.axis('off')
plt.text(min(x)/2, 0, base64.b64decode(b'SGFwcHkgVmFsZW50aW5lcyBicm8h').decode('utf-8'), color="red", fontsize=14)
plt.show()