Loading and saving images directory in numpy
Connect your colab to local runtime
Purpose of the blog:
Load data similar to MNIST style dataset. This is particulary useful to feed your data directoly in many ML pipeline such as scikit-learn.
import cv2
import numpy as np
class_label = {'male':0, 'female': 1}
train_dir = os.path.join(DATASET_PATH, 'Training')
valid_dir = os.path.join(DATASET_PATH, 'Validation')
def datagen(output_size):
X, y = [], []
for cat in os.listdir(train_dir):
cat_dir = train_dir + '/' + cat
for img_file in os.listdir(cat_dir):
img_path = cat_dir + '/' + img_file
img = cv2.imread(img_path)
img = cv2.resize(img, dsize=(output_size, output_size), interpolation=cv2.INTER_CUBIC)
X.append(img)
y.append(class_label[str(cat)])
for cat in os.listdir(valid_dir):
cat_dir = valid_dir + '/' + cat
for img_file in os.listdir(cat_dir):
img_path = cat_dir + '/' + img_file
img = cv2.imread(img_path)
img = cv2.resize(img, dsize=(output_size, output_size), interpolation=cv2.INTER_CUBIC)
X.append(img)
y.append(class_label[str(cat)])
X = np.array(X)
y = np.array(y)
print(X.shape)
print(y.shape)
return X, y
X , y = datagen(output_size = 64)
X = X.astype("float") / 255.0
np.save(saveFile_X, X)
np.save(saveFile_y, y)
Load the saved data
X = np.load(saveFile_X)
y = np.load(saveFile_y)
Split your dataset using scikit learn
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, stratify=y)
Note that stratify is useful as it maintains the class proportions during split.
Written on July 19, 2020