I have read the two questions on this site about creating a pickled data file from image data (similar to mnist.pkl.gz). Even though I understand that pickling is not strictly necessary, I would like help figuring out why my image data is not getting pickled when I run the code below. My labels file, trainLabels.csv, has a single "Class" column that looks like this:
Class
0
1
2
0
0
1
.
. and so on.
The problem is that only the contents of the CSV file (the image labels) are getting pickled, not the image data.
These images are in the same directory as the code below.
from PIL import Image
from numpy import genfromtxt
import gzip, cPickle
import pickle
from glob import glob
import numpy as np
import pandas as pd
def dir_to_dataset(glob_files, loc_train_labels=""):
    """Load every image matching a glob pattern into a grayscale pixel array.

    Each matched file is opened with PIL, converted to mode 'LA'
    (luminance + alpha), and reduced to a flat list holding the first
    (luminance) band of every pixel. One such list is collected per image.

    Args:
        glob_files: Glob pattern (not a regular expression) selecting the
            image files, e.g. "*.bmp".
        loc_train_labels: Optional path to a CSV file that has a "Class"
            column holding the labels. An empty string (the default) means
            no labels are loaded.

    Returns:
        ``np.array(dataset)`` when no label file is given; otherwise the
        tuple ``(np.array(dataset), np.array(labels))``.
    """
    print("Gonna process:\n\t %s" % glob_files)

    # Order by (length, name): shorter names first, ties broken
    # alphabetically, so "2.bmp" precedes "10.bmp". Sorting on length alone
    # would leave same-length names in whatever order glob() produced, which
    # can silently misalign the images with the rows of the label file.
    file_names = sorted(glob(glob_files), key=lambda name: (len(name), name))
    if not file_names:
        # An unmatched pattern otherwise yields an empty array with no hint
        # as to why no image data was produced.
        print("\t WARNING: no files matched %s" % glob_files)

    dataset = []
    for file_count, file_name in enumerate(file_names):
        print(file_name)
        print('Are we in the loop ?')
        # Open each file once; convert() returns a new grayscale+alpha image.
        img = Image.open(file_name).convert('LA')
        # Every pixel is a (luminance, alpha) pair; keep the luminance only.
        pixels = [f[0] for f in img.getdata()]
        dataset.append(pixels)
        if file_count % 10 == 0:
            print("\t %s files processed" % file_count)

    if loc_train_labels:
        df = pd.read_csv(loc_train_labels)
        return np.array(dataset), np.array(df["Class"])
    return np.array(dataset)
# NOTE(review): this pattern looks for .bmp files inside an "image"
# sub-directory and hard-codes a Windows path separator. If it matches no
# files, Dataa comes back empty and only the labels end up in the pickle.
Dataa, y = dir_to_dataset("image\\*.bmp", "trainLabels.csv")
# Data and labels are read

# Contiguous half-open slices: [0, 30) train, [30, 40) validation,
# [40, 50) test, so that samples 30 and 40 are not silently dropped.
train_set_x = Dataa[:30]
val_set_x = Dataa[30:40]
test_set_x = Dataa[40:50]
train_set_y = y[:30]
val_set_y = y[30:40]
test_set_y = y[40:50]
# Divided dataset into 3 parts. I had 6281 images.

train_set = train_set_x, train_set_y
print('Type of train_set_x %s' % (type(train_set_x),))
print(train_set_x)
val_set = val_set_x, val_set_y
# Pair the test images with the test labels (not the validation labels).
test_set = test_set_x, test_set_y

dataset = [train_set, val_set, test_set]

# The context manager closes the gzip stream even if pickling fails.
with gzip.open('traffic_file.pkl.gz', 'wb') as f:
    pickle.dump(dataset, f, protocol=2)
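Changing the glob pattern from `image\\*.bmp` to `*.bmp` made it work, because the images are in the same directory as the script rather than in an `image` sub-directory.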