Search code examples
python pandas tensorflow keras

KeyError: 'filename' (Pandas)


I get this error when modeling. I can't figure out where this error comes from.

The whole algorithm is designed to describe products (clothes). This part recognizes the colors of clothes.

Dataframe loads correctly.

Below I put a lot of code because I don't know exactly where I made the mistake.

# Label files for the training, validation and test sets
TRAIN_LABELS_FILE = "train/labels.txt"
VAL_LABELS_FILE = "val/labels.txt"
TEST_LABELS_FILE = "test/labels.txt"
# File holding the color names
COLOR_FILE = "names.txt"

# Image size: width, height and number of channels
IMG_WIDTH, IMG_HEIGHT, CHANNELS = 224, 224, 3

# Build the column names for the label files: 'path' first, then the colors.
# NOTE(review): read_csv consumes the first line of COLOR_FILE as a header,
# which is presumably why 'beige' is re-inserted by hand -- verify against
# the actual contents of names.txt.
color = pd.read_csv(COLOR_FILE).T
color_list = ['path', 'beige', *color.iloc[0]]

# The label file is space-separated and has no header row of its own
train = pd.read_csv(TRAIN_LABELS_FILE, sep=" ", names=color_list)

def crop_image_from_gray(img, tol=7):
    """
    Crop away the dark border of an image.

    A pixel counts as "content" when its gray value is strictly greater
    than ``tol``. The image is cropped to the rows and columns that
    contain at least one content pixel.

    Parameters:
        img: 2-D (grayscale) or 3-D (RGB) NumPy array.
        tol: gray-level threshold; pixels at or below it are treated
             as background.

    Returns:
        The cropped image. For 3-D input only the first three channels
        are kept. If no pixel exceeds ``tol`` (the image is too dark and
        everything would be cropped out), the original image is returned
        unchanged. Arrays that are neither 2-D nor 3-D yield ``None``.
    """
    if img.ndim == 2:
        # Grayscale image: threshold the pixel values directly
        mask = img > tol
    elif img.ndim == 3:
        # RGB image: threshold a grayscale version of it
        mask = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) > tol
    else:
        # Unsupported rank: same implicit result as before, made explicit
        return None

    # Image is too dark, so we would crop out everything: keep the original.
    # This guard now also covers the 2-D case, which previously returned
    # an empty array.
    if not mask.any():
        return img

    # Compute the row/column selection once and reuse it
    keep = np.ix_(mask.any(1), mask.any(0))
    if img.ndim == 2:
        return img[keep]
    # Crop the first three channels together; the result has shape
    # (rows, cols, 3)
    return img[:, :, :3][keep]

def preprocess_image(image, sigmaX=10):
    """
    Preprocess a single image array:
    1. Swap the channel order with cv2.COLOR_BGR2RGB
    2. Crop away the dark border (crop_image_from_gray)
    3. Resize to (IMG_WIDTH, IMG_HEIGHT)
    4. Subtract a Gaussian-blurred copy and re-center around 128

    sigmaX is the standard deviation passed to cv2.GaussianBlur.

    NOTE(review): the channel swap assumes the input arrives in BGR
    order -- confirm this matches what the caller actually passes in.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    cropped = crop_image_from_gray(rgb)
    resized = cv2.resize(cropped, (IMG_WIDTH, IMG_HEIGHT))
    # Kernel size (0,0) lets OpenCV derive the kernel from sigmaX
    blurred = cv2.GaussianBlur(resized, (0,0), sigmaX)
    # Weighted sum: 4 * resized - 4 * blurred + 128
    return cv2.addWeighted(resized, 4, blurred, -4, 128)

from keras.preprocessing.image import ImageDataGenerator
BATCH_SIZE = 4

# Add image augmentation to our generator; 15% of the rows are held out
# for the 'validation' subset.
train_datagen = ImageDataGenerator(rotation_range=360,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   validation_split=0.15,
                                   preprocessing_function=preprocess_image, 
                                   rescale=1 / 128.)

# Use the dataframe to define train and validation generators.
# x_col must name the dataframe column that holds the image file names.
# When it is omitted, flow_from_dataframe falls back to its default,
# 'filename', which is not a column of `train` and raises
# KeyError: 'filename'. The file-name column of `train` is 'path'.
# NOTE(review): with class_mode=None the generator yields image batches
# only, so y_col does not produce targets -- confirm whether labels are
# wanted here.
train_generator = train_datagen.flow_from_dataframe(train, 
                                                    x_col='path',
                                                    y_col=color_list[1:],
                                                    directory = 'train/images/',
                                                    target_size=(IMG_WIDTH, IMG_HEIGHT),
                                                    batch_size=BATCH_SIZE,
                                                    class_mode=None, 
                                                    subset='training')

val_generator = train_datagen.flow_from_dataframe(train, 
                                                  x_col='path',
                                                  y_col=color_list[1:],
                                                  directory = 'train/images/',
                                                  target_size=(IMG_WIDTH, IMG_HEIGHT),
                                                  batch_size=BATCH_SIZE,
                                                  class_mode=None,
                                                  subset='validation')

I also put the whole Traceback error below.

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2656             try:
-> 2657                 return self._engine.get_loc(key)
   2658             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'filename'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-15-d57c90ec4a7f> in <module>
     18                                                     batch_size=BATCH_SIZE,
     19                                                     class_mode=None,
---> 20                                                     subset='training')
     21 
     22 val_generator = train_datagen.flow_from_dataframe(train, 

/usr/local/lib/python3.6/dist-packages/keras/preprocessing/image.py in flow_from_dataframe(self, dataframe, directory, x_col, y_col, weight_col, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format, subset, interpolation, validate_filenames, **kwargs)
    592             interpolation=interpolation,
    593             validate_filenames=validate_filenames,
--> 594             **kwargs
    595         )
    596 

/usr/local/lib/python3.6/dist-packages/keras/preprocessing/image.py in __init__(self, dataframe, directory, image_data_generator, x_col, y_col, weight_col, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, subset, interpolation, dtype, validate_filenames)
    233             interpolation=interpolation,
    234             dtype=dtype,
--> 235             validate_filenames=validate_filenames)
    236 
    237 

/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/dataframe_iterator.py in __init__(self, dataframe, directory, image_data_generator, x_col, y_col, weight_col, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, subset, interpolation, dtype, validate_filenames)
    127         self.dtype = dtype
    128         # check that inputs match the required class_mode
--> 129         self._check_params(df, x_col, y_col, weight_col, classes)
    130         if validate_filenames:  # check which image files are valid and keep them
    131             df = self._filter_valid_filepaths(df, x_col)

/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/dataframe_iterator.py in _check_params(self, df, x_col, y_col, weight_col, classes)
    179             )
    180         # check that filenames/filepaths column values are all strings
--> 181         if not all(df[x_col].apply(lambda x: isinstance(x, str))):
    182             raise TypeError('All values in column x_col={} must be strings.'
    183                             .format(x_col))

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __getitem__(self, key)
   2925             if self.columns.nlevels > 1:
   2926                 return self._getitem_multilevel(key)
-> 2927             indexer = self.columns.get_loc(key)
   2928             if is_integer(indexer):
   2929                 indexer = [indexer]

/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2657                 return self._engine.get_loc(key)
   2658             except KeyError:
-> 2659                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2660         indexer = self.get_indexer([key], method=method, tolerance=tolerance)
   2661         if indexer.ndim > 1 or indexer.size > 1:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'filename'

Solution

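  • According to the Keras documentation, the default value of x_col in flow_from_dataframe is 'filename'. Since your code doesn't pass x_col, flow_from_dataframe assumes the default and looks for a column of that name in the dataframe, which does not exist. Pass the name of the column that holds the image file names instead; in your dataframe that is x_col='path'.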