Search code examples
pythontensorflowdeep-learningxgboostmulticlass-classification

Why i am getting this problem of zero accuracy when doing gradient boosting?


I am running a multi class classification problem using colab pro wherein i am training my model using vgg16 for feature extraction and using xgboost as a classifier, however when i check accuracy i am getting 0%. I ran the same code on my spyder ide on my local machine and there i am getting an accuracy of 72.6%. The code along with the output message is pasted below:

    train_images = []
    train_labels = []
    for directory_path in glob.glob("/content/drive/MyDrive/indoorCVPR_09/train/*"):
      label = directory_path.split("\\")[-1]
      print(label)
      for img_path in glob.glob(os.path.join(directory_path, "*.jpg")):
        print(img_path)
        img = cv2.imread(img_path,cv2.IMREAD_COLOR)
        img = cv2.resize(img,(SIZE,SIZE))
        img = cv2.cvtColor(img,cv2.COLOR_RGB2BGR)
        train_images.append(img)
        train_labels.append(label)
    
    
    train_images = np.array(train_images)
    train_labels = np.array(train_labels)

    test_images = []
    test_labels = []
    for directory_path in glob.glob("/content/drive/MyDrive/indoorCVPR_09/test/*"):
      fruit_label = directory_path.split("\\")[-1]
      for img_path in glob.glob(os.path.join(directory_path, "*.jpg")):
        print(img_path)
        img = cv2.imread(img_path,cv2.IMREAD_COLOR)
        img = cv2.resize(img,(SIZE,SIZE))
        img = cv2.cvtColor(img,cv2.COLOR_RGB2BGR)
        test_images.append(img)
        test_labels.append(fruit_label)
    
    test_images = np.array(test_images)
    test_labels = np.array(test_labels)
    from sklearn import preprocessing
    le = preprocessing.LabelEncoder()
    le.fit(test_labels)
    test_labels_encoded = le.transform(test_labels)
    le.fit(train_labels)
    train_labels_encoded = le.transform(train_labels)
    
    x_train,y_train,x_test,y_test = train_images,train_labels_encoded,test_images,test_labels_encoded
    x_train = x_train/255
    print(x_train)
    VGG_model = VGG16(weights='imagenet', include_top=False , input_shape=(SIZE,SIZE,3))
    
    for layer in VGG_model.layers:
      layer.trainable = False
    
    VGG_model.summary()
    feature_extractor = VGG_model.predict(x_train)
    features = feature_extractor.reshape(feature_extractor.shape[0],-1)
    X_for_training = features


    import xgboost as xgb
    model = xgb.XGBClassifier()
    model.fit(X_for_training,y_train)
#Output of the section above:
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='multi:softprob', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)


    X_test_feature = VGG_model.predict(x_test)
    X_test_features = X_test_feature.reshape(X_test_feature.shape[0], -1)


    prediction = model.predict(X_test_features)
    prediction = le.inverse_transform(prediction)
    from sklearn import metrics
    print("Accuracy = ",metrics.accuracy_score(test_labels,prediction))
#Output of accuracy:
Accuracy =  0.0

Solution

  • Have you checked the data types of test_labels and prediction? I ran into a problem very similar to this and found out that I was trying to calculate the accuracy score of strings and np.int64. After I converted them both to the same data type, I no longer got an accuracy score of 0.