Testing weighted categorical cross entropy for multiple classes in keras with tensorflow backend

I have an issue that seems to have no straightforward solution in Keras. My server runs Ubuntu 14.04 and Keras with the TensorFlow backend. It has four Nvidia GeForce GTX 1080 GPUs.

I am trying to test the best available implementation of weighted categorical cross-entropy (the one posted in a comment on Jan 20, 2017). The code pasted below reproduces the error shown further down.

The input array Xtrain has shape (800, 40), where 800 is the number of samples and 40 is the input feature dimension. Similarly, Xtest has shape (400, 40). This is a multiclass problem with three classes. The following code is used to implement it, but an error shows up that seems to indicate a GPU and batch-size mismatch, which I find difficult to address. Please provide some pointers on how to resolve it.

import keras
from keras.models import Sequential, Model, load_model
from keras.layers.embeddings import Embedding
from keras.layers.core import Activation, Dense, Dropout, Reshape
from keras.optimizers import SGD, Adam, RMSprop
#from keras.layers import TimeDistributed,Merge, Conv1D, Conv2D, Flatten, MaxPooling2D, Conv2DTranspose, UpSampling2D, RepeatVector

#keras.layers.recurrent import GRU, LSTM
#from keras.datasets.data_utils import get_file
#import tarfile
from functools import partial, update_wrapper
from keras.callbacks import TensorBoard
from time import time
from sklearn.model_selection import KFold
import numpy as np
from keras.callbacks import EarlyStopping
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Lambda
import os
from keras import optimizers
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
#os.export CUDA_VISIBLE_DEVICES="0,1"
import keras, sys
from matplotlib import pyplot
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
#from keras.utils import np_utils
from itertools import product
from keras.layers import Input

def w_categorical_crossentropy(y_true, y_pred, weights):
    """Categorical cross-entropy scaled by a per-(true, predicted) cost.

    A per-sample factor is accumulated over every (true class, predicted
    class) pair as ``weights[c_t, c_p] * is_max[:, c_p] * y_true[:, c_t]``,
    where ``is_max`` is 1.0 wherever ``y_pred`` equals its row-wise maximum.
    The plain cross-entropy is then multiplied by that factor.

    Args:
        y_true: target tensor, indexed here as ``y_true[:, class]``
            (presumably one-hot rows -- confirm against the caller).
        y_pred: prediction tensor, indexed here as ``y_pred[:, class]``.
        weights: 2-D array indexed as ``weights[true_class, pred_class]``.
            The class count is read from ``weights.shape[1]`` and used for
            both axes, so the array is assumed to be square.

    Returns:
        ``K.categorical_crossentropy(y_true, y_pred)`` multiplied by the
        accumulated per-sample factor.
    """
    nb_cl = weights.shape[1]
    # Debug output. Single-argument print(...) prints exactly what the
    # Python 2 print statement did and is also valid Python 3 syntax.
    print(weights.shape)
    print(nb_cl)
    print(y_pred)
    print(y_true)
    final_mask = K.zeros_like(y_pred[:, 0])
    # Row-wise maximum, reshaped to a column so it broadcasts against y_pred.
    y_pred_max = K.max(y_pred, axis=1)
    y_pred_max = K.reshape(y_pred_max, (K.shape(y_pred)[0], 1))
    # 1.0 where a class holds the row maximum (the predicted class), else 0.0.
    y_pred_max_mat = K.cast(K.equal(y_pred, y_pred_max), K.floatx())
    for c_p, c_t in product(range(nb_cl), range(nb_cl)):
        final_mask += (
            weights[c_t, c_p] * y_pred_max_mat[:, c_p] * y_true[:, c_t]
        )
    return K.categorical_crossentropy(y_true, y_pred) * final_mask

#def get_mat_data(add,in1,in2):
    # Assuming sample_matlab_file.mat has 2 matrices A and B
    #matData =
    #matrixA = matData[in1]
    #matrixA1 = matData[in2]
    #matrixB = matData['Ytrain']
    #matrixB1 = matData['Ytest']
    #weights = matData['w']
    #matrixC = matData['Ytrainclassify']
    #matrixC1 = matData['Ytestclassify']
    #nfold = matData['nfold']
    #return matrixA, matrixA1, matrixB, matrixB1, weights, matrixC, matrixC1, nfold 
def wrapped_partial(func, *args, **kwargs):
    """Pre-bind arguments to ``func`` while keeping its metadata.

    Builds ``functools.partial(func, *args, **kwargs)`` and lets
    ``update_wrapper`` copy ``func``'s ``__name__``, ``__doc__`` and related
    attributes onto it, which a bare ``partial`` object does not carry.

    Returns:
        The partial object, now carrying ``func``'s metadata.
    """
    bound = partial(func, *args, **kwargs)
    # update_wrapper hands back the wrapper it was given, so return it directly.
    return update_wrapper(bound, func)

def gen_model():
    """Build the uncompiled 40-feature, 3-class dense classifier.

    Layer stack: Input(40) -> Dense(200, relu) -> Dropout(0.2)
    -> Dense(100, relu) -> Dropout(0.2) -> Dense(3, softmax).

    Returns:
        A ``Model`` mapping the input tensor to the softmax output. It is
        not compiled here.
    """
    # Named `inputs` rather than `input` so the builtin is not shadowed.
    inputs = Input(shape=(40,))
    x1 = Dense(200, activation='relu', name='dense_1')(inputs)
    x2 = Dropout(0.2)(x1)
    x3 = Dense(100, activation='relu', name='dense_2')(x2)
    x4 = Dropout(0.2)(x3)
    x5 = Dense(3, activation='softmax', name='softmax_layer')(x4)
    # Keras 2 keyword names: the Keras 1 spelling `input=`/`output=` raises a
    # UserWarning asking for `inputs=`/`outputs=`.
    model = Model(inputs=inputs, outputs=[x5])
    return model

    # NOTE(review): the statements below are indented like a function body, but
    # no enclosing `def` line is present in this snippet. The traceback quoted
    # after the code reports them as running "in main", so presumably a
    # `def main():` header was lost -- confirm against the original script.
    # NOTE(review): Xtrain, Xtest and model are used below but never assigned in
    # this snippet; the only visible source for the arrays is the commented-out
    # get_mat_data(...) call.
    #in1 = 'Xtrain'
    #in2 = 'Xtest'
    #add = '/home/tharun/all_mat_files/test_keras.mat'
    #Xtrain, Xtest, Ytrain, Ytest, weights, Ytrainclassify, Ytestclassify, nfold = get_mat_data(add,in1,in2)
    # Random integer labels in [0, 3), stored as (n_samples, 1) columns.
    Ytrain = np.random.randint(3, size=(800, 1))
    Ytest = np.random.randint(3, size=(400, 1))
    Ytrainclassify = Ytrain
    # NOTE(review): bound to Ytrain (800 rows), not Ytest (400 rows) -- this
    # looks like a typo; confirm.
    Ytestclassify = Ytrain
    nb_classes = 3
    print Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape, Ytrainclassify.shape, Ytestclassify.shape
    # All-ones 3x3 cost matrix: every (true, predicted) pair gets weight 1.
    wts = np.ones((3,3))
    print 'wts:' 
    print wts.shape
    # convert class vectors to binary class matrices
    # [:, None] inserts a new axis, so the (800, 1) label columns are handed to
    # to_categorical as (800, 1, 1) arrays.
    Y_train = keras.utils.to_categorical(Ytrainclassify[:,None], nb_classes)
    Y_test = keras.utils.to_categorical(Ytestclassify[:,None], nb_classes)

    print Xtrain.shape
    print Y_train.shape
    print Xtest.shape
    print Y_test.shape
    # Bind the cost matrix so the loss is left with the (y_true, y_pred)
    # parameters only.
    ncce = wrapped_partial(w_categorical_crossentropy, weights=wts)
    batch_size = 10
    nb_classes = 3
    nb_epoch = 1
    #model.compile(loss=ncce, optimizer="adam")
    # Despite its name, `rms` holds a plain SGD optimizer.
    rms = SGD()
    # NOTE(review): the next line does not parse as written. It looks like a
    # compile(...) call fused with the tail of a separate fit(...) call: the
    # traceback quoted after the code shows `model.compile(loss=ncce,
    # optimizer=rms)` on a line of its own. Left untouched here -- restore from
    # the original script.
    model.compile(loss=ncce, optimizer=rms), Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
    model.evaluate(Xtest, Y_test)
    #print('Test score:', score[0])
    #print('Test accuracy:', score[1])

    #saving weights'model_classify_weights.h5')


python /home/tharun/keras_workshop/ 

/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/h5py/ FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
(800, 40) (400, 40) (800, 1) (400, 1) (1, 3) (800, 1) (400, 1)
(3, 3)
(800, 40)
(800, 3)
(400, 40)
(400, 3)
/home/tharun/keras_workshop/ UserWarning: Update your `Model` call to the Keras 2 API: `Model(outputs=[<tf.Tenso..., inputs=Tensor("in...)`
  model = Model(input=input, output=[x5])
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 40)                0         
dense_1 (Dense)              (None, 200)               8200      
dropout_1 (Dropout)          (None, 200)               0         
dense_2 (Dense)              (None, 100)               20100     
dropout_2 (Dropout)          (None, 100)               0         
softmax_layer (Dense)        (None, 3)                 303       
Total params: 28,603
Trainable params: 28,603
Non-trainable params: 0
(?, 3)
Tensor("softmax_layer_target:0", shape=(?, ?), dtype=float32)
[[array([1.41292294]) 1 1]
 [1 array([7.328564]) 1]
 [1 1 array([2.38611435])]]
/home/tharun/keras_workshop/ UserWarning: The `nb_epoch` argument in `fit` has been renamed `epochs`., Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
Epoch 1/1
2018-02-13 15:41:44.382214: I tensorflow/core/platform/] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2018-02-13 15:41:44.758387: I tensorflow/core/common_runtime/gpu/] Found device 0 with properties: 
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:05:00.0
totalMemory: 7.92GiB freeMemory: 7.42GiB
2018-02-13 15:41:44.992640: I tensorflow/core/common_runtime/gpu/] Found device 1 with properties: 
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:06:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.225696: I tensorflow/core/common_runtime/gpu/] Found device 2 with properties: 
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:09:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.458070: I tensorflow/core/common_runtime/gpu/] Found device 3 with properties: 
name: GeForce GTX 1080 major: 6 minor: 1 memoryClockRate(GHz): 1.7715
pciBusID: 0000:0a:00.0
totalMemory: 7.92GiB freeMemory: 7.80GiB
2018-02-13 15:41:45.461078: I tensorflow/core/common_runtime/gpu/] Device peer to peer matrix
2018-02-13 15:41:45.461151: I tensorflow/core/common_runtime/gpu/] DMA: 0 1 2 3 
2018-02-13 15:41:45.461160: I tensorflow/core/common_runtime/gpu/] 0:   Y Y Y Y 
2018-02-13 15:41:45.461165: I tensorflow/core/common_runtime/gpu/] 1:   Y Y Y Y 
2018-02-13 15:41:45.461170: I tensorflow/core/common_runtime/gpu/] 2:   Y Y Y Y 
2018-02-13 15:41:45.461175: I tensorflow/core/common_runtime/gpu/] 3:   Y Y Y Y 
2018-02-13 15:41:45.461191: I tensorflow/core/common_runtime/gpu/] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1080, pci bus id: 0000:05:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461198: I tensorflow/core/common_runtime/gpu/] Creating TensorFlow device (/device:GPU:1) -> (device: 1, name: GeForce GTX 1080, pci bus id: 0000:06:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461204: I tensorflow/core/common_runtime/gpu/] Creating TensorFlow device (/device:GPU:2) -> (device: 2, name: GeForce GTX 1080, pci bus id: 0000:09:00.0, compute capability: 6.1)
2018-02-13 15:41:45.461209: I tensorflow/core/common_runtime/gpu/] Creating TensorFlow device (/device:GPU:3) -> (device: 3, name: GeForce GTX 1080, pci bus id: 0000:0a:00.0, compute capability: 6.1)
Traceback (most recent call last):
  File "/home/tharun/keras_workshop/", line 239, in <module>
  File "/home/tharun/keras_workshop/", line 176, in main, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/", line 1598, in fit
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/", line 1183, in _fit_loop
    outs = f(ins_batch)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/backend/", line 2273, in __call__
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/", line 889, in run
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/", line 1120, in _run
    feed_dict_tensor, options, run_metadata)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/", line 1317, in _do_run
    options, run_metadata)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/client/", line 1336, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [3] vs. [10]
     [[Node: training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@loss/softmax_layer_loss/mul_20"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape, training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape_1)]]
     [[Node: loss/mul/_19 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_806_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op u'training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs', defined at:
  File "/home/tharun/keras_workshop/", line 239, in <module>
  File "/home/tharun/keras_workshop/", line 176, in main, Y_train,batch_size=batch_size, nb_epoch=nb_epoch)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/", line 1575, in fit
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/", line 960, in _make_train_function
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/legacy/", line 87, in wrapper
    return func(*args, **kwargs)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/", line 156, in get_updates
    grads = self.get_gradients(loss, params)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/", line 73, in get_gradients
    grads = K.gradients(loss, params)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/backend/", line 2310, in gradients
    return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/", line 581, in gradients
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/", line 353, in _MaybeCompile
    return grad_fn()  # Exit early
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/", line 581, in <lambda>
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/", line 742, in _MulGrad
    rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/", line 532, in _broadcast_gradient_args
    "BroadcastGradientArgs", s0=s0, s1=s1, name=name)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/", line 787, in _apply_op_helper
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/", line 2956, in create_op
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

...which was originally created as op u'loss/softmax_layer_loss/mul_20', defined at:
  File "/home/tharun/keras_workshop/", line 239, in <module>
  File "/home/tharun/keras_workshop/", line 174, in main
    model.compile(loss=ncce, optimizer=rms)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/", line 850, in compile
    sample_weight, mask)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/keras/engine/", line 466, in weighted
    score_array *= weights
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/", line 894, in binary_op_wrapper
    return func(x, y, name=name)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/", line 1117, in _mul_dispatch
    return gen_math_ops._mul(x, y, name=name)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/ops/", line 2726, in _mul
    "Mul", x=x, y=y, name=name)
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/", line 787, in _apply_op_helper
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/", line 2956, in create_op
  File "/home/tharun/anaconda2/envs/kerasdl/lib/python2.7/site-packages/tensorflow/python/framework/", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Incompatible shapes: [3] vs. [10]
     [[Node: training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@loss/softmax_layer_loss/mul_20"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape, training/SGD/gradients/loss/softmax_layer_loss/mul_20_grad/Shape_1)]]
     [[Node: loss/mul/_19 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_806_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


  • The issue is with the input label arrays (Ytrainclassify): they had to be reshaped to (Ytrainclassify.shape[0],). The working code, which runs without errors, is shared below.

    import keras
    from keras.models import Sequential, Model, load_model
    from keras.layers.embeddings import Embedding
    from keras.layers.core import Activation, Dense, Dropout, Reshape
    from keras.optimizers import SGD, Adam, RMSprop
    #from keras.layers import TimeDistributed,Merge, Conv1D, Conv2D, Flatten, MaxPooling2D, Conv2DTranspose, UpSampling2D, RepeatVector
    #from keras.layers.recurrent import GRU, LSTM
    #from keras.datasets.data_utils import get_file
    #import tarfile
    from ipdb import set_trace as bp
    from functools import partial, update_wrapper
    from keras.callbacks import TensorBoard
    from time import time
    from sklearn.model_selection import KFold
    import numpy as np
    from keras.callbacks import EarlyStopping
    import tensorflow as tf
    from keras import backend as K
    from keras.layers import Input, Lambda
    import os
    from keras import optimizers
    from matplotlib import pyplot
    from sklearn.preprocessing import MinMaxScaler
    #os.export CUDA_VISIBLE_DEVICES="0,1"
    import keras, sys
    from matplotlib import pyplot
    from keras.wrappers.scikit_learn import KerasRegressor
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import KFold
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline
    #from keras.utils import np_utils
    from itertools import product
    from keras.layers import Input
    # Custom loss function with costs
    def w_categorical_crossentropy(y_true, y_pred, weights):
        """Categorical cross-entropy scaled by a per-(true, predicted) cost.

        A per-sample factor is accumulated over every (true class, predicted
        class) pair as ``weights[c_t, c_p] * is_max[:, c_p] * y_true[:, c_t]``,
        where ``is_max`` is 1.0 wherever ``y_pred`` equals its row-wise
        maximum. The plain cross-entropy is then multiplied by that factor.

        Args:
            y_true: target tensor, indexed here as ``y_true[:, class]``
                (presumably one-hot rows -- confirm against the caller).
            y_pred: prediction tensor, indexed here as ``y_pred[:, class]``.
            weights: 2-D array indexed as ``weights[true_class, pred_class]``.
                The class count is read from ``weights.shape[1]`` and used
                for both axes, so the array is assumed to be square.

        Returns:
            ``K.categorical_crossentropy(y_true, y_pred)`` multiplied by the
            accumulated per-sample factor.
        """
        nb_cl = weights.shape[1]
        final_mask = K.zeros_like(y_pred[:, 0])
        # Row-wise maximum, reshaped to a column so it broadcasts over y_pred.
        y_pred_max = K.max(y_pred, axis=1)
        y_pred_max = K.reshape(y_pred_max, (K.shape(y_pred)[0], 1))
        # 1.0 where a class holds the row maximum (the predicted class).
        y_pred_max_mat = K.cast(K.equal(y_pred, y_pred_max), K.floatx())
        for c_p, c_t in product(range(nb_cl), range(nb_cl)):
            final_mask += (
                weights[c_t, c_p] * y_pred_max_mat[:, c_p] * y_true[:, c_t]
            )
        return K.categorical_crossentropy(y_true, y_pred) * final_mask
    # def joint_classificatn_regressn_loss(x1,ytrn,x2,ytst,w):
    #     return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))
    #def get_mat_data(add,in1,in2):
        # Assuming sample_matlab_file.mat has 2 matrices A and B
        #matData =
        #matrixA = matData[in1]
        #matrixA1 = matData[in2]
        #matrixB = matData['Ytrain']
        #matrixB1 = matData['Ytest']
        #weights = matData['w']
        #matrixC = matData['Ytrainclassify']
        #matrixC1 = matData['Ytestclassify']
        #nfold = matData['nfold']
        #return matrixA, matrixA1, matrixB, matrixB1, weights, matrixC, matrixC1, nfold 
    #load riemannian features from matlab
    #phase I
    #train and test three DNN models
    # def cutomized_loss(args):
    #     #A is from the training data
    #     #S is the internal state
    #     A, A, S, S = args
    #     #customize your own loss components
    #     loss1 = K.mean(K.square(A-A),axis=-1)
    #     loss1 = K.mean(K.square(A-A),axis=-1)
    def wrapped_partial(func, *args, **kwargs):
        """Pre-bind arguments to ``func`` while keeping its metadata.

        Builds ``functools.partial(func, *args, **kwargs)`` and lets
        ``update_wrapper`` copy ``func``'s ``__name__``, ``__doc__`` and
        related attributes onto it, which a bare ``partial`` does not carry.

        Returns:
            The partial object, now carrying ``func``'s metadata.
        """
        bound = partial(func, *args, **kwargs)
        # update_wrapper hands back the wrapper it was given.
        return update_wrapper(bound, func)
    def gen_model():
        """Build the uncompiled 40-feature, 3-class dense classifier.

        Layer stack: Input(40) -> Dense(200, relu) -> Dropout(0.2)
        -> Dense(100, relu) -> Dropout(0.2) -> Dense(3, softmax).

        Returns:
            A ``Model`` mapping the input tensor to the softmax output. It
            is not compiled here.
        """
        # Named `inputs` rather than `input` so the builtin is not shadowed.
        inputs = Input(shape=(40,))
        x1 = Dense(200, activation='relu', name='dense_1')(inputs)
        x2 = Dropout(0.2)(x1)
        x3 = Dense(100, activation='relu', name='dense_2')(x2)
        x4 = Dropout(0.2)(x3)
        x5 = Dense(3, activation='softmax', name='softmax_layer')(x4)
        # Keras 2 keyword names: the Keras 1 spelling `input=`/`output=`
        # raises a UserWarning asking for `inputs=`/`outputs=`.
        model = Model(inputs=inputs, outputs=[x5])
        return model
        #x6 =(Dropout(0.2))(x5)
    def main():
        """Train and evaluate the 3-class model with the weighted loss.

        Runs end to end on synthetic data: random features of shape
        (800, 40) for training and (400, 40) for testing, with random
        integer labels in [0, 3). The labels are flattened to
        ``(n_samples,)`` before one-hot encoding, the model from
        ``gen_model()`` is compiled with the weighted cross-entropy and an
        SGD optimizer, then fitted and evaluated.
        """
        batch_size = 10
        nb_classes = 3
        nb_epoch = 1

        # Synthetic features stand in for the arrays the original script
        # loaded from a .mat file; without them Xtrain/Xtest were read
        # before ever being assigned.
        Xtrain = np.random.rand(800, 40).astype('float32')
        Xtest = np.random.rand(400, 40).astype('float32')
        Ytrain = np.random.randint(nb_classes, size=(800, 1))
        Ytest = np.random.randint(nb_classes, size=(400, 1))
        Ytrainclassify = Ytrain
        # Previously bound to Ytrain, which produced 800 test labels for the
        # 400 rows of Xtest.
        Ytestclassify = Ytest

        # All-ones cost matrix: every (true, predicted) pair gets weight 1.
        wts = np.ones((nb_classes, nb_classes))
        print('wts.shape:')
        print(wts.shape)
        print(wts)
        # Bind the cost matrix so the loss is left with the
        # (y_true, y_pred) parameters only.
        ncce = wrapped_partial(w_categorical_crossentropy, weights=wts)

        arrays = (Xtrain, Xtest, Ytrain, Ytest, wts,
                  Ytrainclassify, Ytestclassify)
        # One pre-joined string keeps the output identical on Python 2 and 3.
        print(' '.join(str(a.shape) for a in arrays))

        # The fix from the answer: hand to_categorical flat (n_samples,)
        # label vectors rather than (n_samples, 1) columns.
        Ytestclassify = Ytestclassify.reshape(Ytestclassify.shape[0],)
        Ytrainclassify = Ytrainclassify.reshape(Ytrainclassify.shape[0],)
        # convert class vectors to binary class matrices
        Y_train = keras.utils.to_categorical(Ytrainclassify, nb_classes)
        Y_test = keras.utils.to_categorical(Ytestclassify, nb_classes)
        print(Ytrainclassify.shape)
        print(Ytestclassify.shape)
        print(Xtrain.shape)
        print(Y_train.shape)
        print(Xtest.shape)
        print(Y_test.shape)

        # The model was never instantiated in the posted snippet.
        model = gen_model()
        optimizer = SGD()
        model.compile(loss=ncce, optimizer=optimizer)
        # `epochs` is the Keras 2 name; `nb_epoch` triggers a rename warning.
        model.fit(Xtrain, Y_train, batch_size=batch_size, epochs=nb_epoch)
        model.evaluate(Xtest, Y_test)
        # To persist the trained weights:
        # model.save_weights('model_classify_weights.h5')
    if __name__ == "__main__":