Search code examples
tensorflow keras multiplication weighted-average ensemble-learning

How to multiply a layer by a constant vector element wise in Keras?


I want to build a weighted-average ensemble of 3 of my trained models. To do so, I first want to multiply each model's softmax output element-wise by a weight vector, and then average the 3 weighted outputs.

I used the following code to multiply the output of the first model by its weight vector:

from keras.layers import Multiply, Average

# Weights for the first model's output, stored as a (1, 11) float32 array
# (presumably one weight per softmax class -- confirm against the model).
resnet_weights = np.asarray([[0.91855, 0.99485, 0.89065, 0.96525, 0.98005,
                              0.93645, 0.6149, 0.934, 0.92505, 0.785, 0.85]], np.float32)
# Wrap the weights in a raw TensorFlow constant.
resnet_weight_tensor=tf.constant(resnet_weights, np.float32)
# Open a session only to print the constant's values, then close it again.
sess = tf.InteractiveSession()  
print(resnet_weight_tensor.eval())
sess.close()

# Element-wise product of the model's last-layer output and the weight tensor.
# NOTE(review): `resnet_weight_tensor` is a plain tf.constant rather than the
# output of a Keras layer; this is presumably what triggers the error below.
resnet_weighted = Multiply()([finetuned_model.layers[-1].output, resnet_weight_tensor])
print(resnet_weighted)

# NOTE(review): `model` is not defined in this snippet -- presumably
# `finetuned_model` was intended; confirm.
new_model=Model(model.input, resnet_weighted)

However, I'm stuck with the following error:

Error_message

What can I do?


Solution

  • Use Lambda instead of Multiply, and K.constant instead of tf.constant (K.constant is backend-neutral):

    # Backend-neutral constant holding the weights.
    resnet_weight_tensor = K.constant(resnet_weights, dtype='float32')
    # Output tensor of the fine-tuned model's last layer.
    out = finetuned_model.layers[-1].output
    # Lambda applies the element-wise scaling as a regular Keras layer.
    resnet_weighted = Lambda(lambda t: t * resnet_weight_tensor)(out)
    


    FULL EXAMPLE:

    ## BUILD MODELS
    batch_size  = 32
    num_batches = 100
    input_shape = (4,)
    num_classes = 3

    # Three classifiers built with different dense_dim values (8, 10, 12).
    model_1 = make_model(input_shape,  8, num_classes)
    model_2 = make_model(input_shape, 10, num_classes)
    model_3 = make_model(input_shape, 12, num_classes)

    ## BUILD ENSEMBLE
    models = (model_1, model_2, model_3)
    models_ins  = [model.input  for model in models]
    # Output tensors of the individual models (not their inputs).
    models_outs = [model.output for model in models]

    # One random weight array per model, each of shape (batch_size, num_classes).
    outputs_weights = [np.random.random((batch_size, num_classes))
                       for _ in models]
    outs_avg  = model_outputs_average(models, outputs_weights)

    final_out = Dense(num_classes, activation='softmax')(outs_avg)
    model_ensemble = Model(inputs=models_ins, outputs=final_out)
    model_ensemble.compile('adam', loss='categorical_crossentropy')

    ### TEST ENSEMBLE
    x1 = np.random.randn(batch_size, *input_shape) # toy data
    x2 = np.random.randn(batch_size, *input_shape)
    x3 = np.random.randn(batch_size, *input_shape)
    y  = np.random.randint(0,2,(batch_size, num_classes)) # toy labels

    model_ensemble.fit([x1,x2,x3], y)
    

    Verify averaging:

    # Show layer names so the 'lambda_*' / 'average_*' layers can be identified.
    for layer in model_ensemble.layers:
        print(layer.name)

    # Outputs of the three weighting layers and of the averaging layer.
    preouts1    = get_layer_outputs(model_ensemble, 'lambda_1', [x1,x2,x3])
    preouts2    = get_layer_outputs(model_ensemble, 'lambda_2', [x1,x2,x3])
    preouts3    = get_layer_outputs(model_ensemble, 'lambda_3', [x1,x2,x3])
    preouts_avg = get_layer_outputs(model_ensemble, 'average_1',[x1,x2,x3])

    # The mean over the three weighted outputs should match the Average layer's
    # output; print the summed difference between the two.
    preouts = np.asarray([preouts1, preouts2, preouts3])
    sum_of_diff_of_means = np.sum(np.mean(preouts, axis=0) - preouts_avg)
    print(sum_of_diff_of_means)
    # 4.69e-07
    


    Functions used:

    def make_model(input_shape, dense_dim, num_classes=3):
        """Build and compile a classifier with one hidden dense layer.

        Args:
            input_shape: shape passed to the `Input` layer.
            dense_dim: number of units in the hidden ReLU layer.
            num_classes: number of units in the softmax output layer.

        Returns:
            The model, compiled with the 'adam' optimizer and
            categorical cross-entropy loss.
        """
        inputs = Input(shape=input_shape)
        hidden = Dense(dense_dim, activation='relu')(inputs)
        probs = Dense(num_classes, activation='softmax')(hidden)

        net = Model(inputs=inputs, outputs=probs)
        net.compile(optimizer='adam', loss='categorical_crossentropy')
        return net
    
    def model_outputs_average(models, outputs_weights):
        """Scale each model's output by its own constant weights, then average.

        Args:
            models: models whose outputs all share the same shape once the
                batch dimension is ignored.
            outputs_weights: one array-like per model, in the same order as
                `models`; each is converted to a float32 constant and multiplied
                element-wise with that model's output. All weight arrays must
                have the same shape.

        Returns:
            The output tensor of an `Average` layer applied to the weighted
            model outputs.

        Raises:
            ValueError: if `models` and `outputs_weights` differ in length.
            AssertionError: if the model output shapes differ from each other,
                or the weight shapes differ from each other.
        """
        models = list(models)
        outputs_weights = list(outputs_weights)
        # zip() below would silently drop the unmatched models or weights.
        if len(models) != len(outputs_weights):
            raise ValueError(
                "Expected one weight array per model, got %d models and %d "
                "weight arrays" % (len(models), len(outputs_weights)))

        outs = [model.output for model in models]
        out_shape = K.int_shape(outs[0])[1:] # ignore batch dim

        assert all(K.int_shape(out)[1:] == out_shape for out in outs), \
               "All model output shapes must match"

        outs_weights = [K.constant(w, 'float32') for w in outputs_weights]
        ow_shape = K.int_shape(outs_weights[0])
        assert all(K.int_shape(w) == ow_shape for w in outs_weights), \
               "All outputs_weights and model.output shapes must match"

        # Bind `ow` as a default argument. A plain closure looks `ow` up when
        # the function runs, so if these layers are ever executed again after
        # the comprehension has finished, every one of them would multiply by
        # the last weight tensor.
        weights_layers = [Lambda(lambda x, ow=ow: x * ow)(out)
                          for ow, out in zip(outs_weights, outs)]

        return Average()(weights_layers)
    
    def get_layer_outputs(model, layer_name, input_data, train_mode=False):
        """Evaluate a layer of `model` on `input_data` and return its output.

        Layers are selected by substring match of `layer_name` against each
        layer's name; when several layers match, only the output of the first
        one (in `model.layers` order) is returned.

        Args:
            model: model whose layers are searched.
            layer_name: substring to look for in the layer names.
            input_data: data fed to `model.input`.
            train_mode: passed as the learning-phase flag (as 0 or 1).

        Returns:
            The evaluated output of the first matching layer.
        """
        matched = []
        for layer in model.layers:
            if layer_name in layer.name:
                matched.append(layer.output)

        fetch = K.function([model.input, K.learning_phase()], matched)
        results = fetch([input_data, int(train_mode)])
        return results[0]