Tags: python, deep-learning, hyperparameters, ray

Ray | AttributeError: 'BroadModel' object has no attribute 'model'


I am using Ray Tune to find the optimal hyperparameter values for this model:

class BroadModel(tune.Trainable):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    def build_model(self, config):
        global convB2, drop2, convA2, poolA, poolB 
        window_size = 200
        self.x_gyro, self.x_acc, x_mag, q = load_data_train()
        self.Att_quat = Att_q(q)
        self.x_gyro_t, self.x_acc_t, x_mag_t, q_t = load_data_test()
        self.Att_quat_t = Att_q(q_t)

        self.x_gyro, self.x_acc, self.Att_quat = shuffle(self.x_gyro, self.x_acc, self.Att_quat)
        x1 = Input((window_size, 3), name='x1')
        x2 = Input((window_size, 3), name='x2')
        convA1 = Conv1D(config["Conv1DA"], 11, padding='same', activation='relu')(x1)
        for i in range(config["Conv1DAn"]):
            if i > 0:
                convA2 = Conv1D(config[f'Conv1DAn_{i}'], 11, padding='same', activation='relu')(convA1)
        poolA = MaxPooling1D(3)(convA1)

        convB1 = Conv1D(config["Conv1DB"], 11, padding='same', activation='relu')(x2)
        for i in range(config["Conv1DBn"]):
            if i > 0:
                convB2 = Conv1D(config[f'Conv1DBn_{i}'], 11, padding='same', activation='relu')(convB1)
        poolB = MaxPooling1D(3)(convB1)
        AB = concatenate([poolA, poolB])

        lstm1 = Bidirectional(LSTM(config["LSTM1"], return_sequences=True))(AB)
        drop1 = Dropout(config['dropout'])(lstm1)
        for i in range(config['LSTMn']):
            if i > 0:
                lstm2 = Bidirectional(LSTM(config[f'LSTMn_{i}'], return_sequences=True))(drop1)
                drop1 = Dropout(config['dropout'])(lstm2)
        lstm2 = Bidirectional(LSTM(config['LSTMn_l']))(drop1)
        drop2 = Dropout(config['dropout'])(lstm2)
        y1_pred = Dense(4, kernel_regularizer='l2')(drop2)
        model = Model(inputs=[x1, x2], outputs=[y1_pred])
        return model
    def setup(self, config):
        model = self.build_model(config)
        model.compile(
            optimizer=Adam(learning_rate=config['lr']),
            loss=quaternion_mean_multiplicative_error,
            metrics=[quaternion_mean_multiplicative_error],
        )
        self.model = model
        return model

But whenever I scale the network up, either by increasing the size of each layer from 50 to 100 or more, or by increasing the number of training iterations from 10–20 to more than 40, I get errors such as:

> Failure # 1 (occurred at 2022-09-05_12-04-07)
> ray::ResourceTrainable.train() (pid=35719, ip=192.168.91.120,
> repr=<ray.tune.trainable.util.BroadModel object at 0x7f478f107c40>)
>   File "/home/ssrc/asq/lib/python3.8/site-packages/ray/tune/trainable/trainable.py", line 347, in train
>     result = self.step()
>   File "ray_test.py", line 258, in step
>     self.model.fit(
> AttributeError: 'BroadModel' object has no attribute 'model'

This is the tuning code:

if __name__ == "__main__":
    import ray
    from ray.tune.schedulers import PopulationBasedTraining
    

    pbt = PopulationBasedTraining(
        perturbation_interval=600,
        hyperparam_mutations={
            "dropout": tune.uniform(0.1,0.5),
            "lr": tune.uniform(1e-5,1e-3),
            "Conv1DA": tune.randint(10,15),
            "Conv1DAn": tune.choice([0,1,2,3]),
            "Conv1DAn_1": tune.randint(10,15),
            "Conv1DAn_2": tune.randint(10,15),
            "Conv1DAn_3": tune.randint(10,15),
            "Conv1DB": tune.randint(10,15),
            "Conv1DBn": tune.choice([0,1,2,3]),
            "Conv1DBn_1": tune.randint(10,15),
            "Conv1DBn_2": tune.randint(10,15),
            "Conv1DBn_3": tune.randint(10,15),
            "LSTM1": tune.randint(10,15),
            "LSTMn": tune.choice([0,1,2,3]),
            "LSTMn_1": tune.randint(10,15),
            "LSTMn_2": tune.randint(10,15),
            "LSTMn_3": tune.randint(10,15),
            "LSTMn_l": tune.randint(10,15),
        },
    )
    resources_per_trial = {"cpu": 10, "gpu": 0}
    tuner = tune.Tuner(
        tune.with_resources(BroadModel, resources=resources_per_trial),
        run_config=air.RunConfig(
            name="BroadPBT"+timestr,
            stop={"training_iteration": 50},
        ),
        tune_config=tune.TuneConfig(
            reuse_actors=True,
            scheduler=pbt,
            metric="loss",
            mode="min",
            num_samples=2,
        ),
        param_space={
            "finish_fast": False,
            "batch_size": 1000,
            "epochs": 200,
            "dropout": tune.uniform(0.1,0.5),
            "lr": tune.uniform(1e-5,1e-3),
            "Conv1DA": tune.randint(10,15),
            "Conv1DAn": tune.choice([0,1,2,3]),
            "Conv1DAn_1": tune.randint(10,15),
            "Conv1DAn_2": tune.randint(10,15),
            "Conv1DAn_3": tune.randint(10,15),
            "Conv1DB": tune.randint(10,15),
            "Conv1DBn": tune.choice([0,1,2,3]),
            "Conv1DBn_1": tune.randint(10,15),
            "Conv1DBn_2": tune.randint(10,15),
            "Conv1DBn_3": tune.randint(10,15),
            "LSTM1": tune.randint(10,15),
            "LSTMn": tune.choice([0,1,2,3]),
            "LSTMn_1": tune.randint(10,15),
            "LSTMn_2": tune.randint(10,15),
            "LSTMn_3": tune.randint(10,15),
            "LSTMn_l": tune.randint(10,15),
            
            
        },
    )
    #tune.run(resources_per_trial={'gpu': 1}, tuner)
    tuner.fit()

What should I do? As I mentioned above, if I keep the number of iterations below 20, I don't get any errors.


Solution

  • With reuse_actors=True, Tune reuses the same actor process across trials: instead of shutting the actor down and calling setup() again, it hands the actor the next trial's hyperparameters through reset_config(). Your class does not implement reset_config(), so a reused actor can reach step() without setup() ever having set self.model, which is exactly the AttributeError you see. Add a reset_config() method to your class, making sure it assigns and recompiles the rebuilt model (your build_model() returns the model rather than storing it):

    def reset_config(self, new_config):
        self.config = new_config
        self.model = self.build_model(new_config)
        self.model.compile(optimizer=Adam(learning_rate=new_config['lr']),
                           loss=quaternion_mean_multiplicative_error,
                           metrics=[quaternion_mean_multiplicative_error])
        return True
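
  • For reference, here is a minimal, self-contained sketch of the setup()/step()/reset_config() pattern under reuse_actors=True. The tiny Dense network, the "lr" key, and the mse loss are placeholders standing in for your build_model() and quaternion loss, not your actual code:

    import numpy as np
    from ray import tune
    from tensorflow.keras import Input, Model
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.optimizers import Adam

    class SketchModel(tune.Trainable):
        def _build_and_compile(self, config):
            # Build and compile in one place so setup() and reset_config()
            # leave the actor in the same trainable state.
            inp = Input((10,))
            out = Dense(1)(inp)
            self.model = Model(inputs=inp, outputs=out)
            self.model.compile(optimizer=Adam(learning_rate=config["lr"]),
                               loss="mse")

        def setup(self, config):
            # Runs once, when the actor process is first created.
            self._build_and_compile(config)

        def step(self):
            # Runs every training iteration and relies on self.model existing,
            # whether the actor is fresh or reused.
            x = np.random.rand(32, 10)
            y = np.random.rand(32, 1)
            history = self.model.fit(x, y, verbose=0)
            return {"loss": history.history["loss"][-1]}

        def reset_config(self, new_config):
            # Runs instead of setup() when Tune hands a reused actor a new
            # trial. Returning True signals the reset succeeded; False would
            # force Tune to tear the actor down and start a new one.
            self.config = new_config
            self._build_and_compile(new_config)
            return True

With this pattern, PBT can perturb a trial's config and pass it to an existing actor without ever reaching step() on an actor whose model was never built.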