Search code examples
pythonpytorchmlflow

TypeError: 'numpy.float32' object is not iterable when logging in mlflow


I am trying a machine learning model and logging metrics using mlflow. But I am getting TypeError: 'numpy.float32' object is not iterable. I have tried using .tolist() and dict() but nothing seems to work.

def train(max_epochs, model, optimizer, scheduler, train_loader, valid_loader, project_name):
    best_val_loss = 100
    for epoch in range(max_epochs):
        model.train()
        running_loss = []
        tq_loader = tqdm(train_loader)
        o = {}
        for samples in tq_loader:
            optimizer.zero_grad()
            outputs, interaction_map = model(
                [samples[0].to(device), samples[1].to(device), torch.tensor(samples[2]).to(device),
                 torch.tensor(samples[3]).to(device)])
            l1_norm = torch.norm(interaction_map, p=2) * 1e-4
            loss = loss_fn(outputs, torch.tensor(samples[4]).to(device).float()) + l1_norm
            loss.backward()
            optimizer.step()
            loss = loss - l1_norm
            running_loss.append(loss.cpu().detach())
            tq_loader.set_description(
                "Epoch: " + str(epoch + 1) + "  Training loss: " + str(np.mean(np.array(running_loss))))
        model.eval()
        val_loss, mae_loss = get_metrics(model, valid_loader)
        scheduler.step(val_loss)
        
        #metrics mlflow
        mlflow.log_metrics('train_loss',(np.mean(np.array(running_loss))).tolist())
        mlflow.log_metrics('validation_loss',(val_loss).tolist())
        mlflow.log_metrics('MAE Val_loss', (mae_loss).tolist())

        print("Epoch: " + str(epoch + 1) + "  train_loss " + str(np.mean(np.array(running_loss))) + " Val_loss " + str(
            val_loss) + " MAE Val_loss " + str(mae_loss))
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "./runs/run-" + str(project_name) + "/models/best_model.tar")

mlflow.set_experiment('CIGIN_V2')
mlflow.start_run(nested=True)
train(max_epochs, model, optimizer, scheduler, train_loader, valid_loader, project_name)
mlflow.end_run()

Error

Epoch: 1  Training loss: 6770.575: 100%|██████████| 1/1 [00:04<00:00,  4.35s/it]
100%|██████████| 1/1 [00:03<00:00,  3.86s/it]

---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-96-8c3a6eb822c3> in <module>()
      1 mlflow.set_experiment('CIGIN_V2')
      2 mlflow.start_run(nested=True)
----> 3 train(max_epochs, model, optimizer, scheduler, train_loader, valid_loader, project_name)
      4 mlflow.end_run()

<ipython-input-95-ab0a6c80b65b> in train(max_epochs, model, optimizer, scheduler, train_loader, valid_loader, project_name)
     55 
     56         #metrics mlflow
---> 57         mlflow.log_metrics('train_loss',dict(np.mean(np.array(running_loss))).tolist())
     58         mlflow.log_metrics('validation_loss',dict(val_loss).tolist())
     59         mlflow.log_metrics('MAE Val_loss', dict(mae_loss).tolist())

TypeError: 'numpy.float32' object is not iterable

Solution

  • Youre logging a single value into log_metrics and i dont think thats correct based on the implementation of log_metric and log_metrics in the documentation:

    https://www.mlflow.org/docs/latest/python_api/mlflow.html#mlflow.log_metric and https://www.mlflow.org/docs/latest/python_api/mlflow.html#mlflow.log_metrics

    So i would suggest to maybe change the "log_metrics" to "log_metric" and leave the tolist out