Search code examples
python deep-learning nlp computer-vision huggingface-evaluate

Loading the accuracy metric with evaluate sometimes fails with: TypeError: 'NoneType' object is not callable


I'm using BERT and other encoder models for text classification tasks, but when I try to load the accuracy metric with the Hugging Face evaluate library, it sometimes fails with: TypeError: 'NoneType' object is not callable. I have searched online for a long time without finding a solution. Please help, or suggest some ideas for how to resolve this. Thanks in advance.

This is the traceback of the error:

─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /home/ubuntu/Bill_PyCharm/absa-three/yasi_encoder/yasi_roberta.py:94 in <module>                 │
│                                                                                                  │
│    91 test_dataset = datasets.Dataset.from_dict(train_ds.get_dataset())                          │
│    92                                                                                            │
│    93 """## Train Loop"""                                                                        │
│ ❱  94 accuracy = evaluate.load("accuracy")                                                       │
│    95 # accuracy = evaluate.load("../evaluate/accuracy.py")                                      │
│    96 # recall = evaluate.load("recall")                                                         │
│    97 # precision = evaluate.load("precision")                                                   │
│                                                                                                  │
│ /home/ubuntu/anaconda3/lib/python3.9/site-packages/evaluate/loading.py:778 in load               │
│                                                                                                  │
│   775 │   │   path, module_type=module_type, revision=revision, download_config=download_confi   │
│   776 │   ).module_path                                                                          │
│   777 │   evaluation_cls = import_main_class(evaluation_module)                                  │
│ ❱ 778 │   evaluation_instance = evaluation_cls(                                                  │
│   779 │   │   config_name=config_name,                                                           │
│   780 │   │   process_id=process_id,                                                             │
│   781 │   │   num_process=num_process,                                                           │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
TypeError: 'NoneType' object is not callable

This is the source code:

"""# Loading the Libraries & Models"""
import pandas as pd
import numpy as np
import evaluate
import torch
from datasets import Dataset
import datasets
from torch.utils.data import Dataset
from transformers import (AutoTokenizer,
                          AutoModelForSequenceClassification,
                          TrainingArguments,
                          Trainer)

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Device: {device}")

# Name of the pretrained checkpoint used for both the tokenizer and the model.
# Alternative checkpoint: "hfl/chinese-roberta-wwm-ext"
check_point = "xlm-roberta-base"
output_dir = f"./models/yasi/{check_point}"

tokenizer = AutoTokenizer.from_pretrained(check_point)

# Two-label sequence-classification head, moved onto the selected device.
model = AutoModelForSequenceClassification.from_pretrained(check_point, num_labels=2)
model = model.to(device)

import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split

import json

# Read the JSON configuration file.
config_file = '../HyperParameter/config.json'
# Decode explicitly as UTF-8 so that parsing does not depend on the
# platform's default locale encoding.
with open(config_file, 'r', encoding='utf-8') as f:
    config = json.load(f)
# Pull the required values out of the configuration.
batch_size = config['batch_size']
dataset = config['dataset']
epoch = config['epoch']

# Load the dataset; `dataset` is handed to pandas and parsed as tab-separated.
data = pd.read_csv(dataset, sep='\t')

# Randomly split the data: 80% for training, then the remaining 20% is cut
# in half into dev and test. The fixed seed keeps the split reproducible.
train_data, remaining_data = train_test_split(data, test_size=0.2, random_state=42)
dev_data, test_data = train_test_split(remaining_data, test_size=0.5, random_state=42)

# Convert each split into a `datasets.Dataset`. The class is referenced
# through the `datasets` module because the bare name `Dataset` is imported
# from both `datasets` and `torch.utils.data` in this file, so its meaning
# would otherwise depend on import order.
train_df = datasets.Dataset.from_pandas(train_data)
dev_df = datasets.Dataset.from_pandas(dev_data)
test_df = datasets.Dataset.from_pandas(test_data)


class YasiDataset(torch.utils.data.Dataset):
    """Tokenized sentence/label pairs for sequence classification.

    Each row of ``df`` is read for its ``"sentence"`` and ``"label"``
    entries. All sentences are tokenized in one batched call, and the
    labels are stored in the tokenizer output under the ``"labels"`` key.

    The base class is written as ``torch.utils.data.Dataset`` because the
    bare name ``Dataset`` is imported from both ``datasets`` and
    ``torch.utils.data`` in this file, so which class it denotes would
    depend on import order.

    Args:
        df: Iterable of rows; each row must support ``row["sentence"]`` and
            ``row["label"]``.
        tokenizer: Callable invoked once with the list of sentences plus
            padding/truncation keyword arguments. Its return value must
            support item assignment (``output["labels"] = ...``).

    Attributes:
        sentence: List of the sentences, in row order.
        labels: ``torch.Tensor`` built from the row labels, in row order.
        tokenizer_output: Whatever ``tokenizer`` returned, with ``"labels"``
            added.
    """

    def __init__(self, df, tokenizer: "AutoTokenizer"):
        # The original one-argument `super(YasiDataset).__init__()` builds an
        # unbound super object and never reaches the base-class initializer.
        super().__init__()

        self.sentence = []
        self.labels = []

        # Collect the text and the label of every row in a single pass.
        for row in df:
            self.sentence.append(row["sentence"])
            self.labels.append(row["label"])

        self.labels = torch.tensor(self.labels)
        self.tokenizer_output = tokenizer(self.sentence,
                                          padding=True,
                                          truncation=True,
                                          max_length=512,  # maximum sequence length
                                          return_tensors='pt',
                                          return_token_type_ids=True,
                                          return_attention_mask=True,
                                          )
        self.tokenizer_output['labels'] = self.labels

    def __len__(self):
        """Return the number of examples (one label per row)."""
        # The tokenizer output is a mapping and has no `.shape`; the label
        # tensor has exactly one entry per row, so its length is the size.
        return len(self.labels)

    def get_dataset(self):
        """Return the tokenizer output, including the ``"labels"`` entry."""
        return self.tokenizer_output


# For each split: tokenize the rows, then wrap the tokenizer output in a
# `datasets.Dataset`.
train_ds = YasiDataset(train_df, tokenizer)
train_dataset = datasets.Dataset.from_dict(train_ds.get_dataset())

dev_ds = YasiDataset(dev_df, tokenizer)
dev_dataset = datasets.Dataset.from_dict(dev_ds.get_dataset())

test_ds = YasiDataset(test_df, tokenizer)
test_dataset = datasets.Dataset.from_dict(test_ds.get_dataset())

"""## Train Loop"""
# Load the "accuracy" metric through the `evaluate` library.
accuracy = evaluate.load("accuracy")

I have searched online for a long time without success. Please help, or suggest some ideas for how to resolve this.


Solution

  • I have now found the cause of the problem: the version of evaluate installed on my computer was 0.1.2, an old release. Upgrading evaluate fixes the error; use the following command: pip install --upgrade evaluate