I'm using BERT and other encoder models for text classification tasks, but when I try to load the accuracy metric with the Hugging Face `evaluate` library, an error sometimes occurs: `TypeError: 'NoneType' object is not callable`. I have searched the web for a long time without success. Please help, or suggest some ideas for how to resolve this. Thanks in advance.
This is the traceback of the error:
─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /home/ubuntu/Bill_PyCharm/absa-three/yasi_encoder/yasi_roberta.py:94 in <module> │
│ │
│ 91 test_dataset = datasets.Dataset.from_dict(train_ds.get_dataset()) │
│ 92 │
│ 93 """## Train Loop""" │
│ ❱ 94 accuracy = evaluate.load("accuracy") │
│ 95 # accuracy = evaluate.load("../evaluate/accuracy.py") │
│ 96 # recall = evaluate.load("recall") │
│ 97 # precision = evaluate.load("precision") │
│ │
│ /home/ubuntu/anaconda3/lib/python3.9/site-packages/evaluate/loading.py:778 in load │
│ │
│ 775 │ │ path, module_type=module_type, revision=revision, download_config=download_confi │
│ 776 │ ).module_path │
│ 777 │ evaluation_cls = import_main_class(evaluation_module) │
│ ❱ 778 │ evaluation_instance = evaluation_cls( │
│ 779 │ │ config_name=config_name, │
│ 780 │ │ process_id=process_id, │
│ 781 │ │ num_process=num_process, │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
TypeError: 'NoneType' object is not callable
This is the source code:
"""# Loading the Libraries & Models"""
import pandas as pd
import numpy as np
import evaluate
import torch
from datasets import Dataset
import datasets
from torch.utils.data import Dataset
from transformers import (AutoTokenizer,
AutoModelForSequenceClassification,
TrainingArguments,
Trainer)
# Prefer the GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Device: {device}")

# Pretrained checkpoint to fine-tune; its name also determines the output folder.
check_point = "xlm-roberta-base"
# check_point = "hfl/chinese-roberta-wwm-ext"
output_dir = "./models/yasi/" + check_point

tokenizer = AutoTokenizer.from_pretrained(check_point)

# Two-label sequence-classification model, moved to the selected device.
model = AutoModelForSequenceClassification.from_pretrained(check_point, num_labels=2)
model = model.to(device)
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import train_test_split
import json
# Read the configuration file.
config_file = '../HyperParameter/config.json'
with open(config_file, 'r') as f:
    config = json.load(f)

# Pull the required settings out of the configuration.
batch_size, dataset, epoch = (
    config[key] for key in ('batch_size', 'dataset', 'epoch')
)

# Read the tab-separated dataset file.
data = pd.read_csv(dataset, sep='\t')

# Random split: hold out 20% of the rows, then halve the held-out part into
# the dev and test sets.
train_data, remaining_data = train_test_split(data, test_size=0.2, random_state=42)
dev_data, test_data = train_test_split(remaining_data, test_size=0.5, random_state=42)

# Convert each split into a Dataset object.
train_df, dev_df, test_df = (
    Dataset.from_pandas(frame) for frame in (train_data, dev_data, test_data)
)
class YasiDataset(torch.utils.data.Dataset):
    """Collect labelled sentences and tokenise them in a single batch.

    The base class is spelled out as ``torch.utils.data.Dataset`` because the
    bare name ``Dataset`` is imported from both ``datasets`` and
    ``torch.utils.data`` in this file, so which class it refers to depends on
    import order.

    Attributes are documented here rather than inline:
        sentence: list with the ``"sentence"`` value of every row.
        labels: ``torch.Tensor`` built from the ``"label"`` value of every row.
        tokenizer_output: whatever ``tokenizer`` returned for ``sentence``,
            with the label tensor stored under the ``"labels"`` key.
    """

    def __init__(self, df, tokenizer: "AutoTokenizer"):
        """Read every row of ``df`` and tokenise all sentences at once.

        Args:
            df: iterable of rows, each indexable by ``"sentence"`` and
                ``"label"``.
            tokenizer: callable that receives the list of sentences plus the
                padding/truncation keyword arguments below and returns a
                mapping that supports item assignment.
        """
        # The original `super(YasiDataset).__init__()` created an unbound
        # super object and never initialised the base class.
        super().__init__()
        self.sentence = []
        self.labels = []
        # Gather the text and the label of every row.
        for row in df:
            self.sentence.append(row["sentence"])
            self.labels.append(row["label"])
        self.labels = torch.tensor(self.labels)
        self.tokenizer_output = tokenizer(
            self.sentence,
            padding=True,
            truncation=True,
            max_length=512,  # maximum sequence length
            return_tensors='pt',
            return_token_type_ids=True,
            return_attention_mask=True,
        )
        # Keep the labels next to the encoded inputs.
        self.tokenizer_output['labels'] = self.labels

    def __len__(self):
        """Return the number of examples (one label per example)."""
        # The original `len(self.tokenizer_output.shape[0])` could never work:
        # it took `len()` of an integer.
        return len(self.labels)

    def get_dataset(self):
        """Return the tokenizer output, including the ``"labels"`` entry."""
        return self.tokenizer_output
# Tokenise every split.
train_ds, dev_ds, test_ds = (
    YasiDataset(split, tokenizer) for split in (train_df, dev_df, test_df)
)

# Turn the tokenised output of each split into a `datasets.Dataset`.
train_dataset, dev_dataset, test_dataset = (
    datasets.Dataset.from_dict(wrapper.get_dataset())
    for wrapper in (train_ds, dev_ds, test_ds)
)

"""## Train Loop"""
# Load the accuracy metric.
# NOTE: with evaluate 0.1.2 this call was reported to fail with
# "TypeError: 'NoneType' object is not callable"; upgrading evaluate fixed it.
accuracy = evaluate.load("accuracy")
I have searched the web for a long time without success. Please help, or suggest some ideas for how to resolve this.
Update: I have now found the cause of the problem. The version of `evaluate` installed on my computer was 0.1.2, which is outdated; the fix is to upgrade `evaluate` to a newer version.
Use the following command:
pip install --upgrade evaluate