While fine-tuning the Gemma 2B model with QLoRA, I'm getting the following error: AttributeError: 'TrainingArguments' object has no attribute 'model_init_kwargs'
Code:
Loading the libraries
from enum import Enum
from functools import partial
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, set_seed, BitsAndBytesConfig
from datasets import load_dataset
from trl import SFTTrainer
from peft import get_peft_model, LoraConfig, TaskType
seed = 42
set_seed(seed)
Loading the dataset and preprocessing it
model_name = "gg-hf/gemma-2b-it"
dataset_name = "FinGPT/fingpt-fiqa_qa"
tokenizer = AutoTokenizer.from_pretrained(model_name)
template = """{% for message in messages %}\n{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% if loop.last and add_generation_prompt %}{{'<|im_start|>assistant\n' }}{% endif %}{% endfor %}"""
tokenizer.chat_template = template
def preprocess(samples):
    batch = []
    for system_prompt, input, output in zip(samples["instruction"], samples["input"], samples["output"]):
        conversation = [{"content": system_prompt, "role": "system"},
                        {"content": input, "role": "user"},
                        {"content": output, "role": "assistant"}]
        batch.append(tokenizer.apply_chat_template(conversation, tokenize=False))
    return {"content": batch}
dataset = load_dataset(dataset_name)
dataset = dataset.map(
    preprocess,
    batched=True,
    remove_columns=dataset["train"].column_names
)
dataset = dataset["train"].train_test_split(0.1)
print(dataset)
print(dataset["train"][0])
Create PEFT configurations
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["gate_proj", "q_proj", "lm_head", "o_proj", "k_proj", "embed_tokens", "down_proj", "up_proj", "v_proj"],
    task_type=TaskType.CAUSAL_LM
)
Create Quantization configurations
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
Load the model and tokenizer
class ChatmlSpecialTokens(str, Enum):
    user = "<|im_start|>user"
    assistant = "<|im_start|>assistant"
    system = "<|im_start|>system"
    eos_token = "<|im_end|>"
    bos_token = "<s>"
    pad_token = "<pad>"

    @classmethod
    def list(cls):
        return [c.value for c in cls]
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    pad_token=ChatmlSpecialTokens.pad_token.value,
    bos_token=ChatmlSpecialTokens.bos_token.value,
    eos_token=ChatmlSpecialTokens.eos_token.value,
    additional_special_tokens=ChatmlSpecialTokens.list(),
    trust_remote_code=True
)
tokenizer.chat_template = template
model = AutoModelForCausalLM.from_pretrained(model_name)
model.resize_token_embeddings(len(tokenizer))
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
# cast non-trainable params in fp16
for p in model.parameters():
    if not p.requires_grad:
        p.data = p.to(torch.float16)
Training Configurations
output_dir = "Gemma2B_finetune_QLoRA"
per_device_train_batch_size = 1
per_device_eval_batch_size = 1
gradient_accumulation_steps = 8
logging_steps = 5
learning_rate = 5e-4
max_grad_norm = 1.0
max_steps = 250
num_train_epochs=10
warmup_ratio = 0.1
lr_scheduler_type = "cosine"
max_seq_length = 2048
training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    save_strategy="no",
    evaluation_strategy="epoch",
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    max_grad_norm=max_grad_norm,
    weight_decay=0.1,
    warmup_ratio=warmup_ratio,
    lr_scheduler_type=lr_scheduler_type,
    fp16=True,
    report_to=["tensorboard", "wandb"],
    hub_private_repo=True,
    push_to_hub=True,
    num_train_epochs=num_train_epochs,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False}
)
Create trainer
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer,
    packing=True,
    dataset_text_field="content",
    max_seq_length=max_seq_length,
    peft_config=peft_config,
    dataset_kwargs={
        "append_concat_token": False,
        "add_special_tokens": False,
    },
)
The error I'm getting is:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[10], line 1
----> 1 trainer = SFTTrainer(
2 model=model,
3 args=training_arguments,
4 train_dataset=dataset["train"],
5 eval_dataset=dataset["test"],
6 tokenizer=tokenizer,
7 packing=True,
8 dataset_text_field="content",
9 max_seq_length=max_seq_length,
10 peft_config=peft_config,
11 dataset_kwargs={
12 "append_concat_token": False,
13 "add_special_tokens": False,
14 },
15 )
File /usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_deprecation.py:101, in _deprecate_arguments.<locals>._inner_deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
99 message += "\n\n" + custom_message
100 warnings.warn(message, FutureWarning)
--> 101 return f(*args, **kwargs)
File /usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:154, in SFTTrainer.__init__(self, model, args, data_collator, train_dataset, eval_dataset, tokenizer, model_init, compute_metrics, callbacks, optimizers, preprocess_logits_for_metrics, peft_config, dataset_text_field, packing, formatting_func, max_seq_length, infinite, num_of_sequences, chars_per_token, dataset_num_proc, dataset_batch_size, neftune_noise_alpha, model_init_kwargs, dataset_kwargs, eval_packing)
150 warnings.warn(
151 "You passed `model_init_kwargs` to the SFTTrainer, the value you passed will override the one in the `SFTConfig`."
152 )
153 args.model_init_kwargs = model_init_kwargs
--> 154 if args.model_init_kwargs is None:
155 model_init_kwargs = {}
156 elif not isinstance(model, str):
AttributeError: 'TrainingArguments' object has no attribute 'model_init_kwargs'
Is there a solution for this?
Thanks.
Replace the TrainingArguments constructor with trl's SFTConfig constructor and pass that config to SFTTrainer. SFTConfig subclasses TrainingArguments, so every argument you are already passing still works, and it adds the SFT-specific attributes (including model_init_kwargs) that SFTTrainer reads from args, which is exactly the attribute the traceback says is missing.
from trl import SFTConfig

training_arguments = SFTConfig(...)  # same training arguments as before
trainer = SFTTrainer(args=training_arguments, ...)  # rest of the arguments unchanged
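For reference, here is a minimal sketch of what your last two blocks could look like after the swap. It assumes a trl release that ships SFTConfig (roughly 0.9 and later), where the SFT-specific options (packing, dataset_text_field, max_seq_length, dataset_kwargs) are fields on the config; on releases that still accept them as SFTTrainer keyword arguments you can leave them on the trainer and only swap the config class.

from trl import SFTConfig, SFTTrainer

# Same TrainingArguments values as before; SFTConfig inherits all of them.
training_arguments = SFTConfig(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    save_strategy="no",
    evaluation_strategy="epoch",
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    max_grad_norm=max_grad_norm,
    weight_decay=0.1,
    warmup_ratio=warmup_ratio,
    lr_scheduler_type=lr_scheduler_type,
    fp16=True,
    report_to=["tensorboard", "wandb"],
    hub_private_repo=True,
    push_to_hub=True,
    num_train_epochs=num_train_epochs,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # SFT-specific fields, moved here from the SFTTrainer call
    packing=True,
    dataset_text_field="content",
    max_seq_length=max_seq_length,
    dataset_kwargs={
        "append_concat_token": False,
        "add_special_tokens": False,
    },
)

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer,
    peft_config=peft_config,
)

Nothing else in the script needs to change; the error goes away because SFTConfig actually defines model_init_kwargs (defaulting to None), so the attribute lookup at trl/trainer/sft_trainer.py:154 succeeds.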