Tags: python, nlp, huggingface-transformers, large-language-model, peft

AttributeError: 'TrainingArguments' object has no attribute 'model_init_kwargs'


While fine-tuning the Gemma 2B model with QLoRA, I'm getting the error AttributeError: 'TrainingArguments' object has no attribute 'model_init_kwargs'.

Code:

Loading the libraries

from enum import Enum
from functools import partial
import pandas as pd
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, set_seed, BitsAndBytesConfig
from datasets import load_dataset
from trl import SFTTrainer
from peft import get_peft_model, LoraConfig, TaskType

seed = 42
set_seed(seed)

Loading the dataset and preprocessing it


model_name = "gg-hf/gemma-2b-it"
dataset_name = "FinGPT/fingpt-fiqa_qa"
tokenizer = AutoTokenizer.from_pretrained(model_name)
template = """{% for message in messages %}\n{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% if loop.last and add_generation_prompt %}{{'<|im_start|>assistant\n' }}{% endif %}{% endfor %}"""
tokenizer.chat_template = template

def preprocess(samples):
    batch = []
    for system_prompt, input, output in zip(samples["instruction"], samples["input"], samples["output"]):
        conversation = [{"content": system_prompt, "role": "system"},
                        {"content": input, "role": "user"},
                         {"content": output, "role": "assistant"}]
        batch.append(tokenizer.apply_chat_template(conversation, tokenize=False))
    return {"content": batch}

dataset = load_dataset(dataset_name)
dataset = dataset.map(
    preprocess,
    batched=True,
    remove_columns=dataset["train"].column_names
)
dataset = dataset["train"].train_test_split(0.1)
print(dataset)
print(dataset["train"][0])

Create PEFT configurations


peft_config = LoraConfig(r=8,
                         lora_alpha=16,
                         lora_dropout=0.1,
                         target_modules=["gate_proj","q_proj","lm_head","o_proj","k_proj","embed_tokens","down_proj","up_proj","v_proj"],
                         task_type=TaskType.CAUSAL_LM)

Create quantization configurations

bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
        )

Load the model and tokenizer


class ChatmlSpecialTokens(str, Enum):
    user = "<|im_start|>user"
    assistant = "<|im_start|>assistant"
    system = "<|im_start|>system"
    eos_token = "<|im_end|>"
    bos_token = "<s>"
    pad_token = "<pad>"

    @classmethod
    def list(cls):
        return [c.value for c in cls]

tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        pad_token=ChatmlSpecialTokens.pad_token.value,
        bos_token=ChatmlSpecialTokens.bos_token.value,
        eos_token=ChatmlSpecialTokens.eos_token.value,
        additional_special_tokens=ChatmlSpecialTokens.list(),
        trust_remote_code=True
    )
tokenizer.chat_template = template
model = AutoModelForCausalLM.from_pretrained(model_name)
model.resize_token_embeddings(len(tokenizer))
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
# cast non-trainable params in fp16
for p in model.parameters():
    if not p.requires_grad:
        p.data = p.to(torch.float16)

Training Configurations

output_dir = "Gemma2B_finetune_QLoRA"
per_device_train_batch_size = 1
per_device_eval_batch_size = 1
gradient_accumulation_steps = 8
logging_steps = 5
learning_rate = 5e-4
max_grad_norm = 1.0
max_steps = 250
num_train_epochs=10
warmup_ratio = 0.1
lr_scheduler_type = "cosine"
max_seq_length = 2048

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    save_strategy="no",
    evaluation_strategy="epoch",
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    max_grad_norm=max_grad_norm,
    weight_decay=0.1,
    warmup_ratio=warmup_ratio,
    lr_scheduler_type=lr_scheduler_type,
    fp16=True,
    report_to=["tensorboard", "wandb"],
    hub_private_repo=True,
    push_to_hub=True,
    num_train_epochs=num_train_epochs,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False}
)

Create trainer

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer,
    packing=True,
    dataset_text_field="content",
    max_seq_length=max_seq_length,
    peft_config=peft_config,
    dataset_kwargs={
        "append_concat_token": False,
        "add_special_tokens": False,
    },
)


The error I'm getting is:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[10], line 1
----> 1 trainer = SFTTrainer(
      2     model=model,
      3     args=training_arguments,
      4     train_dataset=dataset["train"],
      5     eval_dataset=dataset["test"],
      6     tokenizer=tokenizer,
      7     packing=True,
      8     dataset_text_field="content",
      9     max_seq_length=max_seq_length,
     10     peft_config=peft_config,
     11     dataset_kwargs={
     12         "append_concat_token": False,
     13         "add_special_tokens": False,
     14     },
     15 )

File /usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_deprecation.py:101, in _deprecate_arguments.<locals>._inner_deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
     99         message += "\n\n" + custom_message
    100     warnings.warn(message, FutureWarning)
--> 101 return f(*args, **kwargs)

File /usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:154, in SFTTrainer.__init__(self, model, args, data_collator, train_dataset, eval_dataset, tokenizer, model_init, compute_metrics, callbacks, optimizers, preprocess_logits_for_metrics, peft_config, dataset_text_field, packing, formatting_func, max_seq_length, infinite, num_of_sequences, chars_per_token, dataset_num_proc, dataset_batch_size, neftune_noise_alpha, model_init_kwargs, dataset_kwargs, eval_packing)
    150     warnings.warn(
    151         "You passed `model_init_kwargs` to the SFTTrainer, the value you passed will override the one in the `SFTConfig`."
    152     )
    153     args.model_init_kwargs = model_init_kwargs
--> 154 if args.model_init_kwargs is None:
    155     model_init_kwargs = {}
    156 elif not isinstance(model, str):

AttributeError: 'TrainingArguments' object has no attribute 'model_init_kwargs'

Does anyone know a solution for this?

Thanks.


Solution

  • Replace your TrainingArguments constructor with the SFTConfig constructor from trl, and pass that to SFTTrainer. In recent trl versions, SFTTrainer expects its args to be an SFTConfig, a subclass of TrainingArguments that adds the SFT-specific fields (including model_init_kwargs), which is why a plain TrainingArguments raises this AttributeError.

    from trl import SFTConfig

    training_arguments = SFTConfig(...)  # same keyword arguments you passed to TrainingArguments

    trainer = SFTTrainer(args=training_arguments, ...)  # rest of the arguments unchanged
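
  • Applied to the code in the question, a minimal sketch of the fix could look like the following (assuming a trl version that ships SFTConfig, roughly 0.9 or newer). SFTConfig also accepts the SFT-specific options (packing, dataset_text_field, max_seq_length, dataset_kwargs), so they can be moved from the SFTTrainer call onto the config; the variables below are the ones already defined in the question:

    from trl import SFTConfig, SFTTrainer

    # Same values as defined in the "Training Configurations" section above.
    training_arguments = SFTConfig(
        output_dir=output_dir,
        per_device_train_batch_size=per_device_train_batch_size,
        per_device_eval_batch_size=per_device_eval_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        save_strategy="no",
        evaluation_strategy="epoch",
        logging_steps=logging_steps,
        learning_rate=learning_rate,
        max_grad_norm=max_grad_norm,
        weight_decay=0.1,
        warmup_ratio=warmup_ratio,
        lr_scheduler_type=lr_scheduler_type,
        fp16=True,
        report_to=["tensorboard", "wandb"],
        hub_private_repo=True,
        push_to_hub=True,
        num_train_epochs=num_train_epochs,
        gradient_checkpointing=True,
        gradient_checkpointing_kwargs={"use_reentrant": False},
        # SFT-specific options that were previously passed to SFTTrainer directly:
        packing=True,
        dataset_text_field="content",
        max_seq_length=max_seq_length,
        dataset_kwargs={
            "append_concat_token": False,
            "add_special_tokens": False,
        },
    )

    trainer = SFTTrainer(
        model=model,
        args=training_arguments,
        train_dataset=dataset["train"],
        eval_dataset=dataset["test"],
        tokenizer=tokenizer,
        peft_config=peft_config,
    )

  On trl versions that provide SFTConfig, passing these options to SFTTrainer directly still works but is deprecated and triggers "will override the one in the SFTConfig" warnings (the same deprecation wrapper visible in the traceback), so keeping them on the config is the cleaner option.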