I fine-tuned a Whisper model with LoRA, but I've run into a problem.
The directory of the original model I trained is 2.7 GB, whereas the directory of the model fine-tuned with LoRA is only 57 MB.
From this, I found that only the additional adapter weights were saved to the LoRA directory (that is, the original model weights are not included).
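A quick way to confirm this is to list the tensors stored in the adapter checkpoint; a minimal sketch, assuming the file is adapter_model.safetensors as in the directory listing further down (the path is a placeholder):

from safetensors.torch import load_file

# Inspect the adapter checkpoint: it should contain only LoRA tensors.
adapter_state = load_file("lora_output/adapter_model.safetensors")  # placeholder path
for name in list(adapter_state)[:5]:
    print(name)  # expect keys containing "lora_A" / "lora_B", no base weights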
So here is my question:
How can I combine the existing Whisper model with the LoRA-trained adapter for inference?
I'm attaching my code below for reference.
import numpy as np
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from peft import PeftModel, PeftConfig
class whisper:
    # model_str
    # 1. large  - "openai/whisper-large-v3"
    # 2. medium - "openai/whisper-medium"
    # 3. small  - "openai/whisper-small"
    def __init__(self, baseModelPath):
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            baseModelPath, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
        )
        model.to(device)
        processor = AutoProcessor.from_pretrained(baseModelPath)
        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            max_new_tokens=128,
            chunk_length_s=30,
            batch_size=16,
            return_timestamps=True,
            torch_dtype=torch_dtype,
            device=device,
        )

    # korean
    def getText(self, audioPath, language='<|ko|>'):
        sentence = self.pipe(audioPath, generate_kwargs={"task": "transcribe", "language": language})
        return sentence['text']
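For reference, a minimal usage sketch for this class (the model string and audio path are placeholders):

stt = whisper("openai/whisper-small")  # placeholder model string
print(stt.getText("sample.wav"))       # placeholder audio file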
There is some related information at that link, but I don't know whether it can be applied to the Whisper model.
Below are the files in the original directory.
-rw-r--r-- 1 root root 34K Mar 4 17:49 added_tokens.json
-rw-r--r-- 1 root root 1.4K Mar 5 09:48 config.json
-rw-r--r-- 1 root root 3.0K Mar 5 09:48 generation_config.json
-rw-r--r-- 1 root root 483K Mar 4 17:49 merges.txt
-rw-r--r-- 1 root root 923M Mar 5 09:48 model.safetensors
-rw-r--r-- 1 root root 52K Mar 4 17:49 normalizer.json
-rw-r--r-- 1 root root 1.8G Mar 5 09:49 optimizer.pt
-rw-r--r-- 1 root root 339 Mar 5 09:48 preprocessor_config.json
-rw-r--r-- 1 root root 14K Mar 5 09:49 rng_state.pth
drwxr-xr-x 4 root root 4.0K Mar 4 17:49 runs
-rw-r--r-- 1 root root 1.1K Mar 5 09:49 scheduler.pt
-rw-r--r-- 1 root root 2.2K Mar 4 17:49 special_tokens_map.json
-rw-r--r-- 1 root root 277K Mar 4 17:49 tokenizer_config.json
-rw-r--r-- 1 root root 60K Mar 5 09:49 trainer_state.json
-rw-r--r-- 1 root root 4.9K Mar 5 09:48 training_args.bin
-rw-r--r-- 1 root root 1013K Mar 4 17:49 vocab.json
Below are the files in the LoRA training output directory.
drwxr-xr-x 3 root root 4.0K Mar 21 06:50 .
drwxr-xr-x 11 root root 4.0K Mar 21 13:16 ..
-rw-r--r-- 1 root root 5.0K Mar 21 06:13 README.md
-rw-r--r-- 1 root root 789 Mar 21 06:13 adapter_config.json
drwxr-xr-x 2 root root 4.0K Mar 21 06:13 adapter_model
-rw-r--r-- 1 root root 14M Mar 21 06:13 adapter_model.safetensors
-rw-r--r-- 1 root root 34K Mar 20 12:55 added_tokens.json
-rw-r--r-- 1 root root 483K Mar 20 12:55 merges.txt
-rw-r--r-- 1 root root 52K Mar 20 12:55 normalizer.json
-rw-r--r-- 1 root root 28M Mar 21 06:13 optimizer.pt
-rw-r--r-- 1 root root 339 Mar 21 06:13 preprocessor_config.json
-rw-r--r-- 1 root root 14K Mar 21 06:13 rng_state.pth
-rw-r--r-- 1 root root 1.1K Mar 21 06:13 scheduler.pt
-rw-r--r-- 1 root root 2.2K Mar 20 12:55 special_tokens_map.json
-rw-r--r-- 1 root root 277K Mar 20 12:55 tokenizer_config.json
-rw-r--r-- 1 root root 31K Mar 21 06:13 trainer_state.json
-rw-r--r-- 1 root root 4.9K Mar 21 06:13 training_args.bin
-rw-r--r-- 1 root root 1013K Mar 20 12:55 vocab.json
The following code lets you run inference by attaching the LoRA adapter to the base Whisper model.
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from peft import PeftModel

class whisper:
    def __init__(self, baseModelName, loraModelPath='', cuda=True):
        # load model and processor
        self.processor = WhisperProcessor.from_pretrained(baseModelName)
        self.forced_decoder_ids = self.processor.get_decoder_prompt_ids(language="korean", task="transcribe")
        if cuda:
            self.model = WhisperForConditionalGeneration.from_pretrained(baseModelName).to("cuda")
        else:
            self.model = WhisperForConditionalGeneration.from_pretrained(baseModelName)
        # attach the LoRA adapter weights on top of the base model
        if loraModelPath != '':
            self.model = PeftModel.from_pretrained(self.model, loraModelPath)
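Alternatively, if you want a single standalone checkpoint rather than attaching the adapter at load time, PEFT's merge_and_unload() folds the LoRA weights back into the base model. A sketch with placeholder paths:

from transformers import WhisperForConditionalGeneration
from peft import PeftModel

base = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
lora = PeftModel.from_pretrained(base, "path/to/lora_dir")  # placeholder adapter path
merged = lora.merge_and_unload()          # fold the adapter deltas into the base weights
merged.save_pretrained("whisper-merged")  # full-size model; no PEFT needed at inference

The merged directory can then be loaded with WhisperForConditionalGeneration.from_pretrained like any ordinary checkpoint.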