Is it possible to upload a model with a custom forward function to the huggingface model hub?
I can see how to do it if your model is of a normal form but can't see how to customise the forward function and do it?
Yes absolutely. You can create your own model with added any number of layers/customisations you want and upload it to model hub. Let me present you a demo which will describe the entire process.
import tqdm
from datasets import load_dataset
import transformers
from transformers import AutoTokenizer, AutoModel, BertConfig
from transformers import AdamW
from transformers import get_scheduler
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
# setting device to `cuda` if gpu exists
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# initialising the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
bert = AutoModel.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
def tokenize_function(examples):
'''Function for tokenizing raw texts'''
return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)
# downloading IMDB dataset from 🤗 `datasets`
raw_datasets = load_dataset("imdb")
# Running tokenizing function on the raw texts
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
# for simplicity I have taken only the train split
tokenized_datasets = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
# Now lets create the torch Dataset class
class IMDBClassificationDataset(Dataset):
def __init__(self, dataset):
self.dataset = dataset
def __len__(self):
return len(self.dataset)
def __getitem__(self, idx):
d = self.dataset[idx]
ids = torch.tensor(d['input_ids'])
mask = torch.tensor(d['attention_mask'])
label = torch.tensor(d['label'])
return ids, mask, label
# Preparing the dataset and the Dataloader
dataset = IMDBClassificationDataset(tokenized_datasets)
train_dataloader = DataLoader(dataset, shuffle=True, batch_size=8)
# Now lets create a custom Bert model
class CustomBert(transformers.PreTrainedModel):
'''Custom model class
------------------
Now the trick is not to inherit the class from `nn.Module` but `transformers.PretrainedModel`
Also you need to pass the model config during initialisation'''
def __init__(self, bert):
super(CustomBert, self).__init__(config=BertConfig.from_pretrained('google/bert_uncased_L-2_H-128_A-2'))
self.bert = bert
self.l1 = nn.Linear(128, 1)
self.do = nn.Dropout(0.1)
self.relu = nn.ReLU()
self.sigmoid = nn.Sigmoid()
def forward(self, sent_id, mask):
'''For simplicity I have added only one linear layer, you can create any type of network you want'''
bert_out = self.bert(sent_id, attention_mask=mask)
o = bert_out.last_hidden_state[:,0,:]
o = self.do(o)
o = self.relu(o)
o = self.l1(o)
o = self.sigmoid(o)
return o
# initialising model, loss and optimizer
model = CustomBert(bert)
model.to(device)
criterion = torch.nn.BCELoss()
optimizer = AdamW(model.parameters(), lr=5e-5)
# setting epochs, num_training_steps and the lr_scheduler
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
"linear",
optimizer=optimizer,
num_warmup_steps=0,
num_training_steps=num_training_steps
)
# training loop
model.train()
for epoch in tqdm.tqdm(range(num_epochs)):
for batch in train_dataloader:
ids, masks, labels = batch
labels = labels.type(torch.float32)
o = model(ids.to(device), masks.to(device))
loss = criterion(torch.squeeze(o), labels.to(device))
loss.backward()
optimizer.step()
lr_scheduler.step()
optimizer.zero_grad()
# save the tokenizer and the model in `./test-model/` directory
tokenizer.save_pretrained("./test-model/")
model.save_pretrained("./test-model/", push_to_hub=False)
Now create a new model in 🤗 and push all the contents inside the test-model
to 🤗 model hub.
To test the authenticity of the model you can try 🤗's pipeline
to check if something is wrong.
from transformers import pipeline
# as this is classification so you need to mention `text-classification` as task
classifier = pipeline('text-classification', model='tanmoyio/test-model')
classifier("This movie was superb")
It will output something like this
[{'label': 'LABEL_0', 'score': 0.5571992993354797}]
This is a real demo, check the model here - https://huggingface.co/tanmoyio/test-model. Let me know if you have further questions.