Uploading models with custom forward functions to the huggingface model hub?

Is it possible to upload a model with a custom forward function to the huggingface model hub?

I can see how to do it when the model has a standard form, but I can't see how to do it when the forward function is customised.

Solution

Yes, absolutely. You can create your own model with any number of added layers or customisations and upload it to the model hub. Here is a demo that walks through the entire process.

Uploading custom model to 🤗 model hub

import tqdm

from datasets import load_dataset
import transformers
from transformers import AutoTokenizer, AutoModel, BertConfig
from transformers import AdamW
from transformers import get_scheduler

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and the pretrained encoder from the same checkpoint
tokenizer = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
bert = AutoModel.from_pretrained("google/bert_uncased_L-2_H-128_A-2")


def tokenize_function(examples):
    '''Tokenize the raw texts stored under the "text" key of `examples`.

    Every sequence is padded to, and truncated at, 128 tokens. The result of
    the module-level `tokenizer` call is returned unchanged.
    '''
    raw_texts = examples["text"]
    return tokenizer(
        raw_texts,
        truncation=True,
        padding="max_length",
        max_length=128,
    )


# Download the IMDB dataset from 🤗 `datasets`
raw_datasets = load_dataset("imdb")

# For simplicity only 1000 shuffled examples of the train split are used.
# Sub-sample first and tokenize afterwards, so that only the selected rows
# go through the tokenizer instead of every split of the dataset.
tokenized_datasets = (
    raw_datasets["train"]
    .shuffle(seed=42)
    .select(range(1000))
    .map(tokenize_function, batched=True)
)


# Now lets create the torch Dataset class
class IMDBClassificationDataset(Dataset):
    '''Torch `Dataset` view over an indexable collection of tokenized rows.

    Each row is expected to provide the keys 'input_ids', 'attention_mask'
    and 'label'; indexing yields the three values as tensors, in that order.
    '''

    def __init__(self, dataset):
        # Keep a reference to the underlying rows; nothing is copied here
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        row = self.dataset[idx]
        # Convert the three fields in a fixed order: ids, mask, label
        return tuple(
            torch.tensor(row[key])
            for key in ('input_ids', 'attention_mask', 'label')
        )

# Wrap the tokenized rows in the torch Dataset, then batch them:
# 8 examples per batch, reshuffled by the loader on every pass
dataset = IMDBClassificationDataset(tokenized_datasets)
train_dataloader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
)

# Now lets create a custom Bert model
class CustomBert(transformers.PreTrainedModel):
    '''BERT encoder followed by a single-unit sigmoid classification head.

    The trick is to inherit from `transformers.PreTrainedModel` rather than
    plain `nn.Module`, and to hand a model config to the parent constructor
    during initialisation; `save_pretrained` is inherited from that parent.

    Args:
        bert: the encoder module. It is called as
            `bert(sent_id, attention_mask=mask)` and its output must expose
            `last_hidden_state`.
    '''

    def __init__(self, bert):
        config = BertConfig.from_pretrained('google/bert_uncased_L-2_H-128_A-2')
        super().__init__(config=config)
        self.bert = bert

        # Size the head from the config rather than repeating the magic
        # number 128 (the hidden size of this checkpoint)
        self.l1 = nn.Linear(self.config.hidden_size, 1)

        self.do = nn.Dropout(0.1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, sent_id, mask):
        '''Run the encoder and the classification head.

        For simplicity the head is a single linear layer; any network can be
        built here instead.

        Args:
            sent_id: token ids passed to the encoder as its first argument.
            mask: attention mask forwarded as `attention_mask`.

        Returns:
            The sigmoid of the linear head, i.e. values in (0, 1), computed
            from the hidden state at sequence position 0 of every input row.
        '''
        bert_out = self.bert(sent_id, attention_mask=mask)
        # Keep only the hidden state of the first token of each sequence
        o = bert_out.last_hidden_state[:, 0, :]
        o = self.do(o)
        o = self.relu(o)
        o = self.l1(o)
        o = self.sigmoid(o)
        return o

# Initialise the model, the loss and the optimizer
model = CustomBert(bert)
model.to(device)
# The model already applies a sigmoid, so plain binary cross-entropy is used
criterion = torch.nn.BCELoss()
optimizer = AdamW(model.parameters(), lr=5e-5)

# Set the number of epochs, the total number of optimisation steps and a
# linearly decaying learning-rate schedule without warm-up
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)

# Training loop
model.train()
for epoch in tqdm.tqdm(range(num_epochs)):
    for batch in train_dataloader:
        ids, masks, labels = batch
        # BCELoss needs float targets on the same device as the predictions
        labels = labels.type(torch.float32).to(device)
        o = model(ids.to(device), masks.to(device))
        # Drop only the trailing size-1 dimension: a bare `torch.squeeze`
        # would also remove the batch dimension of a batch holding a single
        # example, leaving a 0-d tensor that no longer matches `labels`.
        loss = criterion(o.squeeze(-1), labels)
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

# Save the tokenizer and the model in the `./test-model/` directory
tokenizer.save_pretrained("./test-model/")
model.save_pretrained("./test-model/", push_to_hub=False)

Now create a new model repository on 🤗 and push all the contents of the `test-model` directory to the 🤗 model hub.

To check that the uploaded model works, you can load it with 🤗's pipeline and see whether anything goes wrong.

from transformers import pipeline

# This is a classification model, so the task has to be `text-classification`
classifier = pipeline(
    task='text-classification',
    model='tanmoyio/test-model',
)
classifier("This movie was superb")

It will output something like this

[{'label': 'LABEL_0', 'score': 0.5571992993354797}]

This is a real demo, check the model here - https://huggingface.co/tanmoyio/test-model. Let me know if you have further questions.