
Uploading models with custom forward functions to the huggingface model hub?


Is it possible to upload a model with a custom forward function to the huggingface model hub?

I can see how to do this when the model has a standard architecture, but I can't see how to upload one with a customised forward function.


Solution

  • Yes, absolutely. You can create your own model with any number of extra layers and customisations and upload it to the model hub. Here is a demo that walks through the entire process.

    Uploading a custom model to the 🤗 model hub

    import tqdm
    
    from datasets import load_dataset
    import transformers
    from transformers import AutoTokenizer, AutoModel, BertConfig
    from torch.optim import AdamW  # transformers.AdamW is deprecated in recent versions
    from transformers import get_scheduler
    
    import torch
    import torch.nn as nn
    from torch.utils.data import Dataset, DataLoader
    
    # setting device to `cuda` if gpu exists
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    
    # initialising the tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
    bert = AutoModel.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
    
    
    def tokenize_function(examples):
        '''Function for tokenizing raw texts'''
        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)
    
    
    # downloading IMDB dataset from 🤗 `datasets`
    raw_datasets = load_dataset("imdb")
    
    # Running tokenizing function on the raw texts
    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
    
    # for simplicity, take only a 1000-example shuffled sample of the train split
    tokenized_datasets = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
    
    
    # Now let's create the torch Dataset class
    class IMDBClassificationDataset(Dataset):
    
        def __init__(self, dataset):
            self.dataset = dataset
    
        def __len__(self):
            return len(self.dataset)
    
        def __getitem__(self, idx):
            d = self.dataset[idx]
    
            ids = torch.tensor(d['input_ids'])
            mask = torch.tensor(d['attention_mask'])
            label = torch.tensor(d['label'])
            return ids, mask, label
    
    # Preparing the dataset and the Dataloader
    dataset = IMDBClassificationDataset(tokenized_datasets)
    train_dataloader = DataLoader(dataset, shuffle=True, batch_size=8)
    
    # Now let's create a custom BERT model
    class CustomBert(transformers.PreTrainedModel):
        '''Custom model class
           ------------------
           The trick is to inherit from `transformers.PreTrainedModel` rather than `nn.Module`,
           and to pass the model config during initialisation.'''
    
        def __init__(self, bert):
            super(CustomBert, self).__init__(config=BertConfig.from_pretrained('google/bert_uncased_L-2_H-128_A-2'))
            self.bert = bert
    
            self.l1 = nn.Linear(128, 1)  # 128 = hidden size of bert_uncased_L-2_H-128_A-2
    
            self.do = nn.Dropout(0.1)
            self.relu = nn.ReLU()
            self.sigmoid = nn.Sigmoid()
    
        def forward(self, sent_id, mask):
            '''For simplicity only one linear layer is added; you can build any network you want on top'''
            
            bert_out = self.bert(sent_id, attention_mask=mask)
            o = bert_out.last_hidden_state[:,0,:]
            o = self.do(o)
            o = self.relu(o)
            o = self.l1(o)
            o = self.sigmoid(o)
            return o
    
    # initialising model, loss and optimizer
    model = CustomBert(bert)
    model.to(device)
    criterion = torch.nn.BCELoss()
    optimizer = AdamW(model.parameters(), lr=5e-5)
    
    # setting epochs, num_training_steps and the lr_scheduler
    num_epochs = 3
    num_training_steps = num_epochs * len(train_dataloader)
    lr_scheduler = get_scheduler(
        "linear",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps
    )
    
    # training loop
    model.train()
    for epoch in tqdm.tqdm(range(num_epochs)):
        for batch in train_dataloader:
            ids, masks, labels = batch
            labels = labels.type(torch.float32)
            o = model(ids.to(device), masks.to(device))
            loss = criterion(torch.squeeze(o), labels.to(device))
            loss.backward()
    
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
    
    # save the tokenizer and the model in `./test-model/` directory 
    tokenizer.save_pretrained("./test-model/")
    model.save_pretrained("./test-model/", push_to_hub=False)
    

    Now create a new model repository on the 🤗 hub and push all the contents of the `test-model` directory to it.
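
    If you prefer to push from Python instead of git, here is a minimal sketch using the `huggingface_hub` client (this assumes a recent version of the library and that you have already run `huggingface-cli login`; replace `tanmoyio/test-model` with your own username/repo):

    from huggingface_hub import HfApi
    
    api = HfApi()
    
    # create the repo on the hub if it doesn't exist yet
    api.create_repo(repo_id="tanmoyio/test-model", exist_ok=True)
    
    # upload everything `save_pretrained` wrote to ./test-model/
    # (model weights, config.json, tokenizer files)
    api.upload_folder(folder_path="./test-model/", repo_id="tanmoyio/test-model")
    

    Alternatively, `model.push_to_hub("test-model")` and `tokenizer.push_to_hub("test-model")` do the repo creation and upload in one call each.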

    To sanity-check the uploaded model, you can try 🤗's `pipeline` to see if anything is wrong.

    from transformers import pipeline
    
    # as this is a classification model, pass `text-classification` as the task
    classifier = pipeline('text-classification', model='tanmoyio/test-model')
    classifier("This movie was superb")
    

    It will output something like this:

    [{'label': 'LABEL_0', 'score': 0.5571992993354797}]
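
    The generic `LABEL_0` name comes from the default config. If you want the pipeline to print human-readable labels instead, you can set the mapping on the config before saving; a small sketch (the `negative`/`positive` names are my assumption for IMDB's two classes):

    # continuing from the training script above
    model.config.id2label = {0: "negative", 1: "positive"}
    model.config.label2id = {"negative": 0, "positive": 1}
    model.save_pretrained("./test-model/")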
    

    This is a real demo; you can check the model here: https://huggingface.co/tanmoyio/test-model. Let me know if you have further questions.
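
    One caveat: because `CustomBert.__init__` takes the base `bert` model rather than a config, `CustomBert.from_pretrained(...)` will not work out of the box. A minimal sketch for loading the fine-tuned weights back into the custom class (the weights file may be named `pytorch_model.bin` or `model.safetensors` depending on your transformers version):

    import torch
    from transformers import AutoModel
    
    # rebuild the architecture exactly as it was trained
    bert = AutoModel.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
    model = CustomBert(bert)
    
    # load the fine-tuned weights written by `save_pretrained`
    state_dict = torch.load("./test-model/pytorch_model.bin", map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()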