将带有自定义前向(forward)函数的模型上传到 Hugging Face 模型中心?

Uploading models with custom forward functions to the huggingface model hub?

是否可以将带有自定义前向(forward)函数的模型上传到 Hugging Face 模型中心?

如果模型是标准形式,我知道该怎么上传;但如果模型自定义了前向函数,我就不清楚该如何上传了。

当然可以。您可以添加任意数量的层或自定义逻辑来创建自己的模型,并将其上传到模型中心。下面用一个 demo 来说明整个过程。

将自定义模型上传到模型中心

import tqdm

from datasets import load_dataset
import transformers
from transformers import AutoTokenizer, AutoModel, BertConfig
from transformers import AdamW
from transformers import get_scheduler

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and the pretrained encoder from the same checkpoint
tokenizer = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
bert = AutoModel.from_pretrained("google/bert_uncased_L-2_H-128_A-2")


def tokenize_function(examples):
    '''Tokenize the `text` field of `examples` with the module-level tokenizer.

    Every sequence is padded to `max_length` and truncated, with the
    maximum length fixed at 128. Returns whatever the tokenizer returns.
    '''
    texts = examples["text"]
    return tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=128,
    )


# Download the IMDB dataset from `datasets`
raw_datasets = load_dataset("imdb")

# For simplicity only the train split is used. The sample is drawn first and
# tokenized afterwards, so the tokenizer runs on the 1000 selected texts only
# instead of on every split of the dataset. The shuffle seed is unchanged, so
# the same examples are selected.
tokenized_datasets = (
    raw_datasets["train"]
    .shuffle(seed=42)
    .select(range(1000))
    .map(tokenize_function, batched=True)
)


# Now lets create the torch Dataset class
class IMDBClassificationDataset(Dataset):
    '''Torch dataset view over a sequence of tokenized records.

    Each record is indexed by position and must provide the keys
    `input_ids`, `attention_mask` and `label`.
    '''

    # Record fields, in the order they are returned by `__getitem__`
    _FIELDS = ('input_ids', 'attention_mask', 'label')

    def __init__(self, dataset):
        '''Keep a reference to the underlying indexable `dataset`.'''
        self.dataset = dataset

    def __len__(self):
        '''Number of records in the wrapped dataset.'''
        return len(self.dataset)

    def __getitem__(self, idx):
        '''Return `(ids, mask, label)` as tensors for the record at `idx`.'''
        record = self.dataset[idx]
        return tuple(torch.tensor(record[field]) for field in self._FIELDS)

# Wrap the tokenized sample in the torch dataset and serve it in shuffled batches of 8
dataset = IMDBClassificationDataset(tokenized_datasets)
train_dataloader = DataLoader(dataset=dataset, batch_size=8, shuffle=True)

# Now lets create a custom Bert model
class CustomBert(transformers.PreTrainedModel):
    '''BERT encoder followed by a single-output sigmoid head.

    The class derives from `transformers.PreTrainedModel` instead of
    `nn.Module`, and a model config is handed to the base-class
    constructor during initialisation.
    '''

    def __init__(self, bert):
        '''Store the encoder `bert` and create the head layers.'''
        config = BertConfig.from_pretrained('google/bert_uncased_L-2_H-128_A-2')
        super().__init__(config=config)
        self.bert = bert

        # Head: 128 input features -> 1 output
        self.l1 = nn.Linear(128, 1)

        self.do = nn.Dropout(0.1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, sent_id, mask):
        '''Run the encoder and the head; return the sigmoid output.

        `sent_id` is passed to the encoder positionally and `mask` as its
        `attention_mask`. Only one linear layer is used here for
        simplicity; any kind of network could be built instead.
        '''
        encoded = self.bert(sent_id, attention_mask=mask)
        # Keep only position 0 along the second axis of the last hidden state
        first_token = encoded.last_hidden_state[:, 0, :]
        hidden = self.relu(self.do(first_token))
        return self.sigmoid(self.l1(hidden))

# Build the model, move it to the selected device and create the loss
model = CustomBert(bert)
model.to(device)
criterion = torch.nn.BCELoss()

# Use PyTorch's own AdamW: the AdamW class shipped with `transformers` is
# deprecated in favour of `torch.optim.AdamW`. `eps` and `weight_decay` are
# given explicitly because the torch defaults (1e-8 and 1e-2) differ from the
# defaults of the transformers class (1e-6 and 0.0) that was used before.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=5e-5,
    eps=1e-6,
    weight_decay=0.0,
)

# One scheduler step is taken per batch, so the schedule spans every batch of
# every epoch; the learning rate follows a linear schedule without warmup
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)

# training loop
model.train()
for epoch in tqdm.tqdm(range(num_epochs)):
    for batch in train_dataloader:
        ids, masks, labels = batch
        # Cast the targets to float32 before handing them to BCELoss
        labels = labels.type(torch.float32).to(device)
        o = model(ids.to(device), masks.to(device))
        # Drop only the trailing size-1 output dimension. A bare
        # `torch.squeeze` would also remove the batch dimension when a batch
        # holds a single sample, leaving a 0-d prediction that no longer
        # matches the shape of `labels`.
        loss = criterion(o.squeeze(-1), labels)
        loss.backward()

        # Apply the update, advance the schedule, then clear the gradients
        # so they do not accumulate into the next batch
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

# save the tokenizer and the model in `./test-model/` directory
# (nothing is pushed to the hub at this point)
tokenizer.save_pretrained("./test-model/")
model.save_pretrained("./test-model/", push_to_hub=False)

现在在模型中心创建一个新的模型仓库,并将 `./test-model/` 目录中的所有内容推送到该仓库。

要验证上传后的模型能否正常加载和使用,可以用 `pipeline` 试一下,检查是否有问题。

from transformers import pipeline

# The model is a classifier, so the pipeline task is `text-classification`
classifier = pipeline(task='text-classification', model='tanmoyio/test-model')
classifier("This movie was superb")

它会输出类似下面的结果:

[{'label': 'LABEL_0', 'score': 0.5571992993354797}]

这是一个真实的演示,可以在此处查看该模型:https://huggingface.co/tanmoyio/test-model 。如果您还有其他问题,请告诉我。