List index out of range while saving a trained model

PLEASE HELP !!

I’m trying to fine-tune a pre-trained DistilBERT model from Huggingface using Tensorflow. Everything runs smoothly and the model builds and trains without error. But when I try to save the model it stops with the error “IndexError: list index out of range”. I’m using pycharm with TPU.

Any help would be much appreciated!

Code:

import h5py
import numpy as np
import pandas as pd
import pydot
import simplejson as simplejson
import tensorflow as tf
import os
from transformers import pipeline
from tensorflow import keras
train = pd.read_csv("train.csv")
print("Training_dataset_shape:", train.shape)

MAX_LEN=100

from transformers import BertTokenizer, TFBertModel, TFAutoModel,AutoTokenizer

#model_name = "bert-base-multilingual-cased"
#tokenizer = BertTokenizer.from_pretrained(model_name) # FC: this is the tokenizer we will use on our text data to tokenize it

model_name = "huggingface/distilbert-base-uncased-finetuned-mnli"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# FC we make a function in order to have a list of the id for each word and the separator
def encode_sentence(s):
   tokens = list(tokenizer.tokenize(s)) # FC: split the sentence into tokens that are either words or sub-words
   tokens.append('[SEP]') # FC: a token called [SEP] (=separator) is added to mark end of each sentence
   return tokenizer.convert_tokens_to_ids(tokens) # # FC: instead of returning the list of tokens, a list of each token ID is returned


def bert_encode(hypotheses, premises,
                tokenizer):  # FC: for RoBERTa we remove the input_type_ids from the inputs of the model

    num_examples = len(hypotheses)

    sentence1 = tf.ragged.constant([  # FC: constructs a constant ragged tensor. every entry has a different length
        encode_sentence(s) for s in np.array(hypotheses)])

    sentence2 = tf.ragged.constant([
        encode_sentence(s) for s in np.array(premises)])

    cls = [tokenizer.convert_tokens_to_ids(['[CLS]'])] * sentence1.shape[
        0]  # FC: list of IDs for the token '[CLS]' to denote each beginning

    input_word_ids = tf.concat([cls, sentence1, sentence2],
                               axis=-1)  # FC: put everything together. every row still has a different length.

    # input_word_ids2 = tf.concat([cls, sentence2, sentence1], axis=-1)

    # input_word_ids = tf.concat([input_word_ids1, input_word_ids2], axis=0) # we duplicate the dataset inverting sentence 1 and 2

    input_mask = tf.ones_like(
        input_word_ids).to_tensor()  # FC: first, a tensor with just ones in it is constructed in the same size as input_word_ids. Then, by applying to_tensor the ends of each row are padded with zeros to give every row the same length
    type_cls = tf.zeros_like(cls)  
    type_s1 = tf.zeros_like(sentence1)
    type_s2 = tf.ones_like(sentence2)  
    input_type_ids = tf.concat(
        [type_cls, type_s1, type_s2], axis=-1).to_tensor()  # FC: concatenates everything and again adds padding
    inputs = {
        'input_word_ids': input_word_ids.to_tensor(),  # FC: input_word_ids hasn't been padded yet - do it here now
        'input_mask': input_mask}

    return inputs

train_input = bert_encode(train.premise.values, train.hypothesis.values, tokenizer)
# total_train_input = bert_encode(total_train.premise.values, total_train.hypothesis.values, tokenizer)


max_len = 136  #: FC 50 in the initial tutorial


def build_model():
    encoder = TFAutoModel.from_pretrained(model_name)
    input_word_ids = tf.keras.Input(shape(max_len,),dtype=tf.int32,name="input_word_ids")  
    input_mask = tf.keras.Input(shape=(max_len,), dtype=tf.int32,name="input_mask") 
    embedding = encoder([input_word_ids, input_mask])[0]  # FC: add_input_type_ids for the BERT model
    output = tf.keras.layers.Dense(3, activation='softmax')(embedding[:, 0, :])
    model = tf.keras.Model(inputs=[input_word_ids, input_mask],outputs=output)  # FC: based on the code in the lines above, a model is now constructed and passed into the variable model
    model.compile(tf.keras.optimizers.Adam(learning_rate=1e5),loss='sparse_categorical_crossentropy', metrics=['accuracy'])  
    return model


with strategy.scope(): 
    model = build_model() 
    model.summary()       


# print("model.layers[2]:-------". model.layers[2])
# model.layers[2].trainable=True


for key in train_input.keys():
    train_input[key] = train_input[key][:,:max_len]

print("train the model now")
early_stop = tf.keras.callbacks.EarlyStopping(patience=3,restore_best_weights=True)


model.fit(train_input, train.label.values, epochs = 3, verbose = 1, validation_split = 0.01,
         batch_size=16*strategy.num_replicas_in_sync,
          callbacks=[early_stop])
print("Training is completeted")


model.save("saved_model/trackers/1")```

Try using model.save_weights() instead of model.save()

But i want to save the whole model as i need to deploy this in production.

Hi Chitra,

I ran into this problem as well and HuggingFace has shown that it can be resolved by changing how the inputs are passed to the transformer layer:

If you change this line:

embedding = encoder([input_word_ids, input_mask])[0]  # FC: add_input_type_ids for the BERT model

To this (i.e. pass the inputs as separate arguments rather than as a list):

embedding = encoder(input_word_ids, input_mask)[0]  # FC: add_input_type_ids for the BERT model

Are you able to save()?

Cheers,
Chris

Thank you so much criss. i will try this.