Trying to create a english story generator with BERT model and graph execution error came unwanted

Neo_TheOne · March 30, 2023, 9:18am

There seems to be a problem during the execution of the TensorFlow graph, specifically in the node model/tf_distil_bert_model/distilbert/embeddings/Gather_1 .

I am using winows 10, tensorflow and keras both version 2.12.0.

This is the code:
‘’’
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer, TFDistilBertModel
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers.legacy import Adam

Set the path to the directory containing the text files

data_dir = “C:/Users/Shah/AppData/Local/Programs/Python/Python310/LAB/Project/Data”

Initialize lists to hold the text and label data

text_data = []
label_data = []

Loop through each file in the directory

for filename in os.listdir(data_dir):
# Read the contents of the file
with open(os.path.join(data_dir, filename), “r”) as f:
lines = f.readlines()
label = lines[0].strip().split()[0] # extract the label from the first column of the first line
text = “”.join(lines[1:]).strip()

# Append the text and label data to their respective lists
text_data.append(text)
label_data.append(label)

Create a Pandas dataframe from the text and label data

data = pd.DataFrame({“text”: text_data, “label”: label_data})

Split the data into training and validation sets

train_data, valid_data = train_test_split(data, test_size=0.2, random_state=42)

Print out the ‘label’ column of the ‘train_data’ DataFrame

print(train_data[‘label’])
print(data.head(10))

Initialize the tokenizer and the pre-trained model

tokenizer = DistilBertTokenizer.from_pretrained(‘distilbert-base-uncased’)
MAX_LENGTH = 2048
transformer_model = TFDistilBertModel.from_pretrained(‘distilbert-base-uncased’, max_length=MAX_LENGTH)

Tokenize the training and validation data

train_sequences = [tokenizer.encode(seq, add_special_tokens=True) for seq in train_data[‘text’]]
valid_sequences = [tokenizer.encode(seq, add_special_tokens=True) for seq in valid_data[‘text’]]

Pad the sequences to ensure they are all the same length

train_sequences = tf.keras.preprocessing.sequence.pad_sequences(train_sequences, maxlen=MAX_LENGTH, padding=‘post’, truncating=‘post’, value=0)
valid_sequences = tf.keras.preprocessing.sequence.pad_sequences(valid_sequences, maxlen=MAX_LENGTH, padding=‘post’, truncating=‘post’, value=0)

Convert the sequences to tensors

train_inputs = tf.constant(train_sequences)
valid_inputs = tf.constant(valid_sequences)

Convert the labels to one-hot encoded format

num_classes = len(data[‘label’].unique())
train_labels = tf.one_hot(train_data[‘label’].astype(int), num_classes)
valid_labels = tf.one_hot(valid_data[‘label’].astype(‘int32’), num_classes)

print(“train_inputs shape:”, train_inputs.shape)
print(“train_labels shape:”, train_labels.shape)
print(“valid_inputs shape:”, valid_inputs.shape)
print(“valid_labels shape:”, valid_labels.shape)

print(“train_inputs type:”, type(train_inputs))
print(“train_labels type:”, type(train_labels))
print(“valid_inputs type:”, type(valid_inputs))
print(“valid_labels type:”, type(valid_labels))

Define the model architecture

input_layer = tf.keras.layers.Input(shape=(MAX_LENGTH,), dtype=‘int32’)
bert_layer = transformer_model(input_layer)[0]
output_layer = tf.keras.layers.Dense(units=num_classes, activation=‘softmax’)(bert_layer[:,0,:])
model = tf.keras.models.Model(inputs=input_layer, outputs=output_layer)

model.summary()

Define a callback function to monitor the validation loss and save the best model

checkpoint_filepath = ‘best_model.h5’
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath=checkpoint_filepath,
save_weights_only=True,
monitor=‘val_loss’,
mode=‘min’,
save_best_only=True)

Compile the model

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5), loss=‘categorical_crossentropy’, metrics=[‘accuracy’])

Train the model

epochs = 3
batch_size = 32
history = model.fit(train_inputs, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(valid_inputs, valid_labels), callbacks=[model_checkpoint_callback])

Save the final version of the model

model.save(‘final_model.h5’)

Generate a story using the trained model

prompt = “Once upon a time”
max_length = 256
generated_story = [tokenizer.encode(prompt, add_special_tokens=True)]
for i in range(max_length):
input_sequence = tf.constant(generated_story[-128:])
predicted_label = model.predict(input_sequence)[0]
next_token_id = np.argmax(predicted_label)
if next_token_id == tokenizer.sep_token_id:
break
generated_story.append(next_token_id)
generated_text = tokenizer.decode
‘’’

This is the error message:

Traceback (most recent call last):
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\LAB\Project\lm2.py”, line 97, in
history = model.fit(train_inputs, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(valid_inputs, valid_labels), callbacks=[model_checkpoint_callback])
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\eager\execute.py”, line 52, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: Graph execution error:

Detected at node ‘model/tf_distil_bert_model/distilbert/embeddings/Gather_1’ defined at (most recent call last):
File “”, line 1, in
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\idlelib\run.py”, line 164, in main
ret = method(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\idlelib\run.py”, line 578, in runcode
exec(code, self.locals)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\LAB\Project\lm2.py”, line 97, in
history = model.fit(train_inputs, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(valid_inputs, valid_labels), callbacks=[model_checkpoint_callback])
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1685, in fit
tmp_logs = self.train_function(iterator)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1284, in train_function
return step_function(self, iterator)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1268, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1249, in run_step
outputs = model.train_step(data)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 1050, in train_step
y_pred = self(x, training=True)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 558, in call
return super().call(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py”, line 512, in call
return self._run_internal_graph(inputs, training=training, mask=mask)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py”, line 669, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py”, line 558, in call
return super().call(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_tf_utils.py”, line 558, in run_call_with_unpacked_inputs
if isinstance(main_input, (tf.Tensor, KerasTensor)) or main_input is None:
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py”, line 559, in call
outputs = self.distilbert(
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\modeling_tf_utils.py”, line 558, in run_call_with_unpacked_inputs
if isinstance(main_input, (tf.Tensor, KerasTensor)) or main_input is None:
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py”, line 409, in call
embedding_output = self.embeddings(input_ids, inputs_embeds=inputs_embeds) # (bs, seq_length, dim)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 65, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py”, line 1145, in call
outputs = call_fn(inputs, *args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py”, line 96, in error_handler
return fn(*args, **kwargs)
File “C:\Users\Shah\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\models\distilbert\modeling_tf_distilbert.py”, line 129, in call
position_embeds = tf.gather(params=self.position_embeddings, indices=position_ids)
Node: ‘model/tf_distil_bert_model/distilbert/embeddings/Gather_1’
indices[0,1984] = 1984 is not in [0, 512)
[[{{node model/tf_distil_bert_model/distilbert/embeddings/Gather_1}}]] [Op:__inference_train_function_20930]