How to jointly predict a sequence and its associated score

Dear all. I’m trying to train a Seq2Seq Keras model that predicts an output sequence together with a score for that sequence. However, I’m having trouble with this because predicting the one-dimensional score seems to be incompatible with the sequence prediction task. I’ve tried repeating the output score across the number of steps of the output sequence, without success. My last attempt was to use the TimeDistributed wrapper, but it also failed. Please help me solve this issue and understand what I am doing wrong. Below is a code sample with toy data that reproduces the problem. Thanks in advance (TensorFlow 2.6, Python 3.9):

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import plot_model
import numpy as np
import re, random, string


train_pairs = [('fence is a', ('[start] border [end]', 2.321928094887362)),
 ('add up column of number causes',
  ('[start] get sum [end]', 3.1699250014423126)),
 ('pluto defined as',
  ('[start] ninth planet from sun [end]', 1.5849625007211563)),
 ('sit down has prerequisite',
  ('[start] something to sit on [end]', 2.321928094887362)),
 ('eat has subevent', ('[start] make lot of noise [end]', 2.0)),
 ('hang glider is a', ('[start] minimal aircraft [end]', 2.0)),
 ('staircase used for', ('[start] go downstairs [end]', 2.321928094887362)),
 ('go to work has prerequisite', ('[start] open front door [end]', 2.0)),
 ('elastic band used for',
  ('[start] hold two or more object together [end]', 2.321928094887362)),
 ('condom is a', ('[start] call rubber [end]', 1.5849625007211563)),
 ('curiosity causes desire', ('[start] learn [end]', 2.321928094887362)),
 ('bird capable of',
  ('[start] build their nest on strong branch [end]', 1.5849625007211563)),
 ('join club motivated by goal', ('[start] find friend [end]', 2.0)),
 ('start fire causes', ('[start] heat [end]', 2.0)),
 ('coffee has property', ('[start] have distinctive aroma [end]', 2.0)),
 ('read newspaper has a',
  ('[start] effect of learn about event [end]', 1.5849625007211563)),
 ('foot used for', ('[start] stand [end]', 2.321928094887362)),
 ('jello receives action',
  ('[start] make from hoof and connective tissue [end]', 1.5849625007211563)),
 ('ranch used for', ('[start] clean animal [end]', 2.0)),
 ('gain more land has subevent',
  ('[start] increase maintainance [end]', 1.5849625007211563))]

sequence_length = 10
n_epochs = 1
embedding_dim = 500
n_states = 5
max_features = 15000
chunk_size = 5

strip_chars = string.punctuation
strip_chars = strip_chars.replace("[", "")
strip_chars = strip_chars.replace("]", "")

def custom_standardization(input_string):
    lowercase = tf.strings.lower(input_string)
    return tf.strings.regex_replace(lowercase, "[%s]" % re.escape(strip_chars), "")

input_vectorizer = layers.experimental.preprocessing.TextVectorization(
    output_mode="int", max_tokens=max_features,
    output_sequence_length=sequence_length, standardize=custom_standardization)

output_vectorizer = layers.experimental.preprocessing.TextVectorization(
    output_mode="int", max_tokens=max_features,
    output_sequence_length=sequence_length + 1, standardize=custom_standardization)

train_in_texts = [pair[0] for pair in train_pairs]
train_out_texts = [pair[1][0] for pair in train_pairs]

input_vectorizer.adapt(train_in_texts)
output_vectorizer.adapt(train_out_texts)

def format_dataset(in_phr, out_phr, label=None):
    in_phr = input_vectorizer(in_phr)
    out_phr = output_vectorizer(out_phr)
    return ({"encoder_inputs": in_phr, "decoder_inputs": out_phr[:, :-1],}, (out_phr[:, 1:], label))
def make_dataset(pairs):
    in_phr_texts, targets = zip(*pairs)
    in_phr_texts = list(in_phr_texts)
    out_phr_texts = [t[0] for t in targets]
    labels = [[t[1]] for t in targets]
    dataset = tf.data.Dataset.from_tensor_slices((in_phr_texts, out_phr_texts, labels))

    dataset = dataset.batch(chunk_size)
    dataset = dataset.map(format_dataset)
    return dataset.shuffle(2048).prefetch(16).cache()


def build_rnn_encdec_verifier_model(
        vocab_size, sequence_length, embedding_dims=100, n_states=10):
    # Encoder: embed the input sequence and keep only the final LSTM states.
    sub_pred_seq = layers.Input(shape=(sequence_length,), name='encoder_inputs')
    enc_embedding = layers.Embedding(
        vocab_size + 1, embedding_dims,
        input_length=sequence_length, mask_zero=True,
        name="Enc_emb")(sub_pred_seq)
    _, state_h, state_c = layers.LSTM(
        n_states, return_state=True,
        # unroll=True,
        name="Enc_LSTM")(enc_embedding)
    encoder_states = [state_h, state_c]

    # Decoder: teacher-forced LSTM initialized with the encoder states.
    decoder_inputs = keras.Input(shape=(sequence_length,), name='decoder_inputs')
    dec_embedding = layers.Embedding(
        vocab_size + 1, embedding_dims,
        input_length=sequence_length, mask_zero=True,
        name="Dec_emb")(decoder_inputs)
    decoder_out, last_state, last_cell = layers.LSTM(
        n_states, return_sequences=True, return_state=True,
        name="Dec_LSTM")(dec_embedding, initial_state=encoder_states)

    # Token predictions: (batch, sequence_length, vocab_size).
    obj_out = layers.Dense(vocab_size, name="obj_MLP")(decoder_out)
    # Score predictions: (batch, sequence_length, 1) because of TimeDistributed.
    out_label = layers.TimeDistributed(
        layers.Dense(1, name="labels_MLP"))(decoder_out)

    model = keras.Model([sub_pred_seq, decoder_inputs], [obj_out, out_label])
    return model


train_ds = make_dataset(train_pairs)

model = build_rnn_encdec_verifier_model(
    vocab_size=max_features,
    embedding_dims=embedding_dim,
    sequence_length=sequence_length,
    n_states=n_states)
model.summary()
model.compile(optimizer='adam',
              loss=[keras.losses.SparseCategoricalCrossentropy(), 'mae'],
              metrics=['acc', 'mse'])

model.fit(train_ds,
        epochs=n_epochs,
        verbose=1)

This gives me the following error:

ValueError: Can not squeeze dim[1], expected a dimension of 1, got 10 for '{{node mean_absolute_error/weighted_loss/Squeeze}} = Squeeze[T=DT_FLOAT, squeeze_dims=[-1]](Cast_2)' with input shapes: [?,10].

Help please

Hi @iarroyof

Welcome to the TensorFlow Forum!

It seems there is a mismatch between the shapes in your preprocessed dataset and the shapes the model expects: the score head is wrapped in TimeDistributed, so the model's second output has shape (batch, 10, 1), while make_dataset supplies a single score of shape (batch, 1) per example, and the 'mae' loss fails when it tries to squeeze away the 10-step time dimension. Please refer to this Seq2Seq text translation model, which might be helpful for fixing the error. Let us know if this issue still persists. Thank you.
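Here is a minimal sketch of one possible fix, assuming your data pipeline stays exactly as posted (one scalar score per example): predict the score once per sequence from the decoder's final hidden state instead of wrapping a Dense layer in TimeDistributed, so the second output has shape (batch, 1) and matches the (batch, 1) labels that make_dataset produces. The sketch also adds a softmax activation to the token head, since SparseCategoricalCrossentropy defaults to from_logits=False; that is a separate assumption, not the cause of the shape error.

from tensorflow import keras
from tensorflow.keras import layers

def build_rnn_encdec_verifier_model(
        vocab_size, sequence_length, embedding_dims=100, n_states=10):
    # Encoder and decoder are unchanged from the code in the question.
    sub_pred_seq = layers.Input(shape=(sequence_length,), name='encoder_inputs')
    enc_embedding = layers.Embedding(
        vocab_size + 1, embedding_dims,
        input_length=sequence_length, mask_zero=True,
        name="Enc_emb")(sub_pred_seq)
    _, state_h, state_c = layers.LSTM(
        n_states, return_state=True, name="Enc_LSTM")(enc_embedding)
    encoder_states = [state_h, state_c]

    decoder_inputs = keras.Input(shape=(sequence_length,), name='decoder_inputs')
    dec_embedding = layers.Embedding(
        vocab_size + 1, embedding_dims,
        input_length=sequence_length, mask_zero=True,
        name="Dec_emb")(decoder_inputs)
    decoder_out, last_state, last_cell = layers.LSTM(
        n_states, return_sequences=True, return_state=True,
        name="Dec_LSTM")(dec_embedding, initial_state=encoder_states)

    # Per-timestep token distribution: (batch, sequence_length, vocab_size).
    obj_out = layers.Dense(vocab_size, activation="softmax",
                           name="obj_MLP")(decoder_out)
    # One score per sequence, read off the decoder's final hidden state:
    # (batch, 1), which matches the labels produced by make_dataset.
    out_label = layers.Dense(1, name="labels_MLP")(last_state)

    return keras.Model([sub_pred_seq, decoder_inputs], [obj_out, out_label])

With this change, the rest of your script should run unmodified: the token head trains with sparse categorical cross-entropy per timestep, and the score head trains with MAE on one scalar per sequence. Alternatively, if you really do want a per-timestep score, keep the TimeDistributed head and instead tile the score inside format_dataset, e.g. tf.tile(label[:, :, tf.newaxis], [1, sequence_length, 1]), so the target shape becomes (batch, sequence_length, 1) and matches the model output.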