ValueError When Initializing FactorizedTopK in TensorFlow Recommenders Model

I’m working on a recommendation system using TensorFlow and TensorFlow Recommenders (TFRS), and I’ve run into a perplexing issue during the initialization of the FactorizedTopK metric within my RecommendationModel. Specifically, the error emerges when the model attempts to add a weight named “counter” in the Streaming layer of tfrs.metrics.FactorizedTopK. I am following this following documentation to make my reccomenation model: Building deep retrieval models  |  TensorFlow Recommenders

Here’s the relevant section of my model code:

programs = tf_dataset.map(lambda x: {
    "program_id": x["program_id"],
    "name": x["name"],
    "Country": x["Country"],
    "Studylvl": x["Studylvl"],
    "majors": x["majors"],
})


desired_index = 20

desired_data = next(iter(programs.skip(desired_index).take(1)))

print("Program ID:", desired_data["program_id"].numpy().decode())
print("Name:", desired_data["name"].numpy().decode())
print("Country:", desired_data["Country"].numpy().decode())
print("Study Level:", desired_data["Studylvl"].numpy().decode())
print("Majors:", desired_data["majors"].numpy().decode())


Program ID: 157027
Name: m.s.e in robotics
Country: united states of america
Study Level: postgraduate
Majors: automation science and engineering, biorobotics, control and dynamical systems, medical robotics and computer integrated surgical , perception and cognitive systems, general robotics
class ProgramModel(tf.keras.Model):
    def __init__(self):
        super().__init__()

        max_tokens = 10_000
        embedding_dimension = 32

        self.program_id_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_program_id, mask_token=None),
            tf.keras.layers.Embedding(len(unique_program_id) + 1, embedding_dimension),
        ])

        self.name_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_program_name, mask_token=None),
            tf.keras.layers.Embedding(len(unique_program_name) + 1, embedding_dimension),
        ])

        self.name_text_vectorizer = tf.keras.layers.TextVectorization(max_tokens=max_tokens, output_mode='int', output_sequence_length=32)

        self.name_text_embedding = tf.keras.Sequential([
            self.name_text_vectorizer,
            tf.keras.layers.Embedding(max_tokens, embedding_dimension, mask_zero=True),
            tf.keras.layers.GlobalAveragePooling1D(),
        ])

        self.name_text_vectorizer.adapt(unique_program_name)

        self.country_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_countries, mask_token=None),
            tf.keras.layers.Embedding(len(unique_countries) + 1, embedding_dimension),
        ])

        self.study_lvl_embedding = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_study_lvl, mask_token=None),
            tf.keras.layers.Embedding(len(unique_study_lvl) + 1, embedding_dimension),
        ])


        self.major_text_vectorizer = tf.keras.layers.TextVectorization(max_tokens=max_tokens, output_mode='int', output_sequence_length=32)


        self.major_text_embedding = tf.keras.Sequential([
            self.major_text_vectorizer,
            tf.keras.layers.Embedding(max_tokens, embedding_dimension, mask_zero=True),
            tf.keras.layers.GlobalAveragePooling1D()

        ])

        self.major_text_vectorizer.adapt(majors)




    def call(self, inputs):
        return tf.concat([
            self.country_embedding(inputs["Country"]),
            self.study_lvl_embedding(inputs["Studylvl"]),
            self.name_embedding(inputs["name"]),
            self.name_text_embedding(inputs["name"]),
            self.major_text_embedding(inputs["majors"]),
            self.program_id_embedding(inputs["program_id"]),
        ], axis=1)

class CandidateModel(tf.keras.Model):
    def __init__(self, layer_sizes):
        super().__init__()

        self.embedding_model = ProgramModel()

        self.dense_layers = tf.keras.Sequential()

        for layer_size in layer_sizes[:-1]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size, activation="relu"))
            self.dense_layers.add(tf.keras.layers.BatchNormalization())

        for layer_size in layer_sizes[-1:]:
            self.dense_layers.add(tf.keras.layers.Dense(layer_size))

    def call(self, inputs):
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)

class RecommendationModel(tfrs.models.Model):
    def __init__(self, layer_sizes):
        super().__init__()
        self.query_model = QueryModel(layer_sizes)
        self.candidate_model = CandidateModel(layer_sizes) 
        self.task = tfrs.tasks.Retrieval(
            metrics= tfrs.metrics.FactorizedTopK(
                candidates=programs.batch(128).map(self.candidate_model)
            )
        )

    def compute_loss(self, features, training=False):
        query_embeddings = self.query_model({
            "Country": features["Country"],
            "Studylvl": features["Studylvl"],
            "name": features["name"],
            "majors": features["majors"],
        })
        candidate_embeddings = self.candidate_model({
            "Country": features["Country"],
            "Studylvl": features["Studylvl"],
            "name": features["name"],
            "majors": features["majors"],
            "program_id": features["program_id"],
        })

        return self.task(query_embeddings, candidate_embeddings)


model = RecommendationModel([128, 64, 32])
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
)

model.fit(
    x=train.batch(2000),
    epochs=20,
    verbose=True,
    validation_data=test.batch(500)
)

Upon attempting to initialize the RecommendationModel, I encounter the following ValueError:

ValueError: Cannot convert '('c', 'o', 'u', 'n', 't', 'e', 'r')' to a shape. Found invalid entry 'c' of type '<class 'str'>'.

Here is the full ErrorLog:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[64], line 1
----> 1 model = RecommendationModel([128, 64, 32])
      2 model.compile(
      3     optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
      4 )
      6 # Train the model

Cell In[63], line 7, in RecommendationModel.__init__(self, layer_sizes)
      4 self.query_model = QueryModel(layer_sizes)
      5 self.candidate_model = CandidateModel(layer_sizes) 
      6 self.task = tfrs.tasks.Retrieval(
----> 7     metrics= tfrs.metrics.FactorizedTopK(
      8         candidates=programs.batch(128).map(self.candidate_model)
      9     )
     10 )

File /usr/local/lib/python3.9/site-packages/tensorflow_recommenders/metrics/factorized_top_k.py:79, in FactorizedTopK.__init__(self, candidates, ks, name)
     75 super().__init__(name=name)
     77 if isinstance(candidates, tf.data.Dataset):
     78   candidates = (
---> 79       layers.factorized_top_k.Streaming(k=max(ks))
     80       .index_from_dataset(candidates)
     81   )
     83 self._ks = ks
     84 self._candidates = candidates

File /usr/local/lib/python3.9/site-packages/tensorflow_recommenders/layers/factorized_top_k.py:376, in Streaming.__init__(self, query_model, k, handle_incomplete_batches, num_parallel_calls, sorted_order)
    373 self._num_parallel_calls = num_parallel_calls
    374 self._sorted = sorted_order
--> 376 self._counter = self.add_weight("counter", dtype=tf.int32, trainable=False)

File /usr/local/lib/python3.9/site-packages/keras/src/layers/layer.py:499, in Layer.add_weight(self, shape, initializer, dtype, trainable, regularizer, constraint, name)
    497 initializer = initializers.get(initializer)
    498 with backend.name_scope(self.name, caller=self):
--> 499     variable = backend.Variable(
    500         initializer=initializer,
    501         shape=shape,
    502         dtype=dtype,
    503         trainable=trainable,
    504         name=name,
    505     )
    506 # Will be added to layer.losses
    507 variable.regularizer = regularizers.get(regularizer)

File /usr/local/lib/python3.9/site-packages/keras/src/backend/common/variables.py:74, in KerasVariable.__init__(self, initializer, shape, dtype, trainable, name)
     72 else:
     73     if callable(initializer):
---> 74         shape = self._validate_shape(shape)
     75         value = initializer(shape, dtype=dtype)
     76     else:

File /usr/local/lib/python3.9/site-packages/keras/src/backend/common/variables.py:97, in KerasVariable._validate_shape(self, shape)
     96 def _validate_shape(self, shape):
---> 97     shape = standardize_shape(shape)
     98     if None in shape:
     99         raise ValueError(
    100             "Shapes used to initialize variables must be "
    101             "fully-defined (no `None` dimensions). Received: "
    102             f"shape={shape} for variable path='{self.path}'"
    103         )

File /usr/local/lib/python3.9/site-packages/keras/src/backend/common/variables.py:426, in standardize_shape(shape)
    424     continue
    425 if not is_int_dtype(type(e)):
--> 426     raise ValueError(
    427         f"Cannot convert '{shape}' to a shape. "
    428         f"Found invalid entry '{e}' of type '{type(e)}'. "
    429     )
    430 if e < 0:
    431     raise ValueError(
    432         f"Cannot convert '{shape}' to a shape. "
    433         "Negative dimensions are not allowed."
    434     )

ValueError: Cannot convert '('c', 'o', 'u', 'n', 't', 'e', 'r')' to a shape. Found invalid entry 'c' of type '<class 'str'>'. 

This error suggests an issue with interpreting parameters during weight initialization within TensorFlow or TFRS’s internal code, but I’m at a loss for how to resolve it. I’ve confirmed that my inputs don’t contain any NaN values or other obvious issues, and my learning rate seems reasonable.

Has anyone encountered a similar issue or have suggestions on what might be going wrong? I’m using TensorFlow 2.x and TensorFlow Recommenders 0.x. Any insights or guidance would be greatly appreciated!

1 Like

Instead of

    self.task = tfrs.tasks.Retrieval(
        metrics= tfrs.metrics.FactorizedTopK(
            candidates=programs.batch(128).map(self.candidate_model)
        )

try

    self.task = tfrs.tasks.Retrieval(
        metrics= tfrs.metrics.FactorizedTopK(
            candidates=programs.batch(128).map(lambda c: (c["program_id"], self.candidate_model(c)))
        )

Hey, Thanks for the response. I tried doing that but that didn’t work either. I get the same error

Cell In[22], line 7, in RecommendationModel.init(self, layer_sizes)
4 self.query_model = QueryModel(layer_sizes)
5 self.candidate_model = CandidateModel(layer_sizes)
6 self.task = tfrs.tasks.Retrieval(
----> 7 metrics= tfrs.metrics.FactorizedTopK(
8 candidates=programs.batch(128).map(lambda c: (c[“program_id”], self.candidate_model(c)))
9 )
10 )

ValueError: Cannot convert '('c', 'o', 'u', 'n', 't', 'e', 'r')' to a shape. Found invalid entry 'c' of type '<class 'str'>'. 

What is the output of pip show keras in your notebook?

Noting here that I see a potential solution on StackOverflow where the issue is also posted.

I’ve tracked down this bug in the implementation of the tfrs.layers.factorized_top_k module in TensorFlow Recommenders.

In recommenders/tensorflow_recommenders/layers/factorized_top_k.py, we find:

self._counter = self.add_weight("counter", dtype=tf.int32, trainable=False)

Note that the first argument passed to add_weight is the “counter” string.

In the Keras 2.15.0 implementation of add_weight, we find:

    def add_weight(
        self,
        name=None,
        shape=None,
        dtype=None,
        initializer=None,
        regularizer=None,
        trainable=None,
        constraint=None,
        use_resource=None,
        synchronization=tf.VariableSynchronization.AUTO,
        aggregation=tf.VariableAggregation.NONE,
        **kwargs,
    ):

Note that first argument is the name of the weight variable. Passing “counter” as the name in this manner works fine in Keras 2.15.0, and we won’t get the ValueError: Cannot convert '('c', 'o', 'u', 'n', 't', 'e', 'r')' to a shape error.

But the Keras 3.1.1 implementation of add_weight expects shape as the first argument:

    def add_weight(
        self,
        shape=None,
        initializer=None,
        dtype=None,
        trainable=True,
        regularizer=None,
        constraint=None,
        name=None,
    ):

So when it goes on to check for a valid shape, the shape is set to “counter”, which is not valid, and it results in ValueError: Cannot convert '('c', 'o', 'u', 'n', 't', 'e', 'r')' to a shape.

The bug, therefore, is in TensorFlow Recommenders’ implementation of the tfrs.layers.factorized_top_k module.