ValueError When Initializing FactorizedTopK in TensorFlow Recommenders Model

sulav_dahal · March 13, 2024, 5:08am

I’m working on a recommendation system using TensorFlow and TensorFlow Recommenders (TFRS), and I’ve run into a perplexing issue during the initialization of the FactorizedTopK metric within my RecommendationModel. Specifically, the error emerges when the model attempts to add a weight named “counter” in the Streaming layer of tfrs.metrics.FactorizedTopK. I am following this documentation to build my recommendation model: Building deep retrieval models | TensorFlow Recommenders

Here’s the relevant section of my model code:

def _select_program_features(x):
    """Return a dict holding only the five program feature columns of `x`."""
    return {
        "program_id": x["program_id"],
        "name": x["name"],
        "Country": x["Country"],
        "Studylvl": x["Studylvl"],
        "majors": x["majors"],
    }


programs = tf_dataset.map(_select_program_features)

# Pull one element out of the mapped dataset (skipping the first
# `desired_index` elements) to eyeball its contents.
desired_index = 20
desired_data = next(iter(programs.skip(desired_index).take(1)))

# Every feature is printed via .numpy().decode(), one labelled line each.
for label, feature in (
    ("Program ID:", "program_id"),
    ("Name:", "name"),
    ("Country:", "Country"),
    ("Study Level:", "Studylvl"),
    ("Majors:", "majors"),
):
    print(label, desired_data[feature].numpy().decode())


Program ID: 157027
Name: m.s.e in robotics
Country: united states of america
Study Level: postgraduate
Majors: automation science and engineering, biorobotics, control and dynamical systems, medical robotics and computer integrated surgical , perception and cognitive systems, general robotics

class ProgramModel(tf.keras.Model):
    """Embed the features of a program and concatenate the embeddings.

    Four features ("program_id", "name", "Country", "Studylvl") go through a
    StringLookup + Embedding stack built from module-level vocabularies;
    "name" and "majors" additionally go through a TextVectorization +
    Embedding + GlobalAveragePooling1D stack.
    """

    def __init__(self):
        super().__init__()

        max_tokens = 10_000
        embedding_dimension = 32

        def lookup_embedding(vocabulary):
            # The embedding table has one more row than the vocabulary has
            # entries (presumably for StringLookup's out-of-vocabulary index).
            return tf.keras.Sequential([
                tf.keras.layers.StringLookup(
                    vocabulary=vocabulary, mask_token=None),
                tf.keras.layers.Embedding(
                    len(vocabulary) + 1, embedding_dimension),
            ])

        def new_text_vectorizer():
            return tf.keras.layers.TextVectorization(
                max_tokens=max_tokens,
                output_mode='int',
                output_sequence_length=32,
            )

        def pooled_text_embedding(vectorizer):
            # Token ids -> embeddings (id 0 masked) -> average over the sequence.
            return tf.keras.Sequential([
                vectorizer,
                tf.keras.layers.Embedding(
                    max_tokens, embedding_dimension, mask_zero=True),
                tf.keras.layers.GlobalAveragePooling1D(),
            ])

        # Layers are created in the same order as before so that attribute
        # tracking and automatically generated layer names are unaffected.
        self.program_id_embedding = lookup_embedding(unique_program_id)
        self.name_embedding = lookup_embedding(unique_program_name)

        self.name_text_vectorizer = new_text_vectorizer()
        self.name_text_embedding = pooled_text_embedding(self.name_text_vectorizer)
        self.name_text_vectorizer.adapt(unique_program_name)

        self.country_embedding = lookup_embedding(unique_countries)
        self.study_lvl_embedding = lookup_embedding(unique_study_lvl)

        self.major_text_vectorizer = new_text_vectorizer()
        self.major_text_embedding = pooled_text_embedding(self.major_text_vectorizer)
        self.major_text_vectorizer.adapt(majors)

    def call(self, inputs):
        """Concatenate all per-feature embeddings of `inputs` along axis 1.

        `inputs` is indexed with the keys "Country", "Studylvl", "name",
        "majors" and "program_id".
        """
        per_feature = [
            self.country_embedding(inputs["Country"]),
            self.study_lvl_embedding(inputs["Studylvl"]),
            self.name_embedding(inputs["name"]),
            self.name_text_embedding(inputs["name"]),
            self.major_text_embedding(inputs["majors"]),
            self.program_id_embedding(inputs["program_id"]),
        ]
        return tf.concat(per_feature, axis=1)

class CandidateModel(tf.keras.Model):
    """Candidate-side model: ProgramModel embeddings followed by dense layers.

    Args:
        layer_sizes: sequence of layer widths. Every entry except the last
            yields a ReLU Dense layer followed by BatchNormalization; the
            last entry yields a Dense layer without an activation.
    """

    def __init__(self, layer_sizes):
        super().__init__()

        self.embedding_model = ProgramModel()

        hidden_sizes = layer_sizes[:-1]
        # Slicing (rather than indexing) keeps an empty `layer_sizes` valid:
        # in that case no projection layer is added at all.
        projection_sizes = layer_sizes[-1:]

        stack = tf.keras.Sequential()
        for units in hidden_sizes:
            stack.add(tf.keras.layers.Dense(units, activation="relu"))
            stack.add(tf.keras.layers.BatchNormalization())
        for units in projection_sizes:
            stack.add(tf.keras.layers.Dense(units))
        self.dense_layers = stack

    def call(self, inputs):
        """Embed `inputs` with ProgramModel, then apply the dense stack."""
        return self.dense_layers(self.embedding_model(inputs))

class RecommendationModel(tfrs.models.Model):
    """Retrieval model made of a query model, a candidate model and a task.

    Args:
        layer_sizes: layer widths forwarded unchanged to both QueryModel and
            CandidateModel.
    """

    def __init__(self, layer_sizes):
        super().__init__()
        self.query_model = QueryModel(layer_sizes)
        self.candidate_model = CandidateModel(layer_sizes)

        # Candidate embeddings for the top-K metric: every program, in
        # batches of 128, passed through the candidate model.
        candidate_embeddings = programs.batch(128).map(self.candidate_model)
        top_k_metric = tfrs.metrics.FactorizedTopK(candidates=candidate_embeddings)
        self.task = tfrs.tasks.Retrieval(metrics=top_k_metric)

    def compute_loss(self, features, training=False):
        """Return the retrieval task's result for one batch of `features`.

        Args:
            features: mapping indexed with "Country", "Studylvl", "name",
                "majors" and "program_id".
            training: accepted as part of the signature; not used here.
        """
        query_inputs = {
            "Country": features["Country"],
            "Studylvl": features["Studylvl"],
            "name": features["name"],
            "majors": features["majors"],
        }
        # The candidate side sees the same features plus the program id.
        candidate_inputs = {**query_inputs, "program_id": features["program_id"]}

        query_embeddings = self.query_model(query_inputs)
        candidate_embeddings = self.candidate_model(candidate_inputs)

        return self.task(query_embeddings, candidate_embeddings)


model = RecommendationModel([128, 64, 32])

adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=adam)

# Training data is fed in batches of 2000, validation data in batches of 500.
train_batches = train.batch(2000)
validation_batches = test.batch(500)

model.fit(
    x=train_batches,
    validation_data=validation_batches,
    epochs=20,
    verbose=True,
)

Upon attempting to initialize the RecommendationModel, I encounter the following ValueError:

ValueError: Cannot convert '('c', 'o', 'u', 'n', 't', 'e', 'r')' to a shape. Found invalid entry 'c' of type '<class 'str'>'.

Here is the full error log:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[64], line 1
----> 1 model = RecommendationModel([128, 64, 32])
      2 model.compile(
      3     optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
      4 )
      6 # Train the model

Cell In[63], line 7, in RecommendationModel.__init__(self, layer_sizes)
      4 self.query_model = QueryModel(layer_sizes)
      5 self.candidate_model = CandidateModel(layer_sizes) 
      6 self.task = tfrs.tasks.Retrieval(
----> 7     metrics= tfrs.metrics.FactorizedTopK(
      8         candidates=programs.batch(128).map(self.candidate_model)
      9     )
     10 )

File /usr/local/lib/python3.9/site-packages/tensorflow_recommenders/metrics/factorized_top_k.py:79, in FactorizedTopK.__init__(self, candidates, ks, name)
     75 super().__init__(name=name)
     77 if isinstance(candidates, tf.data.Dataset):
     78   candidates = (
---> 79       layers.factorized_top_k.Streaming(k=max(ks))
     80       .index_from_dataset(candidates)
     81   )
     83 self._ks = ks
     84 self._candidates = candidates

File /usr/local/lib/python3.9/site-packages/tensorflow_recommenders/layers/factorized_top_k.py:376, in Streaming.__init__(self, query_model, k, handle_incomplete_batches, num_parallel_calls, sorted_order)
    373 self._num_parallel_calls = num_parallel_calls
    374 self._sorted = sorted_order
--> 376 self._counter = self.add_weight("counter", dtype=tf.int32, trainable=False)

File /usr/local/lib/python3.9/site-packages/keras/src/layers/layer.py:499, in Layer.add_weight(self, shape, initializer, dtype, trainable, regularizer, constraint, name)
    497 initializer = initializers.get(initializer)
    498 with backend.name_scope(self.name, caller=self):
--> 499     variable = backend.Variable(
    500         initializer=initializer,
    501         shape=shape,
    502         dtype=dtype,
    503         trainable=trainable,
    504         name=name,
    505     )
    506 # Will be added to layer.losses
    507 variable.regularizer = regularizers.get(regularizer)

File /usr/local/lib/python3.9/site-packages/keras/src/backend/common/variables.py:74, in KerasVariable.__init__(self, initializer, shape, dtype, trainable, name)
     72 else:
     73     if callable(initializer):
---> 74         shape = self._validate_shape(shape)
     75         value = initializer(shape, dtype=dtype)
     76     else:

File /usr/local/lib/python3.9/site-packages/keras/src/backend/common/variables.py:97, in KerasVariable._validate_shape(self, shape)
     96 def _validate_shape(self, shape):
---> 97     shape = standardize_shape(shape)
     98     if None in shape:
     99         raise ValueError(
    100             "Shapes used to initialize variables must be "
    101             "fully-defined (no `None` dimensions). Received: "
    102             f"shape={shape} for variable path='{self.path}'"
    103         )

File /usr/local/lib/python3.9/site-packages/keras/src/backend/common/variables.py:426, in standardize_shape(shape)
    424     continue
    425 if not is_int_dtype(type(e)):
--> 426     raise ValueError(
    427         f"Cannot convert '{shape}' to a shape. "
    428         f"Found invalid entry '{e}' of type '{type(e)}'. "
    429     )
    430 if e < 0:
    431     raise ValueError(
    432         f"Cannot convert '{shape}' to a shape. "
    433         "Negative dimensions are not allowed."
    434     )

ValueError: Cannot convert '('c', 'o', 'u', 'n', 't', 'e', 'r')' to a shape. Found invalid entry 'c' of type '<class 'str'>'.

This error suggests an issue with interpreting parameters during weight initialization within TensorFlow or TFRS’s internal code, but I’m at a loss for how to resolve it. I’ve confirmed that my inputs don’t contain any NaN values or other obvious issues, and my learning rate seems reasonable.

Has anyone encountered a similar issue or have suggestions on what might be going wrong? I’m using TensorFlow 2.x and TensorFlow Recommenders 0.x. Any insights or guidance would be greatly appreciated!

rcauvin · March 13, 2024, 1:39pm

Instead of

    # Original: the candidates dataset yields only the candidate model's output.
    self.task = tfrs.tasks.Retrieval(
        metrics= tfrs.metrics.FactorizedTopK(
            candidates=programs.batch(128).map(self.candidate_model)
        )
    )

try

    # Suggested: the candidates dataset yields (program_id, embedding) pairs.
    self.task = tfrs.tasks.Retrieval(
        metrics= tfrs.metrics.FactorizedTopK(
            candidates=programs.batch(128).map(lambda c: (c["program_id"], self.candidate_model(c)))
        )
    )

sulav_dahal · March 16, 2024, 5:06pm

Hey, thanks for the response. I tried doing that, but it didn’t work either. I get the same error:

Cell In[22], line 7, in RecommendationModel.__init__(self, layer_sizes)
      4 self.query_model = QueryModel(layer_sizes)
      5 self.candidate_model = CandidateModel(layer_sizes)
      6 self.task = tfrs.tasks.Retrieval(
----> 7     metrics= tfrs.metrics.FactorizedTopK(
      8         candidates=programs.batch(128).map(lambda c: (c["program_id"], self.candidate_model(c)))
      9     )
     10 )

ValueError: Cannot convert '('c', 'o', 'u', 'n', 't', 'e', 'r')' to a shape. Found invalid entry 'c' of type '<class 'str'>'.

rcauvin · March 17, 2024, 7:11pm

What is the output of pip show keras in your notebook?

rcauvin · March 18, 2024, 8:52pm

Noting here that I see a potential solution on StackOverflow where the issue is also posted.

rcauvin · April 6, 2024, 6:30pm

I’ve tracked down this bug in the implementation of the tfrs.layers.factorized_top_k module in TensorFlow Recommenders.

In recommenders/tensorflow_recommenders/layers/factorized_top_k.py, we find:

self._counter = self.add_weight("counter", dtype=tf.int32, trainable=False)

Note that the first argument passed to add_weight is the “counter” string.

In the Keras 2.15.0 implementation of add_weight, we find:

    def add_weight(
        self,
        name=None,
        shape=None,
        dtype=None,
        initializer=None,
        regularizer=None,
        trainable=None,
        constraint=None,
        use_resource=None,
        synchronization=tf.VariableSynchronization.AUTO,
        aggregation=tf.VariableAggregation.NONE,
        **kwargs,
    ):

Note that the first argument is the name of the weight variable. Passing “counter” as the name in this manner works fine in Keras 2.15.0, and we won’t get the ValueError: Cannot convert '('c', 'o', 'u', 'n', 't', 'e', 'r')' to a shape error.

But the Keras 3.1.1 implementation of add_weight expects shape as the first argument:

    def add_weight(
        self,
        shape=None,
        initializer=None,
        dtype=None,
        trainable=True,
        regularizer=None,
        constraint=None,
        name=None,
    ):

So when it goes on to check for a valid shape, the shape is set to “counter”, which is not valid, and it results in ValueError: Cannot convert '('c', 'o', 'u', 'n', 't', 'e', 'r')' to a shape.

The bug, therefore, is in TensorFlow Recommenders’ implementation of the tfrs.layers.factorized_top_k module.

rcauvin · April 26, 2024, 6:37pm

@Wei_Wei This bug is reported on Github as well: tfrs.metrics.FactorizedTopK · Issue #712 · tensorflow/recommenders · GitHub

The bug means that those of us who want to use the latest version of TensorFlow cannot do so with our TensorFlow Recommenders projects that use FactorizedTopK.

Can we expect a new release of TensorFlow Recommenders in the near future that fixes this bug?