Hey, I have problem with dimensions numbers in candidates embeddings.
This is my Models classes:
class JobModel(tf.keras.Model):
def __init__(self):
super().__init__()
self.vectorize_layer = layers.TextVectorization(
standardize=custom_standardization,
output_sequence_length=5000
)
self.job_embedding = tf.keras.Sequential([
self.vectorize_layer,
layers.Embedding(len(self.vectorize_layer.get_vocabulary()), 256, mask_zero=True),
layers.Reshape((1, -1)),
layers.GlobalAveragePooling1D(),
])
def call(self, input):
return self.job_embedding(input['job'])
class FreelancerModel(tf.keras.Model):
def __init__(self):
super().__init__()
self.title_vectorize_layer = layers.TextVectorization(
standardize=custom_standardization,
output_sequence_length=5000
)
self.title_embedding = tf.keras.Sequential([
self.title_vectorize_layer,
layers.Embedding(len(self.title_vectorize_layer.get_vocabulary()), 64),
layers.Reshape((1, -1)),
layers.GlobalAveragePooling1D()
])
self.description_vectorize_layer = layers.TextVectorization(
standardize=custom_standardization,
output_sequence_length=5000
)
self.description_embedding = tf.keras.Sequential([
self.description_vectorize_layer,
layers.Embedding(len(self.description_vectorize_layer.get_vocabulary()), 64),
layers.Reshape((1, -1)),
layers.GlobalAveragePooling1D()
])
self.project_titles_vectorize_layer = layers.TextVectorization(
standardize=custom_standardization,
output_sequence_length=5000
)
self.project_titles_embedding = tf.keras.Sequential([
self.project_titles_vectorize_layer,
layers.Embedding(len(self.project_titles_vectorize_layer.get_vocabulary()), 64),
layers.Reshape((1, -1)),
layers.GlobalAveragePooling1D()
])
self.project_descriptions_vectorize_layer = layers.TextVectorization(
standardize=custom_standardization,
output_sequence_length=5000
)
self.project_descriptions_embedding = tf.keras.Sequential([
self.project_descriptions_vectorize_layer,
layers.Embedding(len(self.project_descriptions_vectorize_layer.get_vocabulary()), 64),
layers.Reshape((1, -1)),
layers.GlobalAveragePooling1D()
])
def call(self, inputs):
return tf.concat([
self.title_embedding(inputs['freelancer title']),
self.description_embedding(inputs['freelancer description']),
self.project_titles_embedding(inputs['project titles']),
self.project_descriptions_embedding(inputs['project descriptions'])
], axis=1)
class MatchingModel(tfrs.Model):
def __init__(self, job_model, freelancer_model):
super().__init__()
self.job_model: tf.keras.Model = job_model
self.freelancer_model: tf.keras.Model = freelancer_model
self.task: tf.keras.layers.Layer = task
def compute_loss(self, features, training=False):
job_data = {
'job': features['job']
}
job_embeddings = self.job_model(job_data)
freelancer_data = {
'freelancer title': features['freelancer title'],
'freelancer description': features['freelancer description'],
'project titles': features['project titles'],
'project descriptions': features['project descriptions']
}
freelancer_embeddings = self.freelancer_model(freelancer_data)
return self.task(job_embeddings, freelancer_embeddings)
train = dataset.take(388)
test = dataset.skip(97)
matching_model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))
matching_model.fit(train, epochs=10, validation_data=test)
Error:
ValueError: in user code:
File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1338, in train_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1322, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1303, in run_step **
outputs = model.train_step(data)
File "/usr/local/lib/python3.10/dist-packages/tensorflow_recommenders/models/base.py", line 68, in train_step
loss = self.compute_loss(inputs, training=True)
File "<ipython-input-148-0db967a323e0>", line 28, in compute_loss
return self.task(job_embeddings, freelancer_embeddings)
File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/tmp/__autograph_generated_file9dik1vz2.py", line 159, in tf__call
ag__.if_stmt(ag__.ld(compute_metrics), if_body_5, else_body_5, get_state_7, set_state_7, (), 0)
File "/tmp/__autograph_generated_file9dik1vz2.py", line 155, in if_body_5
ag__.for_stmt(ag__.ld(self)._factorized_metrics, None, loop_body_1, get_state_6, set_state_6, (), {'iterate_names': 'metric'})
File "/tmp/__autograph_generated_file9dik1vz2.py", line 154, in loop_body_1
ag__.converted_call(ag__.ld(update_ops).append, (ag__.converted_call(ag__.ld(metric).update_state, (ag__.ld(query_embeddings), ag__.ld(candidate_embeddings)[:ag__.converted_call(ag__.ld(tf).shape, (ag__.ld(query_embeddings),), None, fscope)[0]]), dict(true_candidate_ids=ag__.ld(candidate_ids)), fscope),), None, fscope)
File "/tmp/__autograph_generated_file0ljxhdke.py", line 50, in tf__update_state
(top_k_predictions, retrieved_ids) = ag__.converted_call(ag__.ld(self)._candidates, (ag__.ld(query_embeddings),), dict(k=ag__.converted_call(ag__.ld(max), (ag__.ld(self)._ks,), None, fscope)), fscope)
File "/tmp/__autograph_generated_file7veqd3f2.py", line 163, in tf__call
results = ag__.converted_call(ag__.converted_call(ag__.ld(candidates).map, (ag__.ld(top_scores),), dict(num_parallel_calls=ag__.ld(self)._num_parallel_calls), fscope).reduce, (ag__.ld(initial_state), ag__.ld(top_k)), None, fscope)
File "/tmp/__autograph_generated_file7veqd3f2.py", line 48, in top_scores
scores = ag__.converted_call(ag__.ld(self)._compute_score, (ag__.ld(queries), ag__.ld(candidate_batch)), None, fscope_1)
File "/usr/local/lib/python3.10/dist-packages/tensorflow_recommenders/layers/factorized_top_k.py", line 331, in _compute_score
return tf.matmul(queries, candidates, transpose_b=True)
ValueError: Exception encountered when calling layer 'retrieval_6' (type Retrieval).
in user code:
File "/usr/local/lib/python3.10/dist-packages/tensorflow_recommenders/tasks/retrieval.py", line 197, in call *
update_ops.append(
File "/usr/local/lib/python3.10/dist-packages/tensorflow_recommenders/metrics/factorized_top_k.py", line 136, in update_state *
top_k_predictions, retrieved_ids = self._candidates(
File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler **
raise e.with_traceback(filtered_tb) from None
File "/tmp/__autograph_generated_file7veqd3f2.py", line 163, in tf__call
results = ag__.converted_call(ag__.converted_call(ag__.ld(candidates).map, (ag__.ld(top_scores),), dict(num_parallel_calls=ag__.ld(self)._num_parallel_calls), fscope).reduce, (ag__.ld(initial_state), ag__.ld(top_k)), None, fscope)
File "/tmp/__autograph_generated_file7veqd3f2.py", line 48, in top_scores
scores = ag__.converted_call(ag__.ld(self)._compute_score, (ag__.ld(queries), ag__.ld(candidate_batch)), None, fscope_1)
File "/usr/local/lib/python3.10/dist-packages/tensorflow_recommenders/layers/factorized_top_k.py", line 331, in _compute_score
return tf.matmul(queries, candidates, transpose_b=True)
ValueError: Exception encountered when calling layer 'streaming_6' (type Streaming).
in user code:
File "/usr/local/lib/python3.10/dist-packages/tensorflow_recommenders/layers/factorized_top_k.py", line 422, in top_scores *
scores = self._compute_score(queries, candidate_batch)
File "/usr/local/lib/python3.10/dist-packages/tensorflow_recommenders/layers/factorized_top_k.py", line 331, in _compute_score **
return tf.matmul(queries, candidates, transpose_b=True)
ValueError: Dimensions must be equal, but are 256 and 1280000 for '{{node MatMul}} = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=true](MatMul/job_model_11/sequential_33/global_average_pooling1d_33/Mean, args_1)' with input shapes: [?,256], [?,1280000].
Call arguments received by layer 'streaming_6' (type Streaming):
• queries=tf.Tensor(shape=(None, 256), dtype=float32)
• k=100
Call arguments received by layer 'retrieval_6' (type Retrieval):
• query_embeddings=tf.Tensor(shape=(None, 256), dtype=float32)
• candidate_embeddings=tf.Tensor(shape=(None, 256), dtype=float32)
• sample_weight=None
• candidate_sampling_probability=None
• candidate_ids=None
• compute_metrics=True
• compute_batch_metrics=True