First time subclassing a model. Need some help

Hi guys, I am trying to use the subclassing method with TensorFlow for the first time and I am struggling a bit. I was trying to modify the example provided on the TensorFlow website. My aim is to change the training step in the future.

import numpy as np
import tensorflow_probability as tfp
import tensorflow as tf
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
tfk = tf.keras
tfkl = tf.keras.layers
tfd = tfp.distributions
tfpl = tfp.layers
def NLL(y, distr):
    # negative log-likelihood of y under the predicted distribution
    return -distr.log_prob(y)
def bnn_model():
    inputs = Input(shape=(1,), name="input_layer")
    hidden = Dense(50, activation="relu")(inputs)
    hidden = Dropout(0.1)(hidden, training=True)  # training=True keeps dropout active at inference (MC dropout)
    hidden = Dense(50, activation="relu")(hidden)
    hidden = Dropout(0.1)(hidden, training=True)
    params_mc = Dense(2, activation="relu")(hidden)  # two outputs: mean and (pre-softplus) scale
    modello = Model(inputs=inputs, outputs=params_mc)
    return modello
class MyFunModel(tf.keras.Model):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        def normal_sp(params):
            # first column -> mean, second column -> (pre-softplus) standard deviation
            return tfd.Normal(loc=params[:, 0:1],
                              scale=1e-3 + tf.math.softplus(0.05 * params[:, 1:2]))
        self.mlp1 = bnn_model()
        self.prob = tfp.layers.DistributionLambda(normal_sp, name="normal_sp")

    def call(self, x):
        params = self.mlp1(x)
        dist = self.prob(params)  # a tfd.Normal distribution, not a plain tensor
        return dist

    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

my_funmodel = MyFunModel(name="the_model")

But when I try to fit the model:

optimizer = tf.optimizers.Adam(learning_rate=0.0002)

my_funmodel.build(input_shape=(None, 1))  # batch dimension first, one input feature

my_funmodel.compile(optimizer=optimizer, loss=NLL)
history_my_funmodel = my_funmodel.fit(X_train, y_train, epochs=2000, verbose=0,
                                      batch_size=batch_size, validation_data=(X_val, y_val))

import matplotlib.pyplot as plt
plt.plot(history_my_funmodel.history['loss'], label='loss', linewidth=3)
plt.plot(history_my_funmodel.history['val_loss'], label='val_loss')
plt.legend()
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.ylim(top=100, bottom=0)
plt.show()

all I get is an empty plot and a load of NaN values.

What can I do to solve this issue? Thanks!

@P11 You do not need to compile and fit the model.

Simply call train_step in a loop:

for step in range(10):
    loss = my_funmodel.train_step([X_train, y_train])
    print(loss)
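
(If you skip compile() entirely, note that train_step as written still relies on self.optimizer and self.compiled_loss, both of which compile() would normally create; the update below handles the optimizer, and the note at the end covers the loss and metrics.)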

Update:

def build(self, input_shape):
    ...
    self.optimizer = tf.optimizers.Adam(learning_rate=0.0002)  # created here instead of by compile()

# my_funmodel.compile(optimizer=optimizer, loss=NLL)
# history_my_funmodel = my_funmodel.fit(... )

Note:
You will probably need to provide your own self.metrics and a replacement for self.compiled_loss / self.compiled_metrics, since compile() normally sets these up.
To start, try returning only the loss from train_step and comment out the metrics lines, to see how the model behaves; a sketch of this is shown below.
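
For example, here is a minimal sketch of that idea, reusing the MyFunModel class from above. The subclass name MyFunModelV2 and the toy data are illustrative assumptions, not part of the original code: the optimizer is created in build, and train_step computes the NLL directly and returns only the loss, so neither compile() nor self.compiled_loss / self.compiled_metrics is needed:

import numpy as np
import tensorflow as tf

class MyFunModelV2(MyFunModel):

    def build(self, input_shape):
        super().build(input_shape)  # creates self.mlp1 and self.prob
        self.optimizer = tf.optimizers.Adam(learning_rate=0.0002)

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            distr = self(x, training=True)             # forward pass returns a tfd.Normal
            loss = tf.reduce_mean(-distr.log_prob(y))  # NLL computed directly, no compiled_loss
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return {"loss": loss}  # loss only, no metrics

# toy data, purely illustrative
X_train = np.random.uniform(-1.0, 1.0, size=(256, 1)).astype("float32")
y_train = (2.0 * X_train + np.random.normal(0.0, 0.1, size=(256, 1))).astype("float32")

model = MyFunModelV2(name="the_model_v2")
for step in range(10):
    result = model.train_step((X_train, y_train))
    print(step, float(result["loss"]))

Printing the loss at every step like this also makes it easy to see exactly when it turns into NaN.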