Using Keras layers, how to add weights of subclass layer into the weights of the original class?

The code for the subclassing method is given below. I created class GTN inherited from layers.Layer and defined parameters to be trained as,

  1. self.layers and 2. self.weight.

On looking further you will get to know that self.layers has got 2 parameters of itself self.weight and self.bias (defined in class GTconv)

That makes a total of parameters:

  1. self.weight (defined in GTN)
  2. 2xself.weights and 2xself.bias for each GTLayer (defined in GTLayer and GTconv) , and there are 4 layers

Therefore, total parameters : 1 + 4x4 = 17 parameters. But I am not able to find the parameters associated with self.layers in the model.trainable_weights list , wile calculating the grads and optimising the model. Kindly help me with this.

Code:

class GTN(layers.Layer): # layers.Layer keeps track of everything under the hood!

def __init__(self, num_edge, num_channels, w_in, w_out, num_class,num_layers,norm):
    super(GTN, self).__init__()
    self.num_channels = num_channels
    self.w_in = w_in
    self.w_out = w_out
    self.num_class = num_class
    self.num_layers = num_layers # 3 layers
    
    self.layers = []

    for i in tf.range(num_layers):
        if i == 0:
            self.layers.append(GTLayer(num_edge, num_channels, first=True))
        else:
            self.layers.append(GTLayer(num_edge, num_channels, first=False))
    
    w_init = tf.random_normal_initializer()
    self.weight = tf.Variable(initial_value= w_init(shape=(w_in, w_out)),trainable=True)

class GTLayer(keras.layers.Layer):

def __init__(self, in_channels, out_channels):
    super(GTLayer, self).__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels

    self.conv1 = GTConv(in_channels, out_channels)
    self.conv2 = GTConv(in_channels, out_channels)
   
def call(self, A, H_=None):
    if self.first == True:
        
        a = self.conv1(A)
        b = self.conv2(A)
        H = tf.matmul( a, b)
        W = [tf.stop_gradient(tf.nn.softmax(self.conv1.weight, axis=1))]
    return H,W

class GTConv(keras.layers.Layer):

def __init__(self, in_channels, out_channels):
    super(GTConv, self).__init__()
  
    w_init = tf.random_normal_initializer()
    self.weight = tf.Variable(
        initial_value=w_init(shape=(out_channels,in_channels,1,1)),
        trainable=True)
    self.bias = None
    self.scale = tf.Variable([0.1] , trainable=False)
        
def call(self, A):
    A = tf.reduce_sum(A*(tf.nn.softmax(self.weight,1)), 1)
    return A 

Training:

final_f1 = 0

for l in tf.range(1):
    
    model = GTN(num_edge=A.shape[-1],
                        num_channels=num_channels,
                        w_in = node_features.shape[1],
                        w_out = node_dim,
                        num_class=num_classes,
                        num_layers=num_layers)                        
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.005)

    for i in range(epochs):

        with tf.GradientTape() as tape:
            
            loss,y_train,Ws = model(A, node_features, train_node, train_target)
            train_f1 = tf.reduce_mean(f1_score(tf.math.argmax(y_train, 1), train_target, num_classes=num_classes)).cpu() 
            print('Train - Loss: {}, Macro_F1: {}'.format(loss.cpu().numpy(), train_f1))

        grads = tape.gradient(loss, model.trainable_weights) 
        optimizer.apply_gradients(zip(grads, model.trainable_weights))