Unable to save custom keras model after building with input shape

Hi!

I’m attempted to train, then save a custom keras model (TF v 2.7.0) with a custom tf.data.dataset on Apple M1 (Montery v 12.6). I’ve tried many variations of the custom model (all train without problems) but I can’t save the model after training- the model is built before training by invoking model.build(input_shape) but this doesn’t seem to help the problem. I’ve also found even if I pass in tensors (not the dataset) this doesn’t solve the problem either.

I’ve included a code snippet that should reproduce the issue, as well as the response to when this is run in my local environment.

import numpy as np                                                                                                                                      
import sys
import tensorflow as tf
  
import datapipe  as tfds
  
class CANet(tf.keras.Model):
    def __init__(
        self,
          **kwargs,
       ):
        super().__init__(**kwargs)
        self.dimensions = 256
        self.ratio = 4
        self.filters = 256
        self.do = 0.1
        self.num_classes = 6
        self.batch_size = 32
        self.num_heads = 1
   
    def build(self,input_shape):
   
        self.conv1 = tf.keras.layers.Conv1D(self.dimensions//2, 
                                                                         3,
                                                                        strides=2,
                                                                        padding='same', 
                                                                        activation=tf.nn.gelu,
                                                                        input_shape=(32,4901,1)
                                                                       )
        self.conv2 = tf.keras.layers.Conv1D(self.dimensions, 
                                                                         3,
                                                                         strides=2,
                                                                         padding='same', 
                                                                        activation=tf.nn.gelu,
                                                                       )
        self.conv3 = tf.keras.layers.Conv1D(self.dimensions, 
                                                                         3,
                                                                        strides=2,
                                                                        padding='same', 
                                                                       activation=tf.nn.gelu,
                                                                       )
        self.conv4 = tf.keras.layers.Conv1D(self.dimensions, 
                                                                         3,
                                                                         strides=2,
                                                                         padding='same', 
                                                                         activation=tf.nn.gelu,
                                                                    ) 
       self.gap1 = tf.keras.layers.GlobalAveragePooling1D(keepdims=False)
       self.dense4 = tf.keras.layers.Dense(units=self.dimensions, activation=tf.nn.gelu)
       self.dense5 = tf.keras.layers.Dense(units=self.num_classes)
  
    def get_config(self):
        config = super().get_config()
        config.update(
            {
                 "dimensions": self.dimensions,
                 "dropout": self.do,
                 "ratio": self.ratio,
                 "filters": self.dimensions,
                 "num_classes": self.filters,
                 "num_heads": self.num_heads,
            }
       )
        return config
          
    @classmethod
   def from_config(cls, config):
       return cls(**config)
           
    def _calculate_loss(self, inputs, test=False):         
        x,labels = inputs
        x = self.conv1(x['intensity'])
        #x = self.conv1(x)                                                                                                                              
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.gap1(x)
           
        x = self.dense4(x)
        x = self.dense4(x)
        logits = self.dense5(x)
        total_loss = self.compiled_loss(labels, logits)
        return total_loss, logits
           
    def train_step(self, inputs):
        with tf.GradientTape() as tape:
            total_loss, logits = self._calculate_loss(inputs)
            train_vars = [
                     self.conv1.trainable_variables,
                     self.conv2.trainable_variables,
                     self.conv3.trainable_variables,
                     self.conv4.trainable_variables,
                     self.dense4.trainable_variables,
                     self.dense5.trainable_variables,
             ]
           grads = tape.gradient(total_loss, train_vars)
           trainable_variable_list = []
               for (grad, var) in zip(grads, train_vars):
                   for g, v in zip(grad, var):
                       trainable_variable_list.append((g, v))
          self.optimizer.apply_gradients(trainable_variable_list)
           _, labels = inputs
          self.compiled_metrics.update_state(labels, logits)
          return {m.name: m.result() for m in self.metrics}
           
    def test_step(self, inputs):
        total_loss, logits = self._calculate_loss(inputs, test=True)
        _, labels = inputs
        self.compiled_metrics.update_state(labels, logits)
        return {m.name: m.result() for m in self.metrics}
           
    def call(self, inputs):
        x = self.conv1(x['intensity'])
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.gap1(x)
        x = self.dense4(x)
        logits = self.dense5(x)
        return logits, tf.squeeze(viz_weights)[..., 1:]

Here is the code used to build, train and save the model

tf.keras.backend.clear_session()
          
num_epochs = 1           
categories = [int(1), int(2), int(3), int(4), int(5), int(6) ]
num_classes = len(categories)                                                                                                                           
batch_size = 32          
pattern_len = 4901       
trainset = "/Users/vanessa/work/repos/ml_4_unit_cell/data_curation/grand_set/mixed"
ds = tfds.data_pipe(categories, data_dir=trainset, batch_size=batch_size, 
                                   length=pattern_len, num_epochs=num_epochs,) 
dataset = ds.get_tfdataset()

convnet = CANet()        
total_steps = int((16520 / batch_size) * num_epochs)
optimizer = tf.optimizers.Adam()
convnet.compile(         
                             optimizer=optimizer, 
                             loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                             metrics=[            
                             tf.keras.metrics.CategoricalAccuracy(name="accuracy"),
                            tf.keras.metrics.TopKCategoricalAccuracy(5, name="top-5-accuracy"),
                            ],                   
                          )                        
ishape = (32, 4901, 1)   
print(f'building model with shape {ishape}')
convnet.build(input_shape=ishape)
print(f'TF version: {tf.__version__}')
diter = next(iter(dataset))
x = diter[0]['intensity']
y = diter[1]             
print(f'training data has shape: {x.shape.as_list()}')
print(f'labels have shape {y.shape.as_list()}')
                          
history = convnet.fit(dataset, epochs=num_epochs, steps_per_epoch=16520//batch_size,) 
                          
convnet.save("./model")

Here’s the output:

building model with shape (32, 4901, 1)
TF version: 2.7.0
training data has shape: [32, 4901, 1]
labels have shape [32, 6]
2022-12-19 11:23:00.809121: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
516/516 [==============================] - 82s 157ms/step - loss: 1.4685 - accuracy: 0.3593 - top-5-accuracy: 0.9740
WARNING:tensorflow:Skipping full serialization of Keras layer <__main__.CANet object at 0x154157310>, because it is not built.
Traceback (most recent call last):
  File "/Users/vanessa/work/repos/xtal_powder_pattern_ml/benchmark_data/simulated_data/grand_set/test_attns_short.py", line 175, in <module>
    convnet.save("./model")
  File "/Users/vanessa/miniforge3/envs/xtal_ml_stuff/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/Users/vanessa/miniforge3/envs/xtal_ml_stuff/lib/python3.9/site-packages/keras/saving/saving_utils.py", line 84, in raise_model_input_error
    raise ValueError(
ValueError: Model <__main__.CANet object at 0x154157310> cannot be saved because the input shapes have not been set. Usually, input shapes are automatically determined when calling `.fit()` or `.predict()`. To manually set the shapes, call `model.build(input_shape)

I’m confused why the line convnet.build(input_shape=input_shape) was not effective. I’ve also played with adding an input layer at various locations, but that just shifts the problem elsewhere.

Thanks in advance!

Aetna Medicare Payment

By using model.save to save the whole model and by using load_model to restore previously stored subclassed model. The following code snippets describe how to implement them.

class ThreeLayerMLP(keras.Model):

def init(self, name=None):
super(ThreeLayerMLP, self).init(name=name)
self.dense_1 = layers.Dense(64, activation=‘relu’, name=‘dense_1’)
self.dense_2 = layers.Dense(64, activation=‘relu’, name=‘dense_2’)
self.pred_layer = layers.Dense(10, name=‘predictions’)

def call(self, inputs):
x = self.dense_1(inputs)
x = self.dense_2(x)
return self.pred_layer(x)

def get_model():
return ThreeLayerMLP(name=‘3_layer_mlp’)

model = get_model()

Save the model

model.save(‘path_to_my_model’,save_format=‘tf’)

Recreate the exact same model purely from the file

new_model = keras.models.load_model(‘path_to_my_model’)