Floating point exception when creating a TensorFlow TimeDistributed layer

Logan_Kilpatrick · October 25, 2021, 2:26am

Hey folks, cross posting my question from here: keras - Floating point exception when creating a TensorFlow TimeDistributed layer - Stack Overflow

I have so far been unable to resolve this bug. I am running my code and getting floating point exception with no stack trace. I tried to debug and step through my code and it dies after going through the call function twice. Here is the full code:

import os
import sys
from typing import Counter
import tensorflow as tf
import numpy as np
from tensorflow.keras import models, layers, callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot as plt

# Change this to the location of the database directories
DB_DIR = os.path.dirname(os.path.realpath(__file__))

# Import databases
sys.path.insert(1, DB_DIR)
from db_utils import get_imdb_dataset, get_speech_dataset, get_single_digit_dataset

def Secure_Voice_Channel(func):
    """Define Secure_Voice_Channel decorator."""
    def execute_func(*args, **kwargs):
        print('Established Secure Connection.')
        returned_value = func(*args, **kwargs)
        print("Ended Secure Connection.")

        return returned_value

    return execute_func

@Secure_Voice_Channel
class generic_vns_function(tf.keras.Model):
    def __init__(self, input_shape, layers, layer_units): 
        super().__init__() 
        self.convolutions = []

        # Dynamically create Convolutional layers and MaxPools
        for layer in range(len(layers)):
            self.convolutions.append(tf.keras.layers.Conv2D(layer, 3, padding="same", input_shape=input_shape, activation="relu"))

        # Add MaxPooling layer
        self.convolutions.append(tf.keras.layers.MaxPooling2D((2,2)))
        
        # Flatten 
        self.flatten = tf.keras.layers.Flatten()
        
        # Dense layer 
        self.dense1 = tf.keras.layers.Dense(1024, activation="relu")

        
    def call(self, input):
        x = input

        for layer in self.convolutions:
            x = layer(x)

        x = self.flatten(x)
        x = self.dense1(x)

        return x

def train_model(model, epochs, batch_size, X_train, y_train, X_test, y_test):
    """Generic Deep Learning Model training function."""
    cb = [callbacks.EarlyStopping(monitor='val_loss', patience=3)]
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs,
              batch_size=batch_size, verbose=1, callbacks=cb)
    scores = model.evaluate(X_test, y_test, verbose=2)

    print("Baseline Error: %.2f%%" % (100-scores[1]*100))

    return model

def choose_dataset(dataset_type):
    """Select dataset based on string variable."""
    if dataset_type == "nlp":
        return get_imdb_dataset(dir=DB_DIR)
    elif dataset_type == "computer_vision":
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
    elif dataset_type == "speech_recognition":
        # (X_train, y_train), (X_test, y_test), (_, _) = get_speech_dataset()
        (X_train, y_train), (X_test, y_test), (_, _) = get_single_digit_dataset(0)        

    else:
        raise ValueError("Couldn't find dataset.")

    (X_train, X_test) = normalize_dataset(dataset_type, X_train, X_test)

    (X_train, y_train), (X_test, y_test) = reshape_dataset(X_train, y_train, X_test, y_test)

    return (X_train, y_train), (X_test, y_test)

def normalize_dataset(string, X_train, X_test):
    """Normalize speech recognition and computer vision datasets."""
    if string == "computer vision":
        X_train = X_train / 255
        X_test = X_test / 255
    else:
        mean = np.mean(X_train)
        std = np.std(X_train)
        X_train = (X_train-std)/mean
        X_test = (X_test-std)/mean

    return (X_train, X_test)

def reshape_dataset(X_train, y_train, X_test, y_test):
    """Reshape Computer Vision and Speech datasets."""

    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    return (X_train, y_train), (X_test, y_test)

def create_LSTM(cnn_model, input_shape, num_classes):
    input_layer = layers.Input(shape=input_shape)
    distributed_cnn = tf.keras.layers.TimeDistributed(cnn_model)(input_layer)

    x, state_a, state_b = layers.LSTM(distributed_cnn)
    output = layers.Dense(num_classes, activation="softmax")(x)

    model = models.Model(inputs=input_layer, outputs=output)

    opt = Adam()
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    return model

def main():

    # Hyperparameters
    layers = [64, 32]
    layer_units = 1000
    epochs = 10
    batch_size = 200
    lr = 0.001
    filter = 64
    kernal_size = 3
    window = 3

    dataset = "speech_recognition"

    # Import Datasets
    (X_train, y_train), (X_test, y_test) = choose_dataset(dataset)

    num_classes = y_train.shape[1]

    # Reshape both the train and test dataset
    X_train = X_train.reshape(X_train.shape[0], window, int(X_train.shape[1]/window), X_train.shape[2], 1)
    X_test = X_test.reshape(X_test.shape[0], window, int(X_test.shape[1]/window), X_test.shape[2], 1)

    # Generate CNN model
    cnn_model = generic_vns_function(X_train.shape[1:], layers, layer_units)
    print("Created generic CNN model")

    # Create the LSTM CNN with time distributed layer
    model = create_LSTM(cnn_model, X_train.shape[1:], num_classes)
    print("Created LST model")

    trained_model = train_model(model, epochs, batch_size, X_train, y_train, X_test, y_test)

    save_format="tf"
    # # Save model to h5 file
    trained_model.save(save_format, 'models/model_%s_a3.h5' % dataset)

    return None

The error occurs after looping through the call function twice while creating the distributed_cnn = tf.keras.layers.TimeDistributed(cnn_model)(input_layer) . Any ideas as to why? This is baffling to me.

Renu_Patel · January 3, 2024, 6:47am

Hi @Logan_Kilpatrick

Welcome to the TensorFlow Forum!

Please provide some more details on the issue because I did not find any error when I tried replicating the code in Google Colab using TensorFlow 2.15. Attached the replicated gist here for your reference.

Please try again executing the above code with the latest TensorFlow version and let us know if the issue still persists with some more details like which systemOS, TensorFlow version and platform details(Jupyter, VS code, Pycharm etc.) you are using. Thank you.