Floating point exception when creating a TensorFlow TimeDistributed layer

Hey folks, cross posting my question from here: keras - Floating point exception when creating a TensorFlow TimeDistributed layer - Stack Overflow

I have so far been unable to resolve this bug. I am running my code and getting floating point exception with no stack trace. I tried to debug and step through my code and it dies after going through the call function twice. Here is the full code:

import os
import sys
from typing import Counter
import tensorflow as tf
import numpy as np
from tensorflow.keras import models, layers, callbacks
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot as plt

# Change this to the location of the database directories
DB_DIR = os.path.dirname(os.path.realpath(__file__))

# Import databases
sys.path.insert(1, DB_DIR)
from db_utils import get_imdb_dataset, get_speech_dataset, get_single_digit_dataset

def Secure_Voice_Channel(func):
    """Define Secure_Voice_Channel decorator."""
    def execute_func(*args, **kwargs):
        print('Established Secure Connection.')
        returned_value = func(*args, **kwargs)
        print("Ended Secure Connection.")

        return returned_value

    return execute_func

class generic_vns_function(tf.keras.Model):
    def __init__(self, input_shape, layers, layer_units): 
        self.convolutions = []

        # Dynamically create Convolutional layers and MaxPools
        for layer in range(len(layers)):
            self.convolutions.append(tf.keras.layers.Conv2D(layer, 3, padding="same", input_shape=input_shape, activation="relu"))

        # Add MaxPooling layer
        # Flatten 
        self.flatten = tf.keras.layers.Flatten()
        # Dense layer 
        self.dense1 = tf.keras.layers.Dense(1024, activation="relu")

    def call(self, input):
        x = input

        for layer in self.convolutions:
            x = layer(x)

        x = self.flatten(x)
        x = self.dense1(x)

        return x

def train_model(model, epochs, batch_size, X_train, y_train, X_test, y_test):
    """Generic Deep Learning Model training function."""
    cb = [callbacks.EarlyStopping(monitor='val_loss', patience=3)]
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs,
              batch_size=batch_size, verbose=1, callbacks=cb)
    scores = model.evaluate(X_test, y_test, verbose=2)

    print("Baseline Error: %.2f%%" % (100-scores[1]*100))

    return model

def choose_dataset(dataset_type):
    """Select dataset based on string variable."""
    if dataset_type == "nlp":
        return get_imdb_dataset(dir=DB_DIR)
    elif dataset_type == "computer_vision":
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
    elif dataset_type == "speech_recognition":
        # (X_train, y_train), (X_test, y_test), (_, _) = get_speech_dataset()
        (X_train, y_train), (X_test, y_test), (_, _) = get_single_digit_dataset(0)        

        raise ValueError("Couldn't find dataset.")

    (X_train, X_test) = normalize_dataset(dataset_type, X_train, X_test)

    (X_train, y_train), (X_test, y_test) = reshape_dataset(X_train, y_train, X_test, y_test)

    return (X_train, y_train), (X_test, y_test)

def normalize_dataset(string, X_train, X_test):
    """Normalize speech recognition and computer vision datasets."""
    if string == "computer vision":
        X_train = X_train / 255
        X_test = X_test / 255
        mean = np.mean(X_train)
        std = np.std(X_train)
        X_train = (X_train-std)/mean
        X_test = (X_test-std)/mean

    return (X_train, X_test)

def reshape_dataset(X_train, y_train, X_test, y_test):
    """Reshape Computer Vision and Speech datasets."""

    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    return (X_train, y_train), (X_test, y_test)

def create_LSTM(cnn_model, input_shape, num_classes):
    input_layer = layers.Input(shape=input_shape)
    distributed_cnn = tf.keras.layers.TimeDistributed(cnn_model)(input_layer)

    x, state_a, state_b = layers.LSTM(distributed_cnn)
    output = layers.Dense(num_classes, activation="softmax")(x)

    model = models.Model(inputs=input_layer, outputs=output)

    opt = Adam()
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    return model

def main():

    # Hyperparameters
    layers = [64, 32]
    layer_units = 1000
    epochs = 10
    batch_size = 200
    lr = 0.001
    filter = 64
    kernal_size = 3
    window = 3

    dataset = "speech_recognition"

    # Import Datasets
    (X_train, y_train), (X_test, y_test) = choose_dataset(dataset)

    num_classes = y_train.shape[1]

    # Reshape both the train and test dataset
    X_train = X_train.reshape(X_train.shape[0], window, int(X_train.shape[1]/window), X_train.shape[2], 1)
    X_test = X_test.reshape(X_test.shape[0], window, int(X_test.shape[1]/window), X_test.shape[2], 1)

    # Generate CNN model
    cnn_model = generic_vns_function(X_train.shape[1:], layers, layer_units)
    print("Created generic CNN model")

    # Create the LSTM CNN with time distributed layer
    model = create_LSTM(cnn_model, X_train.shape[1:], num_classes)
    print("Created LST model")

    trained_model = train_model(model, epochs, batch_size, X_train, y_train, X_test, y_test)

    # # Save model to h5 file
    trained_model.save(save_format, 'models/model_%s_a3.h5' % dataset)

    return None

The error occurs after looping through the call function twice while creating the distributed_cnn = tf.keras.layers.TimeDistributed(cnn_model)(input_layer) . Any ideas as to why? This is baffling to me.