Loss = nan, accuracy = 0 when calling model.fit()

Hello everyone,

I’ve been stuck on an error for a week and can’t solve it. When I train my model, I get this:

Epoch 1/5
426/426 [==============================] - 8s 12ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0000e+00
Epoch 2/5
426/426 [==============================] - 4s 9ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0000e+00
Epoch 3/5
426/426 [==============================] - 3s 7ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0000e+00
Epoch 4/5
426/426 [==============================] - 2s 6ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0000e+00
Epoch 5/5
426/426 [==============================] - 2s 6ms/step - loss: nan - accuracy: 0.0000e+00 - val_loss: nan - val_accuracy: 0.0000e+00

My images have shape (64, 32, 1) and I have to predict 17 classes. The images contain no NaN values, and I use min-max scaling to normalize them (their raw values range between 0 and 10000). I’ve also tried changing the learning rate, but nothing changes. I’m in a bit of a bind. Do you know what I should do? Is the problem with the data, my normalization, or the model itself (my images aren’t RGB, they’re single-channel)?

Strangely enough, model.fit() works when the output layer has more than 17 neurons: the error disappears, but that’s not what I want, since I need to predict exactly 17 classes.
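In case it’s useful, a quick sanity check on the labels would look like this (a minimal sketch using the reference_df defined in the code below):

# How many distinct classes are there, and what range do the integer labels cover?
print(reference_df['label_posture'].nunique())
print(reference_df['label_posture'].min(), reference_df['label_posture'].max())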

I’d be very grateful if you could help me. You can find my code below:

import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

reference_df = pd.read_csv('/gdrive/MyDrive/HDA_project/reference.csv', names=['id_image','label_subject','label_posture'])
reference_df = reference_df.drop(index=reference_df.index[0], axis=0) # drop the first row (the original header read in as data)
reference_df = reference_df.set_index('id_image') #set index
reference_df.index.name = None #remove the name of the index
reference_df['label_subject'] = reference_df['label_subject'].astype(int)
reference_df['label_posture'] = reference_df['label_posture'].astype(int)

# 72 % train, 8% validation, 20% test
train_val_reference_df, test_reference_df = train_test_split(reference_df, train_size=0.8, stratify=reference_df['label_posture'], random_state=123)
train_reference_df, val_reference_df = train_test_split(train_val_reference_df, train_size=0.9, random_state=123) 

def load_data(file_name, data_dir):
  if isinstance(data_dir, bytes):
    data_dir = data_dir.decode()
  if isinstance(file_name, bytes):
    file_name = file_name.decode()

  # Load the image from the .txt file
  file_mat = '/gdrive/MyDrive/HDA_project/' + data_dir + '/' + file_name + '.txt'
  data = np.loadtxt(file_mat)
  return data

def normalize_data(data):
    min_val = np.min(data)
    max_val = np.max(data)
    data = (data - min_val) / (max_val - min_val)    
    return data

def load_and_preprocess_data(file_name, data_dir):
    # Load data
    data = load_data(file_name, data_dir)

    # Normalize
    data = normalize_data(data)

    return data.astype(np.float32)

def create_dataset(reference_df, batch_size, shuffle, cache_file=None):
  # Convert dataframe to lists
  file_names = list(reference_df.index)
  labels = reference_df['label_posture']

  # Create a Dataset object
  dataset = tf.data.Dataset.from_tensor_slices((file_names, labels))

  # Map the load_and_preprocess_data function
  # NOTE: data_dir must be defined in the enclosing scope (the folder that holds the .txt files)
  py_func = lambda file_name, label: (tf.numpy_function(load_and_preprocess_data, [file_name, data_dir], tf.float32), label)
  dataset = dataset.map(py_func, num_parallel_calls=os.cpu_count())

  # Cache dataset
  if cache_file:
    dataset = dataset.cache(cache_file)

  # Shuffle
  if shuffle:
    dataset = dataset.shuffle(len(file_names))

  # Repeat the dataset indefinitely
  dataset = dataset.repeat()

  # Correct input shape for the network
  dataset = dataset.map(lambda data, label: (tf.reshape(data, [64,32,1]), label))

  # Batch
  dataset = dataset.batch(batch_size=batch_size)

  # Prefetch
  dataset = dataset.prefetch(buffer_size=1)

  return dataset

batch_size = 32
train_dataset = create_dataset(train_reference_df,
                               batch_size=batch_size,
                               shuffle=True,
                               cache_file='train_cache')

val_dataset = create_dataset(val_reference_df,
                             batch_size=batch_size,
                             shuffle=False,
                             cache_file='val_cache')

test_dataset = create_dataset(test_reference_df,
                             batch_size=batch_size,
                             shuffle=False,
                             cache_file='test_cache')

train_steps = int(np.ceil(len(train_reference_df)/batch_size))
val_steps = int(np.ceil(len(val_reference_df)/batch_size))
test_steps = int(np.ceil(len(test_reference_df)/batch_size))

# FUNCTION: PostureModel

def PostureModel(input_shape):
    """
    Implementation of the PostureModel

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    model -- a Model() instance in TensorFlow
    """

    # Input placeholder as a tensor with shape input_shape
    X_input = tf.keras.Input(input_shape)

    # CONV -> Batch Normalization -> ReLU Block -> MAXPOOL applied to X
    X = tf.keras.layers.Conv2D(16, (3, 3), strides=(1,1), padding='same', activation=None)(X_input)
    X = tf.keras.layers.BatchNormalization(axis=-1)(X)
    X = tf.keras.layers.Activation('relu')(X)
    X = tf.keras.layers.MaxPool2D((2, 2), strides=(2, 2), padding='same')(X)

    # CONV -> Batch Normalization -> ReLU Block -> MAXPOOL applied to X
    X = tf.keras.layers.Conv2D(32, (3, 3), strides=(1,1), padding='same', activation=None)(X)
    X = tf.keras.layers.BatchNormalization(axis=-1)(X)
    X = tf.keras.layers.Activation('relu')(X)
    X = tf.keras.layers.MaxPool2D((2, 2), strides=(2, 2), padding='same')(X)

    # FLATTEN THE TENSOR
    X = tf.keras.layers.Flatten()(X)

    # TWO FULLY CONNECTED (DENSE) LAYERS WITH RELU ACTIVATION AND 256 NEURONS EACH
    X = tf.keras.layers.Dense(256, activation='relu')(X)

    X = tf.keras.layers.Dense(256, activation='relu')(X)

    # DROPOUT LAYER (DISCARD PROBABILITY 0.4)
    X = tf.keras.layers.Dropout(0.4)(X)

    # OUTPUT DENSE LAYER WITH 17 NEURONS AND SOFTMAX ACTIVATION
    X = tf.keras.layers.Dense(17, activation='softmax')(X)

    model = tf.keras.Model(inputs = X_input, outputs = X, name='PostureModel')

    return model

# Create and compile the network model

input_shape = (64, 32, 1)
network_model = PostureModel(input_shape)
network_model.compile(optimizer = 'adam', loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])

# Train the model
num_epochs = 5
early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

# Fit the model
history = network_model.fit(train_dataset,
                            epochs=num_epochs,
                            steps_per_epoch=train_steps,
                            validation_data=val_dataset,
                            validation_steps=val_steps,
                            callbacks=[early_stop_callback])

# Save the model
network_model.save('my_model.h5')

Hi @VeTr0x, as you say you have an image dataset: image pixel values normally range between 0 and 255, they cannot be between 0 and 10000. To normalize an image, we divide the pixel values by 255. Also, since you are using sparse_categorical_crossentropy, did you convert your labels from int to categorical? If not, could you please try converting your labels to categorical using the to_categorical method and passing those categorical values during training? Thank you.
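For reference, a minimal sketch of that suggestion, assuming the labels have already been shifted into the 0–16 range; note that with one-hot labels the loss has to be categorical_crossentropy rather than sparse_categorical_crossentropy:

from tensorflow.keras.utils import to_categorical

# Hypothetical example: turn integer posture labels (0..16) into one-hot vectors
int_labels = train_reference_df['label_posture'].to_numpy()
one_hot_labels = to_categorical(int_labels, num_classes=17)  # shape (num_samples, 17)

# With one-hot labels, compile with categorical_crossentropy instead of the sparse variant
network_model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])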

I found the error: my label vector ranged from 1 to 17. I shifted it to 0–16 and the training works now. Thank you!
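For future readers, a minimal sketch of that fix, assuming the reference_df from the code above: sparse_categorical_crossentropy with a 17-unit softmax output expects integer labels in 0..16, so labels running 1..17 produce an out-of-range class index and the loss turns into nan.

# Shift the posture labels from 1..17 down to 0..16 before building the datasets
reference_df['label_posture'] = reference_df['label_posture'].astype(int) - 1

# Sanity check: should now print 0 and 16
print(reference_df['label_posture'].min(), reference_df['label_posture'].max())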