Nan values for sparse categorical cross entropy loss using RNN

Hello, I am new to machine learning and have a question as to why the model fit function is producing a nan value for loss using the sparse categorical cross entropy loss function. the data is scaled so that there are no negative numbers in the dataset and contains no NAN values. The target values are binary 1 or zero and are stored as floats in a numpy arra. Thank you!

def add_indicators(kline_df):

BollingerBands(kline_df)

calculate_macd(kline_df)

calculate_rsi(kline_df)

calculate_ema(kline_df, 150)

calculate_ema(kline_df, 300)

calculate_ema(kline_df, 450)

calculate_ema(kline_df, 600)

calculate_stochastic_oscillator(kline_df)

kline_df = kline_df.dropna()

return kline_df

def target(df, future_window=10, greater=1):

Shift the ‘Close’ column by the future_window value

df[‘Future’] = df[‘Close’].shift(-future_window)

Define the classify function

def classify(current, future):

if current < future * greater:

  return 1

else:

  return 0

Create a new column ‘Target’ using the classify function and the ‘Close’ and ‘Future’ columns

df[‘Target’] = df.apply(lambda x: classify(x[‘Close’], x[‘Future’]), axis=1)

Drop the ‘Future’ column

df = df.drop(columns=[‘Future’], axis=1)

return df

def scale_dataset(data):

minimum = np.min(data)

return data - minimum

def preprocess(df, seq_len=60):

skip_cols = [‘Target’] #, ‘RSI’, ‘MACD’, ‘MACD Signal’, ‘MACD Histogram’, ‘stochastic_oscillator_k’, ‘stochastic_oscillator_d’

for col in df.columns: # go through all of the columns

if col not in skip_cols:  # normalize all ... except for the target

    df[col] = df[col].pct_change()  # pct change "normalizes" 

    df.dropna(inplace=True)  # remove the nas created by pct_change

    df[col] = scale_dataset(df[col].values)

df.fillna(0, inplace=True)

sequential_data = []

prev_periods = deque(maxlen=seq_len)

for i in df.values:

prev_periods.append([n for n in i[:-1]])

if len(prev_periods) == seq_len:

  sequential_data.append([np.array(prev_periods) , i[-1]])

random.shuffle(sequential_data)

buys = []

sells = []

for seq, target in sequential_data:

if target == 0:

  sells.append([seq, target])

else:

  buys.append([seq, target])

random.shuffle(buys)

random.shuffle(sells)

lower = min(len(buys), len(sells))

buys = buys[:lower]

sells = sells[:lower]

sequential_data = buys + sells

random.shuffle(sequential_data)

X = []

y= []

for seq, target in sequential_data:

X.append(seq)

y.append(target)

return np.array(X), y

def create_model(train_x):

Define the network architecture

model = tf.keras.Sequential()

model.add(tf.keras.layers.LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))

model.add(tf.keras.layers.Dropout(0.2))

model.add(tf.keras.layers.BatchNormalization())

model.add(tf.keras.layers.LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))

model.add(tf.keras.layers.Dropout(0.2))

model.add(tf.keras.layers.BatchNormalization())

model.add(tf.keras.layers.LSTM(128, input_shape=(train_x.shape[1:])))

model.add(tf.keras.layers.Dropout(0.2))

model.add(tf.keras.layers.BatchNormalization())

model.add(tf.keras.layers.Dense(32, activation=‘relu’))

model.add(tf.keras.layers.Dropout(0.2))

model.add(tf.keras.layers.Dense(2, activation=‘softmax’))

Compile the model

model.compile(loss=‘sparse_categorical_crossentropy’, optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), metrics=[‘accuracy’])

return model

def train_model(train_x, train_y, test_x, test_y, batch_size=128, epochs=10, seq_length=60):

train_x = np.asarray(train_x)

train_y = np.asarray(train_y)

test_x = np.asarray(test_x)

test_y = np.asarray(test_y)

name = f’{seq_length}-{batch_size}-BTC-Indicator-{datetime.now()}’

tensorboard = TensorBoard(log_dir=f’/content/drive/MyDrive/ML Models/Logs/{name}’)

filepath = “RNN_Final-{epoch:02d}-{val_acc:.3f}”

#checkpoint = ModelCheckpoint(’/content/drive/MyDrive/ML Models/Models/{}.model’.format(filepath, monitor=‘val_acc’, verbose=1, save_best_only=True, mode=‘max’))

model = create_model(train_x)

history = model.fit(train_x, train_y, batch_size=batch_size, epochs=epochs, validation_data=(test_x, test_y), callbacks=[tensorboard])

return model

def run_all(data, seq_len=60, future_window=10, batch_size=128, epochs=10, greater_v=1):

data_indic = add_indicators(data)

data_indic = target(data_indic, future_window=future_window, greater=greater_v)



train_size = int(0.9 * len(data_indic))

train_df = data_indic.iloc[:train_size]

test_df = data_indic.iloc[train_size:]



train_x, train_y = preprocess(train_df, seq_len=seq_len)

test_x, test_y = preprocess(test_df, seq_len=seq_len)

model = create_model(train_x)



model = train_model(train_x, train_y, test_x, test_y, batch_size=batch_size, epochs=epochs, seq_length=seq_len)

kline_indic = kline_indic.iloc[int(0.6 * len(kline_indic)):]

run_all(kline_indic)

Hi @harmonic_vega

Welcome to the TensorFlow Forum!

Could you please share the code in understandable format because it’s difficult to find the issue with the given code? Also please share the dataset type and or the dataset(if it is shareable) to replicate and fix the error. Thank you.