Custom loss with external parameters in Keras Tuner

While my code runs without any problems with Keras Tuner and standard loss functions like ‘mse’, I am trying to figure out how to write a custom loss function that accepts an external argument, in addition to the true and forecasted y, for use inside Keras Tuner for LSTM model selection. I am looking for the easiest and least painful way, and I didn’t find a working solution in old posts.

One approach I followed is this one. Let’s say I have these variables:

# External per-sample vector needed by the custom loss function: the
# element-wise logical NOT of column 'd' (rows i_sel), truncated to the
# first sample_start rows.
ex_loss= np.logical_not(klines_backtest.loc[i_sel,['d']].to_numpy(dtype=np.float32)[:sample_start])
# Create the data sequences for x and the vector of y values to forecast.
x_train, y_train = lstm_data_sequence(dataset[:sample_start,:-1], dataset[:sample_start,-1], lstm_sequence)
# Stack the external vector under y and transpose, so that column 0 holds the
# targets and column 1 the external flag (y shape is Nx2). The first
# lstm_sequence flags are dropped -- presumably to line up with the sequences
# returned by lstm_data_sequence; confirm against that helper.
y_train = np.vstack((y_train, ex_loss[lstm_sequence:,0])).T

I have defined the following loss function

def bande_loss(y_true, y_pred):
    """Masked squared-error loss driven by a flag packed into y_true.

    y_true is indexed as a 2-column tensor: column 0 is the target and
    column 1 is the external flag. The intent is that samples whose flag
    equals 1 and whose prediction is at or above the target get weight 0,
    while all other samples contribute their squared error to the mean.
    """
    # NOTE(review): y_true[:,0] is rank 1; if y_pred has shape (batch, 1) this
    # subtraction broadcasts to (batch, batch) -- verify the shapes.
    mse = K.square(y_pred - y_true[:,0])
    # NOTE(review): `and` is Python's short-circuit boolean operator, not an
    # element-wise AND. It needs a single truth value for its left operand,
    # which a per-sample tensor cannot supply; this is the likely source of
    # the "second input must be a scalar" error. An element-wise AND (for
    # example tf.math.logical_and) is probably what is intended -- confirm.
    i_loss = K.equal(y_true[:,1], 1) and K.greater_equal(y_pred, y_true[:,0])
    # Invert the mask and turn it into floats so it can weight the errors.
    i_loss = K.cast(~i_loss, 'float32')
    return K.mean(mse*i_loss)

Basically, I tried to avoid overriding the loss function by passing the additional variable I need in the loss function (which has the same size as y_true) inside y_train, where I expect to have y_true and the corresponding external variable correctly sized for the batch.

The LSTM for model selection is

def lstm_model(hp):
    """Build and compile a stacked-LSTM regressor from tuner hyperparameters.

    Args:
        hp: hyperparameter container supplied by the tuner; it is queried for
            the number of LSTM layers, the units per layer, the dropout and
            recurrent-dropout rates, and the Adam learning rate.

    Returns:
        The compiled model, using ``bande_loss`` both as loss and as metric.
    """
    # Search space, sampled in the same order as the hyperparameter names below.
    depth = hp.Int('num_layers', min_value=4, max_value=8, step=2)
    width = hp.Choice('units', values=[50, 100, 250, 500])
    drop_rate = hp.Choice('n_dropout', values=[0.0, 0.10, 0.20])
    rec_drop_rate = hp.Choice('n_rec_dropout', values=[0.0, 0.10, 0.20])
    step_size = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5, 1e-6])

    model = Sequential()
    model.add(InputLayer(input_shape=(48*3, 13)))

    for layer_idx in range(depth):
        # Every LSTM but the last one hands the full sequence to the next layer.
        is_last = layer_idx == depth - 1
        model.add(LSTM(
            units=width,
            dropout=drop_rate,
            recurrent_dropout=rec_drop_rate,
            return_sequences=not is_last))

    # Single linear output for the forecast value.
    model.add(Dense(1))
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=step_size),
        loss=bande_loss,
        metrics=[bande_loss])
    return model

Executing this code

# Hyperband search over lstm_model, minimising the metric logged as "bande_loss".
# NOTE(review): that name refers to the training-set metric; the validation
# counterpart is normally logged with a "val_" prefix -- confirm which one the
# search is meant to optimise.
tuner = Hyperband(
    hypermodel=lstm_model,
    objective=Objective("bande_loss", direction="min"),
    max_epochs=50,
    hyperband_iterations=2,
    executions_per_trial=1,
    overwrite=True,
    project_name='hyperband_tuner')
# Stop a trial once val_loss has failed to improve for 3 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, verbose=1)
# NOTE(review): Keras' validation_split is the fraction of the data held out
# for validation -- confirm that p_train holds that fraction and not the
# training share.
tuner.search(x_train, y_train, epochs=30, validation_split=p_train, callbacks=[stop_early],
    shuffle=False, verbose=1)

I get this error

 The second input must be a scalar, but it has shape [32]
     [[{{node bande_loss/cond/switch_pred/_2736}}]] [Op:__inference_train_function_45266]

Function call stack:
train_function

Note that 32 is the (default) batch size.

Also running the same code with

def bande_loss(y_true, y_pred):
    """Mean squared difference between y_pred and column 0 of y_true."""
    return K.mean(K.square(y_pred - y_true[:,0]))

seems to work fine while running with

def bande_loss(y_true, y_pred):
    """Mean squared difference between y_pred and column 1 of y_true."""
    return K.mean(K.square(y_pred - y_true[:,1]))

gives me the same error and I cannot understand why.

I also tried the loss function override in this way

def lstm_model(hp):
    """Build and compile a stacked-LSTM regressor from tuner hyperparameters.

    Args:
        hp: hyperparameter container supplied by the tuner; it is queried for
            the number of LSTM layers, the units per layer, the dropout and
            recurrent-dropout rates, and the Adam learning rate.

    Returns:
        The compiled model. Loss and metric are two separate closures
        obtained from ``bande_loss(ex_loss)``, where ``ex_loss`` is read from
        the enclosing module scope.
    """
    # Search space, sampled in the same order as the hyperparameter names below.
    n_layers = hp.Int('num_layers', min_value=4, max_value=8, step=2)
    n_units = hp.Choice('units', values=[50, 100, 250, 500])
    p_drop = hp.Choice('n_dropout', values=[0.0, 0.10, 0.20])
    p_rec_drop = hp.Choice('n_rec_dropout', values=[0.0, 0.10, 0.20])
    adam_lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 1e-5, 1e-6])

    model = Sequential()
    model.add(InputLayer(input_shape=(48*3, 13)))

    final_idx = n_layers - 1
    for idx in range(n_layers):
        # Intermediate LSTMs emit the whole sequence; the final one does not.
        model.add(LSTM(
            units=n_units,
            dropout=p_drop,
            recurrent_dropout=p_rec_drop,
            return_sequences=idx < final_idx))

    # Single linear output for the forecast value.
    model.add(Dense(1))
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=adam_lr),
        loss=bande_loss(ex_loss),
        metrics=[bande_loss(ex_loss)])
    return model

def bande_loss(ex_loss):
    """Return a Keras-style loss function that closes over ex_loss.

    The inner function is meant to give weight 0 to samples where ex_loss is
    True and the prediction is at or above the target, and to average the
    squared error of the remaining samples.
    """
    def loss(y_true, y_pred):
        mse = K.square(y_pred - y_true)
        # NOTE(review): ex_loss is used exactly as captured -- nothing here
        # slices it per batch -- so its length need not match y_true/y_pred;
        # verify how it is supposed to align with each batch.
        # NOTE(review): `and` is Python's short-circuit boolean operator, not
        # an element-wise AND. It needs a single truth value for its left
        # operand, which a vector cannot supply; this is the likely source of
        # the "second input must be a scalar" error. An element-wise AND (for
        # example tf.math.logical_and) is probably what is intended -- confirm.
        i_loss = K.equal(ex_loss, True) and K.greater_equal(y_pred, y_true)
        # Invert the mask and turn it into floats so it can weight the errors.
        i_loss = K.cast(~i_loss, 'float32')
        return K.mean(mse*i_loss)
    return loss

...

# External per-sample vector needed by the custom loss function: the
# element-wise logical NOT of column 'd' (rows i_sel), truncated to the
# first sample_start rows.
ex_loss= np.logical_not(klines_backtest.loc[i_sel,['d']].to_numpy(dtype=np.float32)[:sample_start])
# Create the data sequences for x and the vector of y values to forecast.
x_train, y_train = lstm_data_sequence(dataset[:sample_start,:-1], dataset[:sample_start,-1], lstm_sequence)
# Drop the first lstm_sequence entries and wrap the rest in a boolean backend
# variable. This holds the entire remaining vector, not a per-batch slice.
ex_loss = K.variable(ex_loss[lstm_sequence:], dtype=bool)

# Hyperband search over lstm_model.
# NOTE(review): the objective string must match the name under which the
# metric is logged. A function metric is normally logged under the function's
# own name, and bande_loss(ex_loss) returns a function called `loss` -- verify
# that "bande_loss(ex_loss)" actually appears in the training logs.
tuner = Hyperband(
    hypermodel=lstm_model,
    objective=Objective("bande_loss(ex_loss)", direction="min"),
    max_epochs=50,
    hyperband_iterations=2,
    executions_per_trial=1,
    overwrite=True,
    project_name='hyperband_tuner')
# Stop a trial once val_loss has failed to improve for 3 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, verbose=1)
# NOTE(review): Keras' validation_split is the fraction of the data held out
# for validation -- confirm that p_train holds that fraction and not the
# training share.
tuner.search(x_train, y_train, epochs=30, validation_split=p_train, callbacks=[stop_early],
    shuffle=False, verbose=1)

But I get this error

tensorflow.python.framework.errors_impl.InvalidArgumentError:  The second input must be a scalar, but it has shape [4176]
         [[{{node cond/switch_pred/_12}}]] [Op:__inference_train_function_34471]

Function call stack:
train_function

Can anyone provide me help or a simpler and effective way to implement custom loss functions with external parameters inside Keras Tuner?

1 Like