How can one apply `keras_tuner` to `xgboost`-based models?

Is keras_tuner also able to tune the hyperparameters of xgboost-based models? In particular, I am trying to use keras_tuner to tune the hyperparameters of the blender (an XGBRegressor) of a stacking regressor. The hyperparameters of its lower-level regressors have already been tuned, so I am only interested in the optimal values for the max_depth and learning_rate of the blender. Here is my procedure:

import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
import xgboost
import keras_tuner as kt


housing = fetch_california_housing()

X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, train_size=0.8, test_size=0.2)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, train_size=0.8, test_size=0.2)

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)  # reuse the training-set statistics
X_test = scaler.transform(X_test)

def build_dnn_reg_opt():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=X_train.shape[1:]))
    model.add(tf.keras.layers.BatchNormalization(momentum=0.999))
    model.add(tf.keras.layers.Dense(42, activation="selu", kernel_initializer="lecun_normal"))
    model.add(tf.keras.layers.BatchNormalization(momentum=0.999))
    model.add(tf.keras.layers.Dense(42, activation="selu", kernel_initializer="lecun_normal"))
    model.add(tf.keras.layers.BatchNormalization(momentum=0.999))
    model.add(tf.keras.layers.Dense(1, kernel_initializer="lecun_normal"))
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.05)
    model.compile(loss="mae", optimizer=optimizer, metrics=["mse"])
    return model

dnn_reg_opt = build_dnn_reg_opt()

rnd_reg_opt = DecisionTreeRegressor(max_depth=8,
                                    min_samples_leaf=32,
                                    max_leaf_nodes=10)

rf_reg_opt = RandomForestRegressor(n_estimators=76, max_leaf_nodes=20)

def build_model_stack(hp):
    max_depth = hp.Int("max_depth", min_value=1, max_value=10, step=1)
    learning_rate = hp.Choice("learning_rate", values=[0.01,0.02,0.03,0.04,0.05,0.06,0.07])
    model = StackingRegressor(estimators=[("rnd_reg_opt", rnd_reg_opt), ("rf_reg_opt", rf_reg_opt), ("dnn_reg_opt", dnn_reg_opt)],
                              final_estimator=xgboost.XGBRegressor(max_depth=max_depth,
                                                                   learning_rate=learning_rate))
    return model

rnd_reg_opt.fit(X_train, y_train)


def exponential_decay(lr0, s):
    def exponential_decay_fn(epoch):
        return lr0 * 0.1 ** (epoch / s)
    return exponential_decay_fn

exponential_decay_fn = exponential_decay(lr0=0.01, s=20)

lr_scheduler_cb = tf.keras.callbacks.LearningRateScheduler(exponential_decay_fn)
early_stop_cb = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5)

dnn_reg_opt.fit(X_train, y_train,
                validation_data=(X_valid, y_valid),
                epochs=50,
                callbacks=[early_stop_cb, lr_scheduler_cb])


rf_reg_opt.fit(X_train, y_train)

seed_value = 42  # fixed seed for reproducibility

tuner_BO = kt.BayesianOptimization(build_model_stack,
                                   objective=kt.Objective("val_loss", direction="min"),
                                   max_trials=10,
                                   seed=seed_value)

tuner_BO.search(X_train, y_train, epochs=50,
                validation_data=(X_valid, y_valid),
                callbacks=[early_stop_cb, lr_scheduler_cb])

best_hps_BO = tuner_BO.get_best_hyperparameters(num_trials=1)[0]

print("BO results:")
print("max_depth: {0}".format(best_hps_BO.get("max_depth")))
print("learning_rate: {0}".format(best_hps_BO.get("learning_rate")))

But the following error is thrown:

RuntimeError: Model-building function did not return a valid Keras Model instance, found StackingRegressor(estimators=[('rnd_reg_opt',
                               DecisionTreeRegressor(max_depth=8,
                                                     max_leaf_nodes=10,
                                                     min_samples_leaf=32)),
                              ('rf_reg_opt',
                               RandomForestRegressor(max_leaf_nodes=20,
                                                     n_estimators=76)),
                              ('dnn_reg_opt',
                               <tensorflow.python.keras.engine.sequential.Sequential object at 0x0000012D308E0D30>)],
                  final_estimator=XGBRegressor(base_score=None, booster=None,
                                               col...
                                               importance_type='gain',
                                               interaction_constraints=None,
                                               learning_rate=0.01,
                                               max_delta_step=None, max_depth=1,
                                               min_child_weight=None,
                                               missing=nan,
                                               monotone_constraints=None,
                                               n_estimators=100, n_jobs=None,
                                               num_parallel_tree=None,
                                               random_state=None,
                                               reg_alpha=None, reg_lambda=None,
                                               scale_pos_weight=None,
                                               subsample=None, tree_method=None,
                                               validate_parameters=None,
                                               verbosity=None))

Can anyone kindly share a workaround for this issue?

I don’t think Keras Tuner can do hyperparameter search for non-Keras models (e.g. xgboost). The model-building function you pass to kt.BayesianOptimization must return a compiled Keras Model, which is exactly what the RuntimeError is telling you: build_model_stack returns a scikit-learn StackingRegressor instead.
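
For reference, here is a minimal sketch of what the standard tuners do expect from the build function (the layer sizes and hyperparameter range below are arbitrary placeholders, not taken from your model):

import tensorflow as tf
import keras_tuner as kt

def build_keras_model(hp):
    # kt.BayesianOptimization calls build(hp) and expects a compiled
    # tf.keras Model back, so it can call fit() and read the History.
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(hp.Int("units", min_value=16, max_value=64, step=16),
                              activation="relu"),
        tf.keras.layers.Dense(1),
    ])
    model.compile(loss="mae", optimizer="adam", metrics=["mse"])
    return model  # a tf.keras.Model instance, not a scikit-learn estimator

A StackingRegressor has no Keras-style fit(..., validation_data=...)/History interface, so the tuner rejects it.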

The workaround I’d try is using TensorFlow Decision Forests instead: its gradient-boosted-trees models are Keras models, so the standard keras_tuner tuners can work with them.
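
A rough sketch of what that could look like. Assumptions: TF-DF's GradientBoostedTreesModel (shrinkage is TF-DF's name for the GBT learning rate), and hypothetical pandas DataFrames train_df / valid_df with a "target" column:

import keras_tuner as kt
import tensorflow_decision_forests as tfdf

def build_gbt(hp):
    # GradientBoostedTreesModel subclasses tf.keras.Model, so keras_tuner accepts it.
    model = tfdf.keras.GradientBoostedTreesModel(
        task=tfdf.keras.Task.REGRESSION,
        max_depth=hp.Int("max_depth", min_value=1, max_value=10, step=1),
        shrinkage=hp.Choice("learning_rate", values=[0.01, 0.03, 0.05, 0.07]),
    )
    model.compile(metrics=["mse"])  # TF-DF models need no loss or optimizer
    return model

# Convert the (hypothetical) DataFrames into the tf.data datasets TF-DF consumes.
train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_df, label="target", task=tfdf.keras.Task.REGRESSION)
valid_ds = tfdf.keras.pd_dataframe_to_tf_dataset(valid_df, label="target", task=tfdf.keras.Task.REGRESSION)

tuner = kt.BayesianOptimization(build_gbt,
                                objective=kt.Objective("val_mse", direction="min"),
                                max_trials=10)
tuner.search(train_ds, validation_data=valid_ds)

(I haven't verified that the validation metric is reported under exactly the name "val_mse" by TF-DF; you may need to adjust the objective to whatever key shows up in the training history.)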