WGAN-GP implementation for stock market forecasting using tfjs

Hey there, I am trying to implement a WGAN model with gradient penalty (WGAN-GP) based on this paper. I have managed to convert the Python code to tfjs and got the training to run.

The issue I am facing is that both the discriminator and generator losses increase exponentially and reach absurd values.

I have also tested the same architecture in Python with every hyperparameter identical, and there everything trains fine.
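
For reference, the losses I am trying to reproduce are the standard WGAN-GP objectives (this is my own plain-text summary of the formulation, plus the two extra generator terms I use in the code below):

critic:     L_D = E[D(fake)] - E[D(real)] + LAMBDA * E[(||grad D(x_hat)||_2 - 1)^2],
            with x_hat a random interpolation between a real and a fake sample
generator:  L_G = -E[D(fake)] + lambda1 * MSE(real, fake) + lambda2 * mean(|sign(real) - sign(fake)|)

Both of these should stay in a reasonable range, but in my tfjs version they explode.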

Here is my code

Generator Model:

const generator = ({ input_dimension, output_dimension, feature_size, weight_initializers }) => {
    const model = tf.sequential();

    // Conv1D over the input window: [batch, time_step, feature_size] -> [batch, time_step, 32]
    model.add(tf.layers.conv1d({
        filters: 32,
        kernelSize: 2,
        strides: 1,
        padding: 'same',
        kernelInitializer: weight_initializers,
        batchInputShape: [null, input_dimension, feature_size]
    }))

    model.add(tf.layers.leakyReLU({ alpha: 0.1 }))

    // Bidirectional LSTM collapses the sequence into a single feature vector
    model.add(tf.layers.bidirectional({
        layer: tf.layers.lstm({
            units: 64,
            activation: 'relu',
            kernelInitializer: weight_initializers,
            returnSequences: false,
            dropout: 0.3,
            recurrentDropout: 0.0
        })
    }))

    // Two dense blocks with LeakyReLU and dropout
    model.add(tf.layers.dense({ units: 64, activation: 'linear' }))
    model.add(tf.layers.leakyReLU({ alpha: 0.1 }))
    model.add(tf.layers.dropout({ rate: 0.2 }))

    model.add(tf.layers.dense({ units: 32, activation: 'linear' }))
    model.add(tf.layers.leakyReLU({ alpha: 0.1 }))
    model.add(tf.layers.dropout({ rate: 0.2 }))

    // One output value per forecast step (look_ahead values)
    model.add(tf.layers.dense({ units: output_dimension }))

    return model
}

Discriminator Model:

const discriminator = ({ timeStep, lookAhead, weight_initializers }) => {
    const model = tf.sequential();

    // Input is the full price sequence: past window plus forecast, shape [timeStep + lookAhead, 1]
    model.add(tf.layers.conv1d({
        filters: 32,
        kernelSize: 2,
        strides: 1,
        padding: 'same',
        kernelInitializer: weight_initializers,
        inputShape: [timeStep + lookAhead, 1]
    }))

    model.add(tf.layers.leakyReLU({ alpha: 0.1 }))

    model.add(tf.layers.conv1d({
        filters: 64,
        kernelSize: 2,
        strides: 1,
        padding: 'same',
        kernelInitializer: weight_initializers,
    }))

    model.add(tf.layers.leakyReLU({ alpha: 0.1 }))

    model.add(tf.layers.flatten())

    model.add(tf.layers.dense({ units: 64, activation: 'linear', useBias: true }))
    model.add(tf.layers.leakyReLU({ alpha: 0.1 }))
    model.add(tf.layers.dropout({ rate: 0.2 }))

    model.add(tf.layers.dense({ units: 32, activation: 'linear', useBias: true }))
    model.add(tf.layers.leakyReLU({ alpha: 0.1 }))
    model.add(tf.layers.dropout({ rate: 0.2 }))

    // Single unbounded critic score (no sigmoid, as required for a Wasserstein critic)
    model.add(tf.layers.dense({ units: 1, activation: 'linear' }))

    return model
}
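
For context, both models build and produce the shapes I expect. Here is a quick shape check on dummy tensors (illustration only, using the hyperparameters from the train() call below and assuming @tensorflow/tfjs is imported as tf, as in the rest of my code):

// Shape sanity check (not part of the training code)
const init = tf.initializers.randomNormal({ mean: 0.0, stddev: 0.02 });
const g = generator({ input_dimension: 14, output_dimension: 5, feature_size: 5, weight_initializers: init });
const d = discriminator({ timeStep: 14, lookAhead: 5, weight_initializers: init });

console.log(g.predict(tf.zeros([8, 14, 5])).shape); // [8, 5]  -> one value per look_ahead step
console.log(d.predict(tf.zeros([8, 19, 1])).shape); // [8, 1]  -> one critic score per sequence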

Main train function:

// time_step = 14
// look_ahead = 5
// feature_size = 5
async function train(XTrain, yTrain, pastY, epochs, time_step, look_ahead, feature_size, batchSize) {
    // Define the optimizer for both discriminator and generator
    const dOptimizer = tf.train.adam(0.0004, 0.5, 0.9)
    const gOptimizer = tf.train.adam(0.0001, 0.5, 0.9);
    const weight_initializers = tf.initializers.randomNormal({ mean: 0.0, stddev: 0.02 });
    const generator_ = generator({ 
          input_dimension: time_step, 
          output_dimension: look_ahead, 
          feature_size, 
          weight_initializers 
       })

    const discriminator_ = discriminator({ 
         timeStep: time_step, 
         lookAhead: look_ahead, 
         weight_initializers
      })

    const trainHist = {
        losses: [],
        D_losses: [],
        G_losses: [],
        per_epoch_times: [],
        total_ptime: []
    };
    let Real_price
    let Generated_price
    let preds = []

    const data = [tf.tensor(XTrain), tf.tensor(yTrain), tf.tensor(pastY)];
    for (let epoch = 0; epoch < epochs; epoch++) {
        log.error(`Epoch ${epoch + 1} of ${epochs}`);
        const {
            yTrainTensor,
            generatorData,
            discriminatorLoss,
            generatorLoss
        } = await trainStep(data, time_step, look_ahead, dOptimizer, gOptimizer, generator_, discriminator_);

        // Record the per-epoch losses (these are the values that blow up)
        trainHist.D_losses.push(discriminatorLoss);
        trainHist.G_losses.push(generatorLoss);
    }
}

Here is my trainStep function

// Build a "fake" full sequence: the real past window concatenated with the
// generator's forecast, shape [batch, time_step + look_ahead, 1]
const generateNoise = (generator_, pastYTrainTensor, xTrain_data) => {
    const generator_data = generator_.apply(xTrain_data, { training: true });
    const generator_data_reshape = generator_data.reshape([generator_data.shape[0], generator_data.shape[1], 1]);
    return tf.cast(pastYTrainTensor.concat(generator_data_reshape, 1), 'float32');
}

// Calculate the gradient penalty on points interpolated between real and fake sequences
const gradientPenalty = (batchSize, time_step, look_ahead, discriminator_, fakeData, realData) => tf.tidy(() => {
    // Random interpolation coefficients
    const alpha = tf.randomNormal([batchSize, time_step + look_ahead, 1], 0.0, 1.0, 'float32');
    const diff = fakeData.sub(realData);
    const interpolated = realData.add(alpha.mul(diff));

    // Gradient of the critic output with respect to the interpolated input
    const gradientsFn = tf.grad(x => discriminator_.apply(x, { training: true }));
    const grad_fo_calc = tf.tensor(gradientsFn(interpolated).arraySync())

    // Penalise deviations of the gradient L2 norm from 1
    const gradientsNorm = grad_fo_calc.square().sum([1, 2]).sqrt();
    const gp = gradientsNorm.sub(tf.scalar(1)).square().mean();

    return tf.cast(gp, 'float32')
})

async function trainStep(data, time_step, look_ahead, dOptimizer, gOptimizer, generator_, discriminator_) {
    const [xTrainTensor, yTrainTensor, pastYTrainTensor] = data;
    // xTrainTensor shape: (82, 14, 1)
    // yTrainTensor shape: (82, 5)
    // pastYTrainTensor shape: (82, 14, 1)
    const batchSize = xTrainTensor.shape[0];
    let dLossValue = 0;
    let gLossValue = 0;
    let generatorData;
    const LAMBDA = tf.tensor(10.0, [1], 'float32') // Gradient penalty lambda hyperparameter
    const lambda1 = 0.5; // Extra loss term for speeding up training
    const lambda2 = 0.5; // Extra loss term for speeding up training

    try {
        // Process real data
        const realYReshape = yTrainTensor.reshape([yTrainTensor.shape[0], yTrainTensor.shape[1], 1]);
        const realOutput = tf.cast(pastYTrainTensor, 'float32').concat(tf.cast(realYReshape, 'float32'), 1);

        // Train the discriminator (critic) 5 times per generator update
        for (let i = 0; i < 5; i++) {
            // Calculate discriminator loss, compute gradients of the loss with respect to discriminator's inputs
            const { value: d_value, grads: d_grads } = dOptimizer.computeGradients(() => tf.tidy(() => {
                // Generate fake data
                const generatorData = generateNoise(generator_, pastYTrainTensor, xTrainTensor)

                // Get predictions from discriminator
                const DReal = discriminator_.apply(realOutput, { training: true }) // shape [batchSize, 1]
                const DFake = discriminator_.apply(generatorData, { training: true }) // shape [batchSize, 1]
                // console.log('D : ', DReal.arraySync()[0][0], DFake.arraySync()[0][0])

                // Wasserstein critic loss: E[D(fake)] - E[D(real)].
                // A value near 0 means the critic can no longer separate the two distributions.
                const dCost = (tf.cast(DReal, 'float32').mean().sub(tf.cast(DFake, 'float32').mean())).mul(-1);

                // Add the gradient penalty term
                const gp = gradientPenalty(batchSize, time_step, look_ahead, discriminator_, generatorData, realOutput)
                return dCost.add(gp.mul(LAMBDA)).asScalar();
            }), discriminator_.getWeights());

            dLossValue = d_value.dataSync();
            dOptimizer.applyGradients(d_grads)
        }

        console.log('<----------------------------------------->')

        // Train the generator only once
        // Compute gradients of the loss with respect to generator's inputs
        const { value: g_value, grads: g_grads } = gOptimizer.computeGradients(() => tf.tidy(() => {
            // Generate fake output
            generatorData = generateNoise(generator_, pastYTrainTensor, xTrainTensor)
            tf.keep(generatorData)
            const realYReshape = yTrainTensor.reshape([yTrainTensor.shape[0], yTrainTensor.shape[1], 1]);
            const realOutput = tf.cast(pastYTrainTensor, 'float32').concat(tf.cast(realYReshape, 'float32'), 1);

            // Get the discriminator logits for fake data
            const GGenerated = discriminator_.apply(generatorData, { training: true });

            // Calculate the generator loss
            const g_mean = GGenerated.mean().mul(-1)
            const gMse = tf.losses.meanSquaredError(realOutput, generatorData)
            const gSign = tf.abs(tf.sign(realOutput).sub(tf.sign(generatorData))).mean();

            const gLoss = g_mean.add(gMse.mul(lambda1)).add(gSign.mul(lambda2));
            return gLoss;
        }), generator_.getWeights());

        gLossValue = g_value.dataSync();
        gOptimizer.applyGradients(g_grads);

    }
    catch (e) {
        tf.dispose()
        console.log('Error in trainStep')
        console.log(e.stack)
    }
    return { yTrainTensor, generatorData, discriminatorLoss: dLossValue, generatorLoss: gLossValue };
}

Maybe something was lost in the translation from Python. I have been debugging this for days and I am at a complete standstill.

Any and all help and insight is really appreciated.