Gradients for the encoder in my BiGAN network are returning None values. Can someone with experience in this area identify the issue?

I’m implementing a BiGAN network for multivariate time-series data and I’m running into problems training my encoder. When I call tape.gradient(e_loss, encoder.trainable_variables) I receive only None values, even though encoder.trainable_variables returns the expected variables. I’ve tried multiple implementations but keep running into the same problem, and the Stack Overflow questions I’ve found on this error describe solutions that don’t apply to my use case.
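From the documentation I understand that tape.gradient returns None when the target is not connected to the variables being differentiated. A minimal, self-contained toy example (hypothetical model, not my actual encoder) reproduces the same symptom, but I can’t see where my own graph would be disconnected:

import tensorflow as tf

# Toy reproduction of the symptom: if the loss has no path back to a model's
# variables, tape.gradient returns None for every variable.
toy_encoder = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(3,))])
x = tf.random.normal((2, 3))

with tf.GradientTape() as tape:
    z = toy_encoder(x)
    disconnected_loss = tf.reduce_mean(x ** 2)  # deliberately ignores z

grads = tape.gradient(disconnected_loss, toy_encoder.trainable_variables)
print(grads)  # [None, None] -- the same pattern I see for my encoder

Below is the clearest example of my actual setup.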

Building the networks:

def build_encoder(self, x_shape, latent_code_length):
    # Encoder E: maps a time-series window x to a latent code of length latent_code_length[-1].
    x = Input(x_shape)
    model = Dense(128, name='encoder_conv_1')(x)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dense(128, name='encoder_conv_2')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dense(256, name='encoder_conv_3')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dense(256, name='encoder_conv_4')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dense(512, name='encoder_conv_5')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dense(latent_code_length[-1], name='encoder_conv_6')(model)

    return Model(x, model)

def build_generator(self, x_shape, latent_code_length):
    # Generator G: maps a latent code z to a generated time-series window.
    z = Input(latent_code_length)

    model = Dense(512, name = 'first_dense_layer_generator', kernel_regularizer = 'l2')(z)
    model = LeakyReLU(alpha=0.2)(model)
    model = BatchNormalization(momentum=0.8)(model)
    model = Dense(512, name = 'second_dense_layer_generator', kernel_regularizer = 'l2')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = BatchNormalization(momentum=0.8)(model)
    model = Dense(x_shape[-1], activation='tanh', name = 'third_dense_layer_generator', kernel_regularizer = 'l2')(model)
    model = (Reshape(x_shape))(model)

    return Model(z, model)

def build_discriminator(self, x_shape, latent_code_length):
    # Discriminator D: takes a joint pair (x, z) and outputs a validity score.
    z = Input(latent_code_length)
    x = Input(x_shape)
    _z = Flatten()(z)
    _z = Dense(x_shape[0]*x_shape[1])(_z)
    _z = Reshape(x_shape)(_z)
    
    model = Concatenate()([x,_z])
    model = LSTM(64, kernel_initializer='he_uniform', return_sequences=True, name='encoder_1')(x)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dropout(0.5)(model)
    model = LSTM(32, kernel_initializer='he_uniform', return_sequences=True, name='encoder_2')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dropout(0.5)(model)
    model = LSTM(16, kernel_initializer='he_uniform', return_sequences=False, name='encoder_3')(model)
    model = LeakyReLU(alpha=0.2)(model)
    model = Dropout(0.5)(model)
    model = RepeatVector(window_size, name='encoder_decoder_bridge')(model)
    model = LSTM(16, kernel_initializer='he_uniform', return_sequences=True, name='decoder_1')(model)
    model = LSTM(32, kernel_initializer='he_uniform', return_sequences=True, name='decoder_2')(model)
    model = LSTM(64, kernel_initializer='he_uniform', return_sequences=True, name='decoder_3')(model)
    validity = Dense(1, activation="sigmoid")(model)
    
    return Model([x, z], validity)

Defining the training functions:

def build_train_step(self, generator, encoder, discriminator):
    # One Adam optimizer per network so each set of weights is updated independently.
    g_optimizer = Adam(learning_rate=0.0001, beta_1=0.0, beta_2=0.9)
    e_optimizer = Adam(learning_rate=0.0001, beta_1=0.0, beta_2=0.9)
    d_optimizer = Adam(learning_rate=0.0001, beta_1=0.0, beta_2=0.9)
    
    @tf.function
    def train_step(x, z):
        tf.keras.backend.learning_phase()
        with tf.GradientTape(persistent = True) as tape:
          
          x_ = generator(z)
          z_  = encoder(x)

          d_inputs = [tf.concat([x_, x], axis=0),
                      tf.concat([z, z_], axis=0)]
          d_preds = discriminator(d_inputs)
          pred_g, pred_e = tf.split(d_preds,num_or_size_splits=2, axis=0)

          d_loss = tf.reduce_mean(tf.nn.softplus(pred_g)) + \
                   tf.reduce_mean(tf.nn.softplus(-pred_e))
          g_loss = tf.reduce_mean(tf.nn.softplus(-pred_g))
          e_loss = tf.reduce_mean(tf.nn.softplus(pred_e))

          d_gradients = tape.gradient(d_loss, discriminator.trainable_variables)
          g_gradients = tape.gradient(g_loss, generator.trainable_variables)
          e_gradients = tape.gradient(e_loss, encoder.trainable_variables)

          d_optimizer.apply_gradients(zip(d_gradients, discriminator.trainable_variables))
          g_optimizer.apply_gradients(zip(g_gradients, generator.trainable_variables))
          e_optimizer.apply_gradients(zip(e_gradients, encoder.trainable_variables))

        return d_loss, g_loss, e_loss

    return train_step

def train(self):
    # Builds the three networks, prepares the data and runs the training loop.
    check_point = 1000
    iters = 200 * check_point
    x_shape = (1,15)
    latent_code_length = (1,20)
    batch_size = 64

    feat, x_train, _, x_test, _ = generate_datasets_for_training(pdf_train.drop(columns=['timestamp', 'shuttle_id']), window_size)

    num_of_data = x_train.shape[0]
    x_train = np.reshape(x_train, (-1, )+x_shape)
    x_train = (x_train.astype("float32"))

    z_train = np.random.uniform(-1.0, 1.0, (num_of_data, )+latent_code_length).astype("float32")
    z_test = np.random.uniform(-1.0, 1.0, (100, )+latent_code_length).astype("float32")
    
    generator = self.build_generator(x_shape, latent_code_length)
    encoder = self.build_encoder(x_shape, latent_code_length)

    discriminator = self.build_discriminator(x_shape, latent_code_length)
    train_step = self.build_train_step(generator, encoder, discriminator)

    for i in range(iters):
        X_batched = x_train[self.generate_indices_sequence_batches(x_train, batch_size)]
        x = X_batched[np.random.randint(0, X_batched.shape[0])]  # pick one random batch of windows
        print(x.shape)
        z = z_train[np.random.permutation(num_of_data)[:batch_size]]
        print(z.shape)
        d_loss, g_loss, e_loss = train_step(x,z)
        print("\r[{}/{}]  d_loss: {:.4}, g_loss: {:.4}, e_loss: {:.4}".format(i,iters, d_loss, g_loss, e_loss),end="")

I’ve also tried using tf.gradients, but with no luck. I hope someone can help me or point out what I’m missing. If more information is needed, I’m happy to provide it.
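For what it’s worth, a minimal standalone check along these lines (toy shapes, not my real data or the exact encoder above) does produce non-None gradients for a Dense encoder, so I suspect the issue is in how everything is wired together in train_step rather than in the encoder itself:

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LeakyReLU
from tensorflow.keras.models import Model

# Stripped-down stand-in for build_encoder with toy shapes (1, 15) -> (1, 20).
x_in = Input((1, 15))
h = Dense(128)(x_in)
h = LeakyReLU(alpha=0.2)(h)
z_out = Dense(20)(h)
toy_encoder = Model(x_in, z_out)

x = tf.constant(np.random.rand(4, 1, 15).astype("float32"))
with tf.GradientTape() as tape:
    z = toy_encoder(x)
    loss = tf.reduce_mean(tf.square(z))  # this loss actually uses the encoder output

grads = tape.gradient(loss, toy_encoder.trainable_variables)
print(all(g is not None for g in grads))  # True -- gradients flow here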

I call the training function from the class like this:

if __name__ == '__main__':
  
    bigan = BIGAN()
    bigan.train()