After modified Huggingface Transformers TFBertModel to adapter-BERT, I got an error "ValueError: The first argument to Layer.call
must always be passed. ". I used a Layer class to define adapter output layer, and wanted to replace the encoder/attention output layers and encoder output layers.
The code for defing adapter output layer is as following:
class AdapterOutputLayer(tf.keras.layers.Layer):
def __init__(self, pretrained_dense, pretrained_ln, config, **kwargs):
super(AdapterOutputLayer, self).__init__(**kwargs)
self.pretrained_dense = pretrained_dense
self.pretrained_ln = pretrained_ln
self.config = config
def build(self, input_shape):
self.dense = tf.keras.layers.Dense(units=self.params.hidden_size,
kernel_initializer=self.create_initializer(),
name="dense")
self.dropout = tf.keras.layers.Dropout(self.config.hidden_dropout_prob)
self.adapter_down = tf.keras.layers.Dense(
units=self.config.bottleneck_size,
kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=1e-3),
activation=ACT2FN[self.config.non_linearity],
name="adapter-down")
self.adapter_up = tf.keras.layers.Dense(
units=self.config.hidden_size,
kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=1e-3),
name="adapter-up")
self.layer_norm = LayerNormalization(name="LayerNorm")
super(AdapterOutputLayer, self).build(input_shape)
def call(self, inputs, training=False, **kwargs):
output, residual = inputs
output = self.dense(output)
output = self.dropout(output, training=training)
adapted = self.adapter_down(output)
adapted = self.adapter_up(adapted)
output = tf.add(output, adapted)
output = self.LayerNorm(tf.add(output, residual))
return output
The code for using AdapterOutputLayer to replace TFBert encoder output layers:
bert_model = TFBertModel(config).from_pretrained(model_path)
ly_bert = bert_model.bert
for i in range(config.num_hidden_layers):
ly_bert.encoder.layer[i].attention.dense_output = AdapterOutputLayer(
ly_bert.encoder.layer[i].attention.dense_output.dense,
ly_bert.encoder.layer[i].attention.dense_output.LayerNorm,
config)
ly_bert.encoder.layer[i].bert_output = AdapterOutputLayer(
ly_bert.encoder.layer[i].bert_output.dense,
ly_bert.encoder.layer[i].bert_output.LayerNorm,
config)
Then, when I build model or train model, tensorflow raise the ValueError.
Could you please help me to solve this error, or tell me the correct method to build adapter layer on BERT?
Thank you so much!