Quantization-aware training with a custom QuantizeConfig -> 4% accuracy loss

Hey,
I want to use quantization-aware training to quantize my model to int8. Unfortunately, I can't simply quantize the entire model, because my first layer (right after the InputLayer) is a batch normalization layer, so I need a custom QuantizeConfig for that layer. My problem is that I see an accuracy drop of around 4%, while with post-training quantization the drop is only 2%. Is there anything wrong with the following code?
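
For reference, the post-training quantization path that only loses ~2% looks roughly like this (a minimal sketch; representative_dataset is a placeholder for my own calibration generator, and the input shape is assumed):

import tensorflow as tf

def representative_dataset():
    # Hypothetical calibration data -- replace with real samples.
    for _ in range(100):
        yield [tf.random.normal([1, 224, 224, 3])]

converter = tf.lite.TFLiteConverter.from_keras_model(base_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
ptq_model = converter.convert()

The QAT code in question follows.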

import tensorflow as tf
import tensorflow_model_optimization as tfmot

LastValueQuantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer
MovingAverageQuantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer

class DefaultDenseQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):

    def get_weights_and_quantizers(self, layer):
        # Attach an 8-bit last-value quantizer to the first two weights
        # (for BatchNormalization: gamma and beta).
        return [(layer.weights[i],
                 LastValueQuantizer(num_bits=8, symmetric=True,
                                    narrow_range=False, per_axis=False))
                for i in range(2)]

    def get_activations_and_quantizers(self, layer):
        # No activations to quantize.
        return []

    def set_quantize_weights(self, layer, quantize_weights):
        layer.weights[0] = quantize_weights[0]
        layer.weights[1] = quantize_weights[1]

    def set_quantize_activations(self, layer, quantize_activations):
        pass

    def get_output_quantizers(self, layer):
        # Quantize the layer output with an 8-bit moving-average quantizer.
        return [MovingAverageQuantizer(
            num_bits=8, per_axis=False, symmetric=False, narrow_range=False)]

    def get_config(self):
        return {}
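
For context on the indices above: a freshly built BatchNormalization layer exposes four weights, of which only the first two (gamma and beta) are trainable, which is why the config loops over range(2). A quick sketch to verify the ordering (layer name and shape are arbitrary):

import tensorflow as tf

bn = tf.keras.layers.BatchNormalization(name="bn_check")
bn.build(input_shape=(None, 16))
for w in bn.weights:
    print(w.name, w.shape, w.trainable)
# Expected order: gamma, beta (trainable),
# then moving_mean, moving_variance (non-trainable).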

def apply_quantization_to_dense(self, layer):
    # Annotate every layer for quantization; the batch-norm layer
    # (index 1, right after the InputLayer) gets the custom config.
    if layer != self.base_model.layers[1]:
        return tfmot.quantization.keras.quantize_annotate_layer(layer)
    else:
        return tfmot.quantization.keras.quantize_annotate_layer(
            layer,
            quantize_config=DefaultDenseQuantizeConfig())

annotated_model = tf.keras.models.clone_model(
    self.base_model,
    clone_function=self.apply_quantization_to_dense,
)
q_aware_model = tfmot.quantization.keras.quantize_annotate_model(annotated_model)

# quantize_apply needs the custom QuantizeConfig in scope to deserialize it.
with tfmot.quantization.keras.quantize_scope(
        {'DefaultDenseQuantizeConfig': DefaultDenseQuantizeConfig}):
    q_aware_model = tfmot.quantization.keras.quantize_apply(q_aware_model)

q_aware_model.compile(optimizer=self.optimizer, loss=self.loss,
                      loss_weights=self.loss_weights)
q_aware_model.fit(self.train_dataset,
                  steps_per_epoch=int(self.ds_train_size / self.batch_size),
                  epochs=self.quantization_dict["quantization_epochs"],
                  callbacks=callbacks)

# Convert the trained QAT model to an int8 TFLite model.
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_tflite_model = converter.convert()
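
To measure the 4% drop, I evaluate the converted model with the TFLite interpreter, roughly like this (a sketch; test_images and test_labels are placeholders for my test set):

import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_content=quantized_tflite_model)
interpreter.allocate_tensors()
input_index = interpreter.get_input_details()[0]["index"]
output_index = interpreter.get_output_details()[0]["index"]

correct = 0
for image, label in zip(test_images, test_labels):  # assumed test data
    interpreter.set_tensor(input_index,
                           np.expand_dims(image, 0).astype(np.float32))
    interpreter.invoke()
    correct += int(np.argmax(interpreter.get_tensor(output_index)) == label)
print("accuracy:", correct / len(test_images))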

I’m having the same problem. Did you find any solutions or explanations?