Source code for tensorflow_quantization.quantize

#
# SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#    
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Union
import tensorflow as tf
import tensorflow_quantization.quantize_wrappers as quantize_wrappers
import tensorflow_quantization.global_config as cfg
import tensorflow_quantization.quantize_config as quantize_config
from dataclasses import dataclass
from tensorflow_quantization.quantize_wrappers import DISABLED_LAYER_QUANTIZATION_DEFAULT


@dataclass
class LayerConfig:
    """
    Internal dataclass for a single layer config.
    Args:
        name (str): Name of the layer. As seen from utilities such as `model.summary()`
        is_keras_class (bool) : Set this to True if layer_name passed represents a layer class from Keras.
            Default is False.
        quantize_input (bool): Set this to True if input to the layers should be quantized. Default is True
            since default behavior is following Nvidia quantization recipe.
        quantize_weight (bool): Set this to True if weights to the layers should be quantized. Default is True
            since default behavior is following Nvidia quantization recipe. For weightless layers, value is
            ignored.
        quantization_index (List): Indices on inputs to which quantization is applied for the layers with
            multiple inputs. E.g Add, Concatenate
    Returns:
        None
    """

    name: str = None
    is_keras_class: bool = False
    quantize_input: bool = True
    quantize_weight: bool = True
    quantization_index: list = None


[docs]class QuantizationSpec:
    """
    Helper class holding config objects for all layers to quantize.
    """

    def __init__(self) -> None:
        self.layers = []

    def __str__(self) -> str:
        for l in self.layers:
            print(l)
        return ""

[docs]    def add(
        self,
        name: Union[str, List],
        is_keras_class: Union[bool, List] = False,
        quantize_input: Union[bool, List] = True,
        quantize_weight: Union[bool, List] = True,
        quantization_index: Union[List, List[List]] = None,
    ) -> None:
        """
        Takes user parameters and adds LayerConfig object to a list for each add call.

        Args:
            name (Union[str, List]): Name of the layer. As seen from utilities such as `model.summary()`
            is_keras_class (Union[bool, List]): List or a single value. Set this to True if layer_name passed represents a layer class from Keras.
                Default is False.
            quantize_input (Union[bool, List]): List or a single value. Set this to True if input to the layers should be quantized. Default is True
                since default behavior is following Nvidia quantization recipe.
            quantize_weight (Union[bool, List]): List or a single value. Set this to True if weights to the layers should be quantized. Default is True
                since default behavior is following Nvidia quantization recipe. For weightless layers, value is
                ignored.
            quantization_index (Union[List, List[List]]): List or List of List. List with indices on inputs to which quantization is applied for the layers with
                multiple inputs. E.g Add, Concatenate
        Returns:
            None
        """
        if not isinstance(name, list):
            self.layers.append(
                LayerConfig(
                    name=name,
                    is_keras_class=is_keras_class,
                    quantize_input=quantize_input,
                    quantize_weight=quantize_weight,
                    quantization_index=quantization_index,
                )
            )
        else:
            # layer names is passed as a list
            if isinstance(is_keras_class, list):
                assert len(name) == len(
                    is_keras_class
                ), "[E] `is_keras_class` is a list but length is not same as layer `name` list"
            if isinstance(quantize_input, list):
                assert len(name) == len(
                    quantize_input
                ), "[E] `quantize_input` is a list but length is not same as layer `name` list"
            if isinstance(quantize_weight, list):
                assert len(name) == len(
                    quantize_weight
                ), "[E] `quantize_weight` is a list but length is not same as layer `name` list"
            if isinstance(quantization_index, list):
                assert len(name) == len(
                    quantization_index
                ), "[E] `quantization_index` is list but length is not same as layer `name` list"
            for i, e in enumerate(name):
                cl_name = e
                cl_is_keras_class = (
                    is_keras_class[i]
                    if isinstance(is_keras_class, list)
                    else is_keras_class
                )
                cl_quantize_input = (
                    quantize_input[i]
                    if isinstance(quantize_input, list)
                    else quantize_input
                )
                cl_quantize_weight = (
                    quantize_weight[i]
                    if isinstance(quantize_weight, list)
                    else quantize_weight
                )
                cl_quantization_index = (
                    quantization_index[i]
                    if isinstance(quantization_index, list)
                    else quantization_index
                )
                self.layers.append(
                    LayerConfig(
                        name=cl_name,
                        is_keras_class=cl_is_keras_class,
                        quantize_input=cl_quantize_input,
                        quantize_weight=cl_quantize_weight,
                        quantization_index=cl_quantization_index,
                    )
                )


def _skip_layer(layer: tf.keras.layers.Layer) -> bool:
    """
    Decide whether quantization wrapping should be skipped for the given layer.
    The decision is made based on an internal quantize config object parameters.

    Args:
        layer (tf.keras.layers.Layer): Keras model layer
    Returns:
        bool: True if given layer should not be quantized else False
    """
    config_object = cfg.get_config_object()

    # Check if any layer with Disabled Quantization by default are in the 'config_object.layer_classes_to_quantize'.
    #   If so, that layer will be enabled for quantization. Otherwise, skip (return True).
    layer_class_name = layer.__class__.__name__
    if layer_class_name in DISABLED_LAYER_QUANTIZATION_DEFAULT:
        if layer_class_name not in config_object.layer_classes_to_quantize:
            if layer.name in config_object.get_layer_config():
                # User can enable a single layer even if the default behavior of a Class is to not quantize.
                # The decision of whether to quantize this layer or not will be left for later checks, such as when
                #   quantize_input and quantize_weight = False.
                pass
            else:
                # Default behavior: skip layer
                return True

    # 1. When quantize_input = False, quantize_weight = False and quantization_index=None, don't even wrap the layer.
    if layer.name in config_object.get_layer_config():
        current_layer_config = config_object.get_layer_config()[layer.name]
        if (
            current_layer_config["qbool_list"][0] == False  # quantize_input
            and current_layer_config["qbool_list"][1] == False  # quantize_weight
            and "qindex_list" not in current_layer_config
        ):
            print(
                "[I] Layer `{layer_name}` is not quantized. There is nothing to quantize since "
                "quantize_input = False, quantize_weight = False and quantization_index=None".format(
                    layer_name=layer.name
                )
            )

            return True

    # 2. Called when quantization_mode is `partial`
    if config_object.config_class_id == 2:
        # A. Skip current `layer class` if current layer class is not in user provided QuantizationSpec class
        #      object. However, when current layer name is passed by user to quantize, don't skip the layer.
        if (
                len(config_object.layer_classes_to_quantize) != 0
                and layer.__class__.__name__ not in config_object.layer_classes_to_quantize
        ):
            if layer.name in config_object.get_layer_config():
                return False
            else:
                print(
                    "[I] Layer class `{layer_class_name}` is not quantized. Partial quantization is enabled "
                    "and layer class is not in user provided QuantizationSpec class object".format(
                        layer_class_name=layer.__class__.__name__
                    )
                )
                return True
        # B. Skip current layer if `layer.name` is not in user provided QuantizationSpec class object.
        #      However, if current layer class is passed by user to quantize, don't skip the layer.
        elif layer.name not in config_object.get_layer_config():
            if layer.__class__.__name__ in config_object.layer_classes_to_quantize:
                return False
            else:
                print(
                    "[I] Layer `{layer_name}` is not quantized. Partial quantization is enabled and layer name is not "
                    "in user provided QuantizationSpec class object".format(
                        layer_name=layer.name
                    )
                )
                return True

    return False


def _quantize_model_layer_clone_function(
    layer: tf.keras.layers.Layer,
) -> "BaseQuantizeWrapper":
    """
    Wrap or leave given layer based on quantize config object parameters.
    Args:
        layer (tf.keras.layers.Layer): Keras model layer
    Returns:
        BaseQuantizeWrapper: layer wrapped in BaseQuantizeWrapper class.
    """
    layer_wrapper = layer
    if _skip_layer(layer):
        # Skip the layers not specified by the user.
        pass
    else:
        child_wrappers_dict = quantize_wrappers.BaseQuantizeWrapper.CHILD_WRAPPERS
        possible_wrapper_name_for_this_layer = (
            layer.__class__.__name__ + "QuantizeWrapper"
        )
        if possible_wrapper_name_for_this_layer in child_wrappers_dict:
            wrapper_function = child_wrappers_dict[possible_wrapper_name_for_this_layer]
            layer_wrapper = wrapper_function(layer)
    return layer_wrapper


def _execute_quantize_model(
    model: tf.keras.Model, class_id: int, qspec: QuantizationSpec = None
) -> tf.keras.Model:
    """
    clone the model and apply quantization to specific layers based on quantize config object parameters.
    Args:
        model (tf.keras.Model): Keras functional or sequential model.
            * Currently Subclassed models are not supported
        class_id (int): internal quantization class ID
        qspec (QuantizationSpec): object of QuantizationSpec class. If few layers or layer classes are to be treated
            differently, LayerConfig class objects for that layer/layer class are created internally and
            added to QuantizationSpec class.
    Returns:
        tf.keras.Model: Quantized model with QDQ nodes added.
    """
    config_id_class_name_map = {
        0: "FullNetworkQuantization",
        1: "FullNetworkSpecialQuantization",
        2: "PartialNetworkQuantization",
    }

    # 1. Create quantize config object
    q_config_object = getattr(quantize_config, config_id_class_name_map[class_id])()

    # 2. Update object attributes
    if qspec:
        q_config_object.add_quantization_spec_object(qspec, model.layers)

    assert (
        cfg.is_config_object_created()
    ), "[E] Have you created the quantization config object before calling `quantize_model`?"

    # Wrap quantizable layers
    model = tf.keras.models.clone_model(
        model, input_tensors=None, clone_function=_quantize_model_layer_clone_function
    )

    # Clean global space afterwards
    q_config_object.clean()

    return model


def _recognize_config_class_id(
    quantization_mode: str = "full", qspec: QuantizationSpec = None
) -> int:
    """
    Interpret internal quantize config class based on parameters passed by user to
    `quantize_model` function.
    Args:
        quantization_mode (str): Either 'full' or 'partial' quantization mode
        qspec (QuantizationSpec): object of QuantizationSpec class. If few layers or layer classes are to be treated
            differently, LayerConfig class objects for that layer/layer class are created internally and
            added to QuantizationSpec class.
    Returns:
        int: ID for quantization category class used internally.
    Raises:
        Exception: if no class can be interpreted for given parameter combination
    """
    if quantization_mode == "full" and qspec is None:
        return 0
    elif quantization_mode == "full" and qspec is not None:
        return 1
    elif quantization_mode == "partial" and qspec is not None:
        return 2
    else:
        raise Exception(
            "Could not recognize config class ID."
            " Are parameters passed to `quantize_model` function correct?"
        )


def _validate_config(
    quantization_mode: str = "full", qspec: QuantizationSpec = None
) -> None:
    """
    Validate if parameters passed to `quantize_model` makes sense.
    Args:
        quantization_mode (str): quantization mode can be either 'full' or 'partial'
        qspec (QuantizationSpec): object of QuantizationSpec class. If few layers or layer classes are to be treated
            differently, LayerConfig class objects for that layer/layer class are created internally and
            added to QuantizationSpec class.
    Returns:
        None
    Raises:
        AssertionError: when configuration is not valid.
    """

    def _verify_support_for_all_layer_classes(qspec: QuantizationSpec):
        for layer in qspec.layers:
            if layer.is_keras_class:
                # Layer class name is provided.
                child_wrappers_dict = (
                    quantize_wrappers.BaseQuantizeWrapper.CHILD_WRAPPERS
                )
                possible_wrapper_name_for_this_layer = layer.name + "QuantizeWrapper"
                assert possible_wrapper_name_for_this_layer in child_wrappers_dict, (
                    "[E] layer class `{layer_name}` is not supported yet! Either there is no native wrapper or user "
                    "provided wrapper registration failed.".format(
                        layer_name=layer.name
                    )
                )

    if qspec:
        _verify_support_for_all_layer_classes(qspec)

    if quantization_mode == "partial":
        assert (
            qspec is not None
        ), "[E] `QuantizationSpec` class object must be passed when `quantization_mode=partial`."


[docs]def quantize_model(
    model,
    quantization_mode: str = "full",
    quantization_spec: QuantizationSpec = None,
    custom_qdq_cases: List["CustomQDQInsertionCase"] = None,
) -> tf.keras.Model:
    """
    Insert Q/DQ nodes in Keras model and return a copy. Weights are preserved unlike native keras clone.

    Args:
        model(tf.keras.Model): Keras Functional or Sequential model.subclassed models are not yet supported.
        quantization_mode(str): quantization mode can be either 'full' or 'partial'
        quantization_spec(QuantizationSpec) : object of QuantizationSpec class. If few layers or layer classes are to
            be treated differently, LayerConfig class objects for that layer/layer class are created internally and
            added to QuantizationSpec class.
        custom_qdq_cases(List[CustomQDQInsertionCase]) : `Case` method on every object in this list is called by passing
            model and user passed quantization_spec as arguments. Each member of this list is an object of a class
            inherited from CustomQDQInsertionCase class.

    Raises:
        AssertionError: When passed model is subclassed.
        AssertionError: When CustomQDQInsertionCase does not return QuantizationSpec object.
        AssertionError: When quantization mode is `partial` but QuantizationSpec object is not passed.
        AssertionError: When quantization wrapper is not found for desired layer class.
        ExceptionError: When internal quantization class ID can't be detected. This happens when passed parameters
            do not make sense.
    Returns:
        tf.keras.Model: Quantized model with QDQ nodes inserted according to NVIDIA quantization recipe.
    """
    supported_model_classes = {"Functional", "Sequential"}
    assert (
        model.__class__.__name__ in supported_model_classes
    ), "[E] Currently only `Functional` or `Sequential` model quantization is supported."

    # Update quantization_spec object based on output of special QDQ cases.
    custom_quantization_spec = QuantizationSpec()
    if custom_qdq_cases:
        for custom_qdq_case in custom_qdq_cases:
            qspec_case_object = custom_qdq_case.case(model, quantization_spec)
            if qspec_case_object:
                assert isinstance(
                    qspec_case_object, QuantizationSpec
                ), "[E] {} \
                does not return an object of QuantizationSpec.".format(
                    qspec_case_object.__class__.__name__
                )
                custom_quantization_spec.layers.extend(qspec_case_object.layers)

    # if user has passed quantization_spec then extend it with custom_quantization_spec
    # else use just custom_quantization_spec
    if quantization_spec:
        quantization_spec.layers.extend(custom_quantization_spec.layers)
    else:
        if len(custom_quantization_spec.layers) != 0:
            quantization_spec = custom_quantization_spec

    # Check if config is valid and quantize model
    _validate_config(quantization_mode, quantization_spec)
    cid = _recognize_config_class_id(quantization_mode, quantization_spec)
    return _execute_quantize_model(model, cid, quantization_spec)