Source code for polygraphy.comparator.struct

#
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from collections import OrderedDict

from polygraphy import mod, util, config
from polygraphy.common.interface import TypedDict, TypedList
from polygraphy.json import Decoder, Encoder, add_json_methods, load_json, save_json
from polygraphy.logger import G_LOGGER

np = mod.lazy_import("numpy")


class LazyNumpyArray(object):
    """
    Wraps a NumPy array that is either kept in memory or parked in a temporary file.

    The decision is made once, at construction time: when swapping is enabled
    (``config.ARRAY_SWAP_THRESHOLD_MB`` is non-negative) and the array is larger than
    that threshold, the array is serialized to a temporary file and only reloaded on demand.
    """

    def __init__(self, arr):
        """
        Args:
            arr (np.ndarray): The NumPy array to wrap.
        """
        # Exactly one of these two attributes ends up non-None.
        self.arr = None
        self.tmpfile = None

        threshold_mb = config.ARRAY_SWAP_THRESHOLD_MB
        # The threshold is expressed in MiB, so shift by 20 bits to compare in bytes.
        must_swap = threshold_mb >= 0 and arr.nbytes > (threshold_mb << 20)
        if not must_swap:
            self.arr = arr
            return

        self.tmpfile = util.NamedTemporaryFile(suffix=".json")
        size_mib = arr.nbytes / (1024.0 ** 2)
        G_LOGGER.extra_verbose(
            "Evicting large array ({:.3f} MiB) from memory and saving to {:}".format(size_mib, self.tmpfile.name)
        )
        save_json(arr, self.tmpfile.name)

    def numpy(self):
        """
        Return the wrapped array, reading it back from the temporary file if it was swapped out.

        Returns:
            np.ndarray: The NumPy array
        """
        if self.arr is None:
            assert self.tmpfile is not None, "Path and NumPy array cannot both be None!"
            # The array is reloaded on every call; it is not cached back into memory.
            return load_json(self.tmpfile.name)

        return self.arr


@Encoder.register(LazyNumpyArray)
def encode(lazy_arr):
    # Serialize the underlying array (as returned by `numpy()`) under a single "values" key.
    values = lazy_arr.numpy()
    return {"values": values}


@Decoder.register(LazyNumpyArray)
def decode(dct):
    # Re-wrap the "values" entry; the constructor decides afresh whether to swap it to disk.
    values = dct["values"]
    return LazyNumpyArray(values)


@mod.export()
class IterationResult(TypedDict(lambda: str, lambda: LazyNumpyArray)):
    """
    An ordered dictionary containing the result of running a single iteration of a runner.

    This maps output names to NumPy arrays, and preserves the output ordering from the runner.
    Internally, values are stored as ``LazyNumpyArray`` instances; the accessors defined here
    convert to and from ``np.ndarray`` so that callers only ever see NumPy arrays.

    NOTE: The ``POLYGRAPHY_ARRAY_SWAP_THRESHOLD_MB`` environment variable can be set to enable
    the arrays to be swapped to the disk.

    Also includes additional fields indicating the name of the runner which produced the
    outputs, and the time required to do so.
    """

    @staticmethod
    def _to_lazy(nparray):
        # Wrap the array unless it is already wrapped, so values are never double-wrapped.
        if isinstance(nparray, LazyNumpyArray):
            return nparray
        return LazyNumpyArray(nparray)

    @staticmethod
    def _to_lazy_dict(nparray_dict):
        if nparray_dict is None:
            return None

        # Converts a Dict[str, np.ndarray] to a Dict[str, LazyNumpyArray], preserving order.
        lazy = OrderedDict()
        for name, out in nparray_dict.items():
            lazy[name] = IterationResult._to_lazy(out)
        return lazy

    def __init__(self, outputs=None, runtime=None, runner_name=None):
        """
        Args:
            outputs (Dict[str, np.array]): The outputs of this iteration, mapped to their names.

            runtime (float): The time required for this iteration, in seconds.
            runner_name (str): The name of the runner that produced this output.
        """
        # Only warn when swapping is disabled (negative threshold); otherwise large arrays
        # are evicted to disk by LazyNumpyArray anyway.
        if outputs and config.ARRAY_SWAP_THRESHOLD_MB < 0:
            total_size_gb = sum(arr.nbytes for arr in outputs.values() if isinstance(arr, np.ndarray)) / (1024.0 ** 3)
            if total_size_gb >= 1:
                G_LOGGER.warning(
                    "It looks like the outputs of this network are very large ({:.3f} GiB).\n"
                    "To reduce memory usage, you may want to allow Polygraphy to swap these arrays to the disk using "
                    "the POLYGRAPHY_ARRAY_SWAP_THRESHOLD_MB environment variable.".format(total_size_gb)
                )

        super().__init__(IterationResult._to_lazy_dict(outputs))
        self.runtime = runtime
        self.runner_name = util.default(runner_name, "")

    # Convenience methods to preserve np.ndarray in the interface.
    def update(self, other):
        return super().update(IterationResult._to_lazy_dict(other))

    def __setitem__(self, name, arr):
        return super().__setitem__(name, IterationResult._to_lazy(arr))

    def values(self):
        for arr in super().values():
            yield arr.numpy()

    def items(self):
        for name, arr in super().items():
            yield name, arr.numpy()

    def __getitem__(self, name):
        return super().__getitem__(name).numpy()

    def __eq__(self, other):
        """
        Two results are equal when their runtime, runner name, and outputs all match.

        Outputs match when both results contain exactly the same output names and
        the arrays stored under each name are equal according to ``np.array_equal``.
        """
        # Let Python fall back to its default handling for unrelated types
        # instead of failing on a missing `runtime` attribute.
        if not isinstance(other, IterationResult):
            return NotImplemented

        if self.runtime != other.runtime or self.runner_name != other.runner_name:
            return False

        # The loop below only checks that our keys exist in `other`. Comparing sizes first
        # ensures `other` has no additional outputs, which keeps equality symmetric.
        if len(self.dct) != len(other.dct):
            return False

        for key, val in self.items():
            if key not in other:
                return False

            if not np.array_equal(val, other[key]):
                return False

        return True


@Encoder.register(IterationResult)
def encode(iter_result):
    # Emit the raw backing dictionary (still holding LazyNumpyArray values) plus the metadata fields.
    encoded = {}
    encoded["outputs"] = iter_result.dct
    encoded["runtime"] = iter_result.runtime
    encoded["runner_name"] = iter_result.runner_name
    return encoded


@Decoder.register(IterationResult)
def decode(dct):
    # Pull out the three fields written by the matching encoder and rebuild the result.
    outputs = dct["outputs"]
    runtime = dct["runtime"]
    runner_name = dct["runner_name"]
    return IterationResult(outputs=outputs, runtime=runtime, runner_name=runner_name)


@mod.export()
@add_json_methods("inference results")
class RunResults(TypedList(lambda: tuple)):
    """
    Maps runner names to zero or more IterationResults.

    Note: Technically, this is a ``List[Tuple[str, List[IterationResult]]]``, but includes
    helpers that make it behave like an OrderedDict that can contain duplicates.
    """

    def items(self):
        """
        Creates a generator that yields ``Tuple[str, List[IterationResult]]`` - runner names
        and corresponding outputs.
        """
        for name, iteration_results in self.lst:
            yield name, iteration_results

    def keys(self):
        """
        Creates a generator that yields runner names (str).
        """
        for name, _ in self.lst:
            yield name

    def values(self):
        """
        Creates a generator that yields runner outputs (List[IterationResult]).
        """
        for _, iteration_results in self.lst:
            yield iteration_results

    def update(self, other):
        """
        Updates the results stored in this instance.

        Entries whose runner name already exists here are replaced in place;
        all other entries are appended.

        Args:
            other (Union[Dict[str, List[IterationResult]], RunResults]):
                    A dictionary or RunResults instance from which to update this one.

        Returns:
            RunResults: self, to allow chaining.
        """
        for name, iteration_results in other.items():
            # Go through __setitem__ rather than the backing list: the list can only be
            # indexed by integers, whereas __setitem__ handles lookup by runner name.
            self[name] = iteration_results
        return self

    def __getitem__(self, key):
        """
        Looks up an entry either by position (int) or by runner name.

        An integer key returns the ``(name, iteration_results)`` tuple at that index.
        Any other key returns the iteration results of the first runner with that name.
        """
        if isinstance(key, int):
            return self.lst[key]

        for name, iteration_results in self.lst:
            if name == key:
                return iteration_results

        # NOTE(review): G_LOGGER.critical is assumed to raise; if it does not, this
        # method falls through and returns None - confirm against the logger.
        G_LOGGER.critical(
            "{:35} does not exist in this RunResults instance. Note: Available runners: {:}".format(
                key, list(self.keys())
            )
        )

    def __setitem__(self, key, value):
        """
        Sets an entry either by position (int) or by runner name.

        With an integer key, ``value`` replaces the whole entry at that index.
        Otherwise, the first entry named ``key`` is replaced with ``(key, value)``,
        or a new entry is appended if no runner has that name.
        """
        if isinstance(key, int):
            self.lst[key] = value
            return

        for index, name in enumerate(self.keys()):
            if name == key:
                self.lst[index] = (key, value)
                break
        else:
            self.append((key, value))

    def __contains__(self, val):
        # Strings/bytes are treated as runner names; anything else is matched against whole entries.
        if isinstance(val, (str, bytes)):
            return val in self.keys()
        return val in self.lst

    def __eq__(self, other):
        """
        Two instances are equal when they contain the same runner names, in the same order,
        with equal iteration results for each.
        """
        # Let Python fall back to its default handling for unrelated types
        # instead of failing on a missing `lst` attribute.
        if not isinstance(other, RunResults):
            return NotImplemented

        # zip() stops at the shorter list, so without this check an instance would
        # compare equal to any other instance that merely starts with the same entries.
        if len(self.lst) != len(other.lst):
            return False

        for (r0, its0), (r1, its1) in zip(self.lst, other.lst):
            if r0 != r1:
                return False

            if its0 != its1:
                return False
        return True


@Encoder.register(RunResults)
def encode(results):
    # The backing list of (runner name, iteration results) entries is the only state to persist.
    entries = results.lst
    return {"lst": entries}


@Decoder.register(RunResults)
def decode(dct):
    # Convert every stored entry back into a tuple before rebuilding the container.
    entries = [tuple(entry) for entry in dct["lst"]]
    return RunResults(entries)


@mod.export()
class AccuracyResult(TypedDict(lambda: tuple, lambda: list)):
    """
    An ordered dictionary including details about the result of ``Comparator.compare_accuracy``.

    More specifically, it is an ``OrderedDict[Tuple[str, str], List[OrderedDict[str, bool]]]`` which maps a runner
    pair (a tuple containing both runner names) to a list of dictionaries of booleans (or anything that can be
    converted into a boolean, such as an ``OutputCompareResult``), indicating whether there was a match in the outputs of
    the corresponding iteration. The ``List[OrderedDict[str, bool]]`` is constructed from the dictionaries returned
    by ``compare_func`` in ``compare_accuracy``.

    For example, to see if there's a match between ``runner0`` and
    ``runner1`` during the 1st iteration for an output called ``output0``:
    ::

        runner_pair = ("runner0", "runner1")
        iteration = 0
        output_name = "output0"
        match = bool(accuracy_result[runner_pair][iteration][output_name])

    If there's a mismatch, you can inspect the outputs from
    the results of ``Comparator.run()``, assumed here to be called ``run_results``:
    ::

        runner0_output = run_results["runner0"][iteration][output_name]
        runner1_output = run_results["runner1"][iteration][output_name]
    """

    def __bool__(self):
        """
        Whether all outputs matched for every iteration.
        You can use this function to avoid manually checking each output. For example:
        ::

            if accuracy_result:
                print("All matched!")

        Returns:
            bool
        """
        # Flatten runner pairs -> iterations -> per-output matches; an empty result is truthy.
        return all(bool(match) for outs in self.values() for out in outs for match in out.values())

    def _get_runner_pair(self, runner_pair):
        # Fall back to the first runner pair in this dictionary when none is given.
        # Note that the fallback is computed eagerly, so this raises IndexError on an empty result.
        return util.default(runner_pair, list(self.keys())[0])

    def percentage(self, runner_pair=None):
        """
        Returns the percentage of iterations that matched for the given pair of runners,
        expressed as a decimal between 0.0 and 1.0.

        Always returns 1.0 when the number of iterations is 0, or when there are no runner comparisons.

        Args:
            runner_pair (Tuple[str, str]):
                    A pair of runner names describing which runners to check.
                    Defaults to the first pair in the dictionary.

        Returns:
            float: The fraction of iterations in which every output matched.
        """
        if not list(self.keys()):
            return 1.0  # No data in this result.

        matched, _, total = self.stats(runner_pair)
        if not total:
            return 1.0  # No iterations
        return float(matched) / float(total)

    def stats(self, runner_pair=None):
        """
        Returns the number of iterations that matched, mismatched, and the total number of iterations.

        An iteration counts as matched only if every output in that iteration matched.

        Args:
            runner_pair (Tuple[str, str]):
                    A pair of runner names describing which runners to check.
                    Defaults to the first pair in the dictionary.

        Returns:
            Tuple[int, int, int]: Number of iterations that matched, mismatched, and total respectively.
        """
        runner_pair = self._get_runner_pair(runner_pair)
        outs = self[runner_pair]
        # all() yields a bool per iteration; summing the bools counts the matching iterations.
        matched = sum(all(out.values()) for out in outs)
        total = len(outs)
        return matched, total - matched, total