Source code for polygraphy.tools.args.comparator.data_loader

#
# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import numbers

from polygraphy import constants, mod, util
from polygraphy.logger import G_LOGGER
from polygraphy.tools.args import util as args_util
from polygraphy.tools.args.base import BaseArgs
from polygraphy.tools.args.model import ModelArgs
from polygraphy.tools.script import (
    Script,
    inline,
    make_invocable,
    make_invocable_if_nondefault,
    safe,
)


@mod.export()
class DataLoaderArgs(BaseArgs):
    """
    Data Loader: loading or generating input data for inference.

    Depends on:

        - ModelArgs: if allow_custom_input_shapes == True
    """

    def __init__(self, allow_custom_input_shapes: bool = None):
        """
        Args:
            allow_custom_input_shapes (bool):
                    Whether to allow custom input shapes when randomly generating data.
                    Defaults to True.
        """
        super().__init__()

        self._allow_custom_input_shapes = util.default(allow_custom_input_shapes, True)

    def add_parser_args_impl(self):
        self.group.add_argument(
            "--seed",
            metavar="SEED",
            help="Seed to use for random inputs",
            type=int,
            default=None,
        )
        self.group.add_argument(
            "--val-range",
            help="Range of values to generate in the data loader. "
            "To specify per-input ranges, use the format: --val-range <input_name>:[min,max]. "
            "If no input name is provided, the range is used for any inputs not explicitly specified. "
            "For example: --val-range [0,1] inp0:[2,50] inp1:[3.0,4.6]",
            nargs="+",
            default=None,
        )
        self.group.add_argument(
            "--int-min",
            help="[DEPRECATED: Use --val-range] Minimum integer value for random integer inputs",
            type=int,
            default=None,
        )
        self.group.add_argument(
            "--int-max",
            help="[DEPRECATED: Use --val-range] Maximum integer value for random integer inputs",
            type=int,
            default=None,
        )
        self.group.add_argument(
            "--float-min",
            help="[DEPRECATED: Use --val-range] Minimum float value for random float inputs",
            type=float,
            default=None,
        )
        self.group.add_argument(
            "--float-max",
            help="[DEPRECATED: Use --val-range] Maximum float value for random float inputs",
            type=float,
            default=None,
        )
        self.group.add_argument(
            "--iterations",
            "--iters",
            metavar="NUM",
            help="Number of inference iterations for which the default data loader should supply data",
            type=int,
            default=None,
            dest="iterations",
        )

        self._array_modules = ["numpy", "torch"]
        self.group.add_argument(
            "--data-loader-backend-module",
            type=str,
            choices=self._array_modules,
            help=f"The module to use for generating input arrays. Currently supported options: {', '.join(self._array_modules)}",
            default=None,
        )

        custom_loader_group = self.group.add_mutually_exclusive_group()
        custom_loader_group.add_argument(
            "--load-inputs",
            "--load-input-data",
            help="Path(s) to load inputs. The file(s) should be a JSON-ified "
            "List[Dict[str, numpy.ndarray]], i.e. a list where each element is the feed_dict for a single iteration. "
            "When this option is used, all other data loader arguments are ignored. ",
            default=[],
            dest="load_inputs_paths",
            nargs="+",
        )
        custom_loader_group.add_argument(
            "--data-loader-script",
            help="Path to a Python script that defines a function that loads input data. "
            "The function should take no arguments and return a generator or iterable that yields input data (Dict[str, np.ndarray]). "
            "When this option is used, all other data loader arguments are ignored. "
            "By default, Polygraphy looks for a function called `load_data`. You can specify a custom function name "
            "by separating it with a colon. For example: `my_custom_script.py:my_func`",
            default=None,
        )
        self.group.add_argument(
            "--data-loader-func-name",
            help="[DEPRECATED - function name can be specified with --data-loader-script like so: `my_custom_script.py:my_func`] "
            "When using a data-loader-script, this specifies the name of the function "
            "that loads data. Defaults to `load_data`. ",
            default=None,
        )
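
    # Illustrative sketch (not part of this module): a script passed via
    # `--data-loader-script` is expected to expose a zero-argument function
    # (named `load_data` unless overridden) that yields one feed_dict per
    # iteration. The input name and shape below are hypothetical:
    #
    #     # my_custom_script.py
    #     import numpy as np
    #
    #     def load_data():
    #         for _ in range(5):
    #             yield {"input0": np.ones((1, 3, 224, 224), dtype=np.float32)}
    #
    # It would then be selected with, e.g., `--data-loader-script my_custom_script.py:load_data`.
    # Options such as `--seed` and `--val-range` only affect the default (random) data loader.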

    def parse_impl(self, args):
        """
        Attributes:
            seed (int): The seed to use for random data generation.
            val_range (Dict[str, Tuple[int]]): Per-input ranges of values to generate.
            iterations (int): The number of iterations for which to generate data.
            load_inputs_paths (List[str]): Path(s) from which to load inputs.
            data_loader_script (str): Path to a custom script to load inputs.
            data_loader_func_name (str): Name of the function in the custom data loader script that loads data.
            data_loader_backend_module (str): Module to be used that provides arrays.
        """

        def omit_none_tuple(tup):
            if all([elem is None for elem in tup]):
                return None
            return tup

        self.seed = args_util.get(args, "seed")

        self._int_range = omit_none_tuple(
            tup=(args_util.get(args, "int_min"), args_util.get(args, "int_max"))
        )
        self._float_range = omit_none_tuple(
            tup=(args_util.get(args, "float_min"), args_util.get(args, "float_max"))
        )
        if self._int_range or self._float_range:
            mod.warn_deprecated(
                "--int-min/--int-max and --float-min/--float-max",
                use_instead="--val-range, which allows you to specify per-input data ranges,",
                remove_in="0.50.0",
                always_show_warning=True,
            )

        self.val_range = args_util.parse_arglist_to_dict(
            args_util.get(args, "val_range"), cast_to=lambda x: tuple(args_util.cast(x))
        )
        if self.val_range is not None:
            for name, vals in self.val_range.items():
                if len(vals) != 2:
                    G_LOGGER.critical(
                        f"In --val-range, for input: {name}, expected to receive exactly 2 values, "
                        f"but received {len(vals)}.\nNote: Option was parsed as: input: {name}, range: {vals}"
                    )

                if any(not isinstance(elem, numbers.Number) for elem in vals):
                    G_LOGGER.critical(
                        f"In --val-range, for input: {name}, one or more elements of the range could not be parsed as a number.\n"
                        f"Note: Option was parsed as: input: {name}, range: {vals}"
                    )

        self.iterations = args_util.get(args, "iterations")

        self.load_inputs_paths = args_util.get(args, "load_inputs_paths")

        self.data_loader_backend_module = args_util.get(
            args, "data_loader_backend_module"
        )

        self.data_loader_script, self.data_loader_func_name = (
            args_util.parse_script_and_func_name(
                args_util.get(args, "data_loader_script"), default_func_name="load_data"
            )
        )

        func_name = args_util.get(args, "data_loader_func_name")
        if func_name is not None:
            mod.warn_deprecated(
                "--data-loader-func-name",
                "--data-loader-script",
                "0.50.0",
                always_show_warning=True,
            )
            self.data_loader_func_name = func_name

        if self.load_inputs_paths or self.data_loader_script:
            for arg in [
                "seed",
                "int_min",
                "int_max",
                "float_min",
                "float_max",
                "val_range",
                "iterations",
            ]:
                val = args_util.get(args, arg)
                if val is not None:
                    G_LOGGER.warning(
                        f"Argument: '--{arg.replace('_', '-')}' will be ignored since a custom data loader was provided.\n"
                        "This argument is only valid when using the default data loader."
                    )
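
    # Worked examples for the parsing above (illustrative):
    #   omit_none_tuple((None, None)) -> None        # neither bound was provided
    #   omit_none_tuple((0, None))    -> (0, None)   # partially-specified ranges are kept
    # `--val-range [0,1] inp0:[2,50]` parses into per-input (min, max) tuples,
    # e.g. {"inp0": (2, 50), ...}, with the unnamed range acting as the default
    # for inputs that are not listed explicitly.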

    def _add_to_script_helper(self, script, user_input_metadata_str=None):
        needs_invoke = False
        using_random_data = False

        if self.data_loader_script:
            script.add_import(imports=["mod"], frm="polygraphy")
            data_loader = make_invocable(
                "mod.import_from_script",
                self.data_loader_script,
                name=self.data_loader_func_name,
            )
            needs_invoke = True
        elif self.load_inputs_paths:
            script.add_import(imports=["load_json"], frm="polygraphy.json")
            data_loader = safe(
                "[]\nfor input_data_path in {load_inputs_paths}:"
                "\n{tab}{data_loader}.extend(load_json(input_data_path, description='input data'))",
                load_inputs_paths=self.load_inputs_paths,
                data_loader=Script.DATA_LOADER_NAME,
                tab=inline(safe(constants.TAB)),
            )
        else:
            using_random_data = True
            if (
                user_input_metadata_str is None
                and self._allow_custom_input_shapes
                and self.arg_groups[ModelArgs].input_shapes
            ):
                user_input_metadata_str = self.arg_groups[ModelArgs].input_shapes

            if user_input_metadata_str:
                script.add_import(imports=["TensorMetadata"], frm="polygraphy.common")

            data_loader = make_invocable_if_nondefault(
                "DataLoader",
                seed=self.seed,
                iterations=self.iterations,
                input_metadata=user_input_metadata_str,
                int_range=self._int_range,
                float_range=self._float_range,
                val_range=self.val_range,
                data_loader_backend_module=self.data_loader_backend_module,
            )
            if data_loader:
                script.add_import(imports=["DataLoader"], frm="polygraphy.comparator")

        if using_random_data != self.is_using_random_data():
            G_LOGGER.internal_error("is_using_random_data() reported a false positive!")

        return (script.set_data_loader(data_loader), needs_invoke)
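
    # For reference (illustrative; the actual variable name comes from
    # Script.DATA_LOADER_NAME and the path is hypothetical), the `--load-inputs`
    # branch above emits script code roughly equivalent to:
    #
    #     data_loader = []
    #     for input_data_path in ['custom_inputs.json']:
    #         data_loader.extend(load_json(input_data_path, description='input data'))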

    def add_to_script_impl(self, script, user_input_metadata_str=None):
        """
        Args:
            user_input_metadata_str (str(TensorMetadata)):
                    The name of a variable containing TensorMetadata.
                    This will control the shape and data type of the generated data.

        Returns:
            str: The data loader, as a string. This may either be the variable name,
            or an invocation of the data loader function.
        """
        data_loader, needs_invoke = self._add_to_script_helper(
            script, user_input_metadata_str
        )
        if needs_invoke:
            data_loader = make_invocable(data_loader)
        return data_loader

    def get_data_loader(self, user_input_metadata=None):
        """
        Creates a data loader according to arguments provided on the command-line.

        Returns:
            Sequence[OrderedDict[str, numpy.ndarray]]
        """
        from polygraphy.comparator import DataLoader

        needs_invoke = False

        # run_script expects the callable to return just the variable name, but self.add_to_script
        # has 2 return values. We wrap it here to create a function with the right signature.
        def add_to_script_wrapper(script, *args, **kwargs):
            nonlocal needs_invoke
            name, needs_invoke = self._add_to_script_helper(script, *args, **kwargs)
            return name

        data_loader = util.default(
            args_util.run_script(add_to_script_wrapper, user_input_metadata),
            DataLoader(),
        )
        if needs_invoke:
            data_loader = data_loader()
        return data_loader
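
    # Typical use from another argument group or tool (illustrative sketch,
    # assuming arguments have already been parsed into this group):
    #
    #     data_loader = self.arg_groups[DataLoaderArgs].get_data_loader()
    #     for feed_dict in data_loader:
    #         ...  # feed_dict maps input names to arrays for one iteration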

    def is_using_random_data(self):
        """
        Whether this data loader will randomly generate data rather than use real data.

        Returns:
            bool
        """
        return not self.data_loader_script and not self.load_inputs_paths