Source code for polygraphy.backend.onnxrt.runner

#
# SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
from collections import OrderedDict

from polygraphy import mod, util
from polygraphy.backend.base import BaseRunner
from polygraphy.common import TensorMetadata

np = mod.lazy_import("numpy")


[docs]@mod.export() class OnnxrtRunner(BaseRunner): """ Runs inference using an ONNX-Runtime inference session. """ def __init__(self, sess, name=None): """ Args: sess (Union[onnxruntime.InferenceSession, Callable() -> onnxruntime.InferenceSession]): An ONNX-Runtime inference session or a callable that returns one. """ super().__init__(name=name, prefix="onnxrt-runner") self._sess = sess def activate_impl(self): self.sess, _ = util.invoke_if_callable(self._sess) def get_input_metadata_impl(self): ONNX_RT_TYPE_TO_NP = { "tensor(double)": np.float64, "tensor(float)": np.float32, "tensor(float16)": np.float16, "tensor(int16)": np.int16, "tensor(int32)": np.int32, "tensor(int64)": np.int64, "tensor(int8)": np.int8, "tensor(uint16)": np.uint16, "tensor(uint32)": np.uint32, "tensor(uint64)": np.uint64, "tensor(uint8)": np.uint8, "tensor(bool)": bool, "tensor(string)": np.unicode, } meta = TensorMetadata() for node in self.sess.get_inputs(): dtype = ONNX_RT_TYPE_TO_NP[node.type] if node.type in ONNX_RT_TYPE_TO_NP else None meta.add(node.name, dtype=dtype, shape=node.shape) return meta def infer_impl(self, feed_dict): start = time.time() inference_outputs = self.sess.run(None, feed_dict) end = time.time() out_dict = OrderedDict() for node, out in zip(self.sess.get_outputs(), inference_outputs): out_dict[node.name] = out self.inference_time = end - start return out_dict def deactivate_impl(self): del self.sess