TensorRT 6.0.1.5
NvInferRuntime.h
/*
 * Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee. Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users. These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */

#ifndef NV_INFER_RUNTIME_H
#define NV_INFER_RUNTIME_H

#include "NvInferRuntimeCommon.h"

namespace nvinfer1
{

class IExecutionContext;
class ICudaEngine;
class IPluginFactory;

enum class EngineCapability : int
{
    kDEFAULT = 0,  //!< Full capability, TensorRT mode without any restrictions.
    kSAFE_GPU = 1, //!< Safety restricted capability, TensorRT flow that can only run on GPU devices.
    kSAFE_DLA = 2, //!< Safety restricted capability, TensorRT flow that can only run on DLA devices.
};

//! Maximum number of elements in EngineCapability enum.
template <>
constexpr inline int EnumMax<EngineCapability>()
{
    return 3;
}


//!
//! \class Weights
//!
//! \brief An array of weights used as a layer parameter.
//!
class Weights
{
public:
    DataType type;      //!< The type of the weights.
    const void* values; //!< The weight values, in a contiguous array.
    int64_t count;      //!< The number of weights in the array.
};

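// A minimal usage sketch for Weights: the struct is a non-owning view, so the
// application must keep the backing buffer alive for as long as TensorRT may
// read it. The buffer name and sizes below are illustrative assumptions.
//
//     static float kernelData[3 * 3 * 16 * 32];             // application-owned storage
//     nvinfer1::Weights w{};
//     w.type = nvinfer1::DataType::kFLOAT;                  // 32-bit float weights
//     w.values = kernelData;                                // contiguous, not copied
//     w.count = sizeof(kernelData) / sizeof(kernelData[0]); // number of elements, not bytes
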
//!
//! \class IHostMemory
//!
//! \brief Class to handle library allocated memory that is accessible to the user.
//!
class IHostMemory
{
public:
    virtual void* data() const noexcept = 0;
    virtual std::size_t size() const noexcept = 0;
    virtual DataType type() const noexcept = 0;
    virtual void destroy() noexcept = 0;
protected:
    virtual ~IHostMemory() {}
};

//!
//! \class IPlugin
//!
//! \brief Plugin class for user-implemented layers.
//!
class IPlugin
{
public:
    virtual int getNbOutputs() const TRTNOEXCEPT = 0;

    virtual Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) TRTNOEXCEPT = 0;

    virtual void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) TRTNOEXCEPT = 0;

    virtual int initialize() TRTNOEXCEPT = 0;

    virtual void terminate() TRTNOEXCEPT = 0;

    virtual size_t getWorkspaceSize(int maxBatchSize) const TRTNOEXCEPT = 0;

    virtual int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) TRTNOEXCEPT = 0;

    virtual size_t getSerializationSize() TRTNOEXCEPT = 0;

    virtual void serialize(void* buffer) TRTNOEXCEPT = 0;

    virtual ~IPlugin() {}
};

//!
//! \class IPluginExt
//!
//! \brief Plugin class for user-implemented layers.
//!
class IPluginExt : public IPlugin
{
public:
    //! Return the API version with which this plugin was built.
    virtual int getTensorRTVersion() const TRTNOEXCEPT
    {
        return NV_TENSORRT_VERSION;
    }

    virtual bool supportsFormat(DataType type, PluginFormat format) const TRTNOEXCEPT = 0;

    virtual void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) TRTNOEXCEPT = 0;

    virtual ~IPluginExt() {}

protected:
    //! Derived classes should not implement this. In a C++11 API it would be override final.
    void configure(const Dims* /*inputDims*/, int /*nbInputs*/, const Dims* /*outputDims*/, int /*nbOutputs*/, int /*maxBatchSize*/) _TENSORRT_FINAL TRTNOEXCEPT {}
};

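// A hedged sketch of a complete IPluginExt subclass: an identity layer that
// copies its input to its output. All names are hypothetical, and the plain
// cudaMemcpyAsync stands in for a real kernel launch.
//
//     class IdentityPlugin : public nvinfer1::IPluginExt
//     {
//     public:
//         int getNbOutputs() const override { return 1; }
//         nvinfer1::Dims getOutputDimensions(int, const nvinfer1::Dims* inputs, int) override
//         {
//             return inputs[0]; // output shape equals input shape
//         }
//         bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const override
//         {
//             return type == nvinfer1::DataType::kFLOAT && format == nvinfer1::PluginFormat::kNCHW;
//         }
//         void configureWithFormat(const nvinfer1::Dims* inputDims, int, const nvinfer1::Dims*, int,
//                                  nvinfer1::DataType, nvinfer1::PluginFormat, int) override
//         {
//             mVolume = 1; // element count per batch item, captured at build time
//             for (int i = 0; i < inputDims[0].nbDims; ++i)
//                 mVolume *= inputDims[0].d[i];
//         }
//         int initialize() override { return 0; }          // nothing to set up
//         void terminate() override {}                     // nothing to release
//         size_t getWorkspaceSize(int) const override { return 0; }
//         int enqueue(int batchSize, const void* const* inputs, void** outputs, void*, cudaStream_t stream) override
//         {
//             size_t bytes = batchSize * mVolume * sizeof(float);
//             return cudaMemcpyAsync(outputs[0], inputs[0], bytes,
//                                    cudaMemcpyDeviceToDevice, stream) == cudaSuccess ? 0 : 1;
//         }
//         size_t getSerializationSize() override { return sizeof(mVolume); }
//         void serialize(void* buffer) override { *static_cast<size_t*>(buffer) = mVolume; }
//
//     private:
//         size_t mVolume{0};
//     };
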
//!
//! \enum DimensionOperation
//!
//! \brief An operation on two IDimensionExpr, which represent integer expressions used in dimension computations.
//!
enum class DimensionOperation : int
{
    kSUM = 0,       //!< Sum of the two elements.
    kPROD = 1,      //!< Product of the two elements.
    kMAX = 2,       //!< Maximum of the two elements.
    kMIN = 3,       //!< Minimum of the two elements.
    kSUB = 4,       //!< Subtract the second element from the first.
    kEQUAL = 5,     //!< 1 if operands are equal, 0 otherwise.
    kLESS = 6,      //!< 1 if first operand is less than second operand, 0 otherwise.
    kFLOOR_DIV = 7, //!< Floor division of the first element by the second.
    kCEIL_DIV = 8   //!< Division rounding up.
};

//! Maximum number of elements in DimensionOperation enum.
template <>
constexpr inline int EnumMax<DimensionOperation>()
{
    return 9;
}

class IDimensionExpr
{
public:
    //! Return true if expression is a build-time constant.
    virtual bool isConstant() const = 0;

    //! If isConstant(), returns value of the constant.
    virtual int getConstantValue() const = 0;
};

class IExprBuilder
{
public:
    //! Return pointer to IDimensionExpr for given value.
    virtual const IDimensionExpr* constant(int value) = 0;

    //! Return pointer to IDimensionExpr that represents the given operation applied to first and second.
    virtual const IDimensionExpr* operation(DimensionOperation op, const IDimensionExpr& first, const IDimensionExpr& second) = 0;

protected:
    virtual ~IExprBuilder() {}
};

class DimsExprs
{
public:
    int nbDims;                              //!< The number of dimensions.
    const IDimensionExpr* d[Dims::MAX_DIMS]; //!< The extents of each dimension.
};

struct DynamicPluginTensorDesc
{
    //! Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of
    //! any runtime dimension.
    PluginTensorDesc desc;

    //! Lower bounds on tensor's dimensions.
    Dims min;

    //! Upper bounds on tensor's dimensions.
    Dims max;
};

class IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext
{
public:
    IPluginV2DynamicExt* clone() const _TENSORRT_OVERRIDE TRTNOEXCEPT = 0;

    virtual DimsExprs getOutputDimensions(int outputIndex, const DimsExprs* inputs, int nbInputs, IExprBuilder& exprBuilder) = 0;

    //! Limit on the number of format combinations accepted.
    static constexpr int kFORMAT_COMBINATION_LIMIT = 100;

    virtual bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) TRTNOEXCEPT = 0;

    virtual void configurePlugin(const DynamicPluginTensorDesc* in, int nbInputs, const DynamicPluginTensorDesc* out, int nbOutputs) TRTNOEXCEPT = 0;

    virtual size_t getWorkspaceSize(const PluginTensorDesc* inputs, int nbInputs, const PluginTensorDesc* outputs, int nbOutputs) const TRTNOEXCEPT = 0;

    virtual int enqueue(const PluginTensorDesc* inputDesc, const PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) TRTNOEXCEPT = 0;

protected:
    //! Return the API version with which this plugin was built.
    int getTensorRTVersion() const _TENSORRT_OVERRIDE TRTNOEXCEPT
    {
        return (static_cast<int>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
    }

    virtual ~IPluginV2DynamicExt() {}

    // Rest of the methods below are obsolete inherited methods, and marked final when using a C++11 compiler.
    // Derived classes should not override them.

    //! Derived classes should not implement this. In a C++11 API it would be override final.
    TRT_DEPRECATED
    Dims getOutputDimensions(int /*index*/, const Dims* /*inputs*/, int /*nbInputDims*/) _TENSORRT_FINAL TRTNOEXCEPT
    {
        Dims result;
        result.nbDims = -1;
        return result;
    }

    //! Derived classes should not implement this. In a C++11 API it would be override final.
    TRT_DEPRECATED
    bool isOutputBroadcastAcrossBatch(int /*outputIndex*/, const bool* /*inputIsBroadcasted*/, int /*nbInputs*/) const _TENSORRT_FINAL TRTNOEXCEPT
    {
        return false;
    }

    //! Derived classes should not implement this. In a C++11 API it would be override final.
    TRT_DEPRECATED
    bool canBroadcastInputAcrossBatch(int /*inputIndex*/) const _TENSORRT_FINAL TRTNOEXCEPT
    {
        return true;
    }

    //! Derived classes should not implement this. In a C++11 API it would be override final.
    TRT_DEPRECATED
    bool supportsFormat(DataType /*type*/, PluginFormat /*format*/) const _TENSORRT_FINAL TRTNOEXCEPT { return false; }

    //! Derived classes should not implement this. In a C++11 API it would be override final.
    TRT_DEPRECATED
    void configurePlugin(const Dims* /*inputDims*/, int /*nbInputs*/, const Dims* /*outputDims*/,
        int /*nbOutputs*/, const DataType* /*inputTypes*/, const DataType* /*outputTypes*/,
        const bool* /*inputIsBroadcast*/, const bool* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, int /*maxBatchSize*/) _TENSORRT_FINAL TRTNOEXCEPT {}

    //! Derived classes should not implement this. In a C++11 API it would be override final.
    TRT_DEPRECATED
    size_t getWorkspaceSize(int /*maxBatchSize*/) const _TENSORRT_FINAL TRTNOEXCEPT { return 0; }

    //! Derived classes should not implement this. In a C++11 API it would be override final.
    TRT_DEPRECATED
    int enqueue(int /*batchSize*/, const void* const* /*inputs*/, void** /*outputs*/, void* /*workspace*/, cudaStream_t /*stream*/) _TENSORRT_FINAL TRTNOEXCEPT
    {
        return 1;
    }
};

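// A hedged sketch of the shape- and format-related overrides in a hypothetical
// IPluginV2DynamicExt subclass whose single output doubles the first dimension
// of its single input (the remaining overrides are omitted here):
//
//     nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs* inputs,
//                                             int nbInputs, nvinfer1::IExprBuilder& exprBuilder) override
//     {
//         nvinfer1::DimsExprs out = inputs[0];
//         // Build 2 * inputs[0].d[0] as a symbolic expression, so the result stays
//         // valid for dimensions that are unknown until runtime.
//         out.d[0] = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD,
//                                          *exprBuilder.constant(2), *inputs[0].d[0]);
//         return out;
//     }
//
//     bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut,
//                                    int nbInputs, int nbOutputs) override
//     {
//         // Accept linear FP32 for every input/output slot.
//         return inOut[pos].format == nvinfer1::TensorFormat::kLINEAR
//             && inOut[pos].type == nvinfer1::DataType::kFLOAT;
//     }
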
//!
//! \class IProfiler
//!
//! \brief Application-implemented interface for profiling.
//!
class IProfiler
{
public:
    //! Layer time reporting callback, called with the layer name and its execution time in milliseconds.
    virtual void reportLayerTime(const char* layerName, float ms) TRTNOEXCEPT = 0;

    virtual ~IProfiler() {}
};

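// A minimal sketch of an IProfiler implementation that prints per-layer
// timings; attach it with context->setProfiler(&profiler) and use the
// synchronous execute() path so all reports have arrived on return.
//
//     struct SimpleProfiler : public nvinfer1::IProfiler
//     {
//         void reportLayerTime(const char* layerName, float ms) override
//         {
//             std::printf("%-60s %8.3f ms\n", layerName, ms); // one line per layer
//         }
//     };
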
//!
//! \enum WeightsRole
//!
//! \brief How a layer uses particular Weights.
//!
enum class WeightsRole : int
{
    kKERNEL = 0,   //!< kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
    kBIAS = 1,     //!< bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
    kSHIFT = 2,    //!< shift part of IScaleLayer
    kSCALE = 3,    //!< scale part of IScaleLayer
    kCONSTANT = 4, //!< weights for IConstantLayer
};

//! Maximum number of elements in WeightsRole enum.
template <>
constexpr inline int EnumMax<WeightsRole>()
{
    return 5;
}

//!
//! \enum DeviceType
//!
//! \brief The device that this layer/network will execute on.
//!
enum class DeviceType : int
{
    kGPU,
    kDLA,
};

//! Maximum number of elements in DeviceType enum.
template <>
constexpr inline int EnumMax<DeviceType>()
{
    return 2;
}

//!
//! \class IRuntime
//!
//! \brief Allows a serialized functionally unsafe engine to be deserialized.
//!
class IRuntime
{
public:
    //! Deserialize an engine from a stream.
    virtual nvinfer1::ICudaEngine* deserializeCudaEngine(const void* blob, std::size_t size, IPluginFactory* pluginFactory) noexcept = 0;

    //! Set the DLA core that the deserialized engine must execute on.
    virtual void setDLACore(int dlaCore) noexcept = 0;

    //! Get the DLA core that the engine executes on.
    virtual int getDLACore() const noexcept = 0;

    //! Return the number of DLA hardware cores accessible.
    virtual int getNbDLACores() const noexcept = 0;

    //! Destroy this object.
    virtual void destroy() noexcept = 0;

protected:
    virtual ~IRuntime() {}

public:
    //! Set the GPU allocator to be used by the runtime.
    virtual void setGpuAllocator(IGpuAllocator* allocator) noexcept = 0;

    //! Set the ErrorRecorder for this interface.
    virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;

    //! Get the ErrorRecorder assigned to this interface.
    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;

    //! Deserialize an engine from a stream when plugin factory is not used.
    nvinfer1::ICudaEngine* deserializeCudaEngine(const void* blob, std::size_t size) noexcept
    {
        return deserializeCudaEngine(blob, size, nullptr);
    }
};

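// A short sketch of steering deserialization to a DLA core; runtime, blob and
// size are assumed to exist, and the platform is assumed to expose DLA cores:
//
//     if (runtime->getNbDLACores() > 0)
//         runtime->setDLACore(0); // select the core before deserializing
//     nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(blob, size);
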
//!
//! \class IRefitter
//!
//! \brief Updates weights in an engine.
//!
class IRefitter
{
public:
    //! Specify new weights for a layer of given name. Returns true on success.
    virtual bool setWeights(const char* layerName, WeightsRole role, Weights weights) TRTNOEXCEPT = 0;

    //! Update the associated engine. Returns true if successful.
    virtual bool refitCudaEngine() TRTNOEXCEPT = 0;

    //! Get a description of missing weights. Returns the number of missing weights.
    virtual int getMissing(int size, const char** layerNames, WeightsRole* roles) TRTNOEXCEPT = 0;

    //! Get a description of all weights that could be refit. Returns the number of such weights.
    virtual int getAll(int size, const char** layerNames, WeightsRole* roles) TRTNOEXCEPT = 0;

    virtual void destroy() TRTNOEXCEPT = 0;

protected:
    virtual ~IRefitter() {}

public:
    //! Update the dynamic range for a tensor. Returns true if successful.
    virtual bool setDynamicRange(const char* tensorName, float min, float max) TRTNOEXCEPT = 0;

    //! Get the minimum of the dynamic range.
    virtual float getDynamicRangeMin(const char* tensorName) const TRTNOEXCEPT = 0;

    //! Get the maximum of the dynamic range.
    virtual float getDynamicRangeMax(const char* tensorName) const TRTNOEXCEPT = 0;

    //! Get the names of all tensors that have refittable dynamic ranges.
    virtual int getTensorsWithDynamicRange(int size, const char** tensorNames) const TRTNOEXCEPT = 0;

    //! Set the ErrorRecorder for this interface.
    virtual void setErrorRecorder(IErrorRecorder* recorder) TRTNOEXCEPT = 0;

    //! Get the ErrorRecorder assigned to this interface.
    virtual IErrorRecorder* getErrorRecorder() const TRTNOEXCEPT = 0;
};

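// A hedged sketch of one refit cycle, assuming an engine built as refittable,
// a logger gLogger, and application-owned Weights newKernel; the layer name
// "conv1" is an illustrative assumption:
//
//     nvinfer1::IRefitter* refitter = nvinfer1::createInferRefitter(*engine, gLogger);
//     refitter->setWeights("conv1", nvinfer1::WeightsRole::kKERNEL, newKernel);
//     // A size of 0 queries how many weights must still be supplied.
//     int nbMissing = refitter->getMissing(0, nullptr, nullptr);
//     if (nbMissing == 0 && refitter->refitCudaEngine())
//     {
//         // The engine now runs with the updated weights.
//     }
//     refitter->destroy();
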
//!
//! \class IPluginFactory
//!
//! \brief Plugin factory for deserialization.
//!
class IPluginFactory
{
public:
    //! Create a plugin from serialized data.
    virtual IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) TRTNOEXCEPT = 0;
};

//!
//! \enum OptProfileSelector
//!
//! \brief When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dimensions),
//! select whether we are interested in the minimum, optimum, or maximum values for these parameters.
//!
enum class OptProfileSelector : int
{
    kMIN = 0, //!< This is used to set or get the minimum permitted value for dynamic dimensions etc.
    kOPT = 1, //!< This is used to set or get the value that is used in the optimization (kernel selection).
    kMAX = 2  //!< This is used to set or get the maximum permitted value for dynamic dimensions etc.
};

//! Number of different values of OptProfileSelector enum.
template <>
constexpr inline int EnumMax<OptProfileSelector>()
{
    return 3;
}

//!
//! \class IOptimizationProfile
//!
//! \brief Optimization profile for dynamic input dimensions and shape tensors.
//!
class IOptimizationProfile
{
public:
    //! Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
    virtual bool setDimensions(const char* inputName, OptProfileSelector select, Dims dims) noexcept = 0;

    //! Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
    virtual Dims getDimensions(const char* inputName, OptProfileSelector select) const noexcept = 0;

    //! Set the minimum / optimum / maximum values for an input shape tensor.
    virtual bool setShapeValues(
        const char* inputName, OptProfileSelector select, const int32_t* values, int nbValues) noexcept = 0;

    //! Get the number of values for an input shape tensor.
    virtual int getNbShapeValues(const char* inputName) const noexcept = 0;

    //! Get the minimum / optimum / maximum values for an input shape tensor.
    virtual const int32_t* getShapeValues(const char* inputName, OptProfileSelector select) const noexcept = 0;

    //! Set a target for extra GPU memory that may be used by this profile.
    virtual bool setExtraMemoryTarget(float target) noexcept = 0;

    //! Get the extra memory target that has been defined for this profile.
    virtual float getExtraMemoryTarget() const noexcept = 0;

    //! Check whether the optimization profile is valid.
    virtual bool isValid() const noexcept = 0;

protected:
    ~IOptimizationProfile() noexcept = default;
};

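// A minimal sketch of populating a profile for an input named "input" whose
// batch dimension is dynamic. The profile itself would come from the builder
// configuration declared in NvInfer.h, so its origin is assumed here:
//
//     auto dims4 = [](int n, int c, int h, int w) {
//         nvinfer1::Dims d{};
//         d.nbDims = 4;
//         d.d[0] = n; d.d[1] = c; d.d[2] = h; d.d[3] = w;
//         return d;
//     };
//     profile->setDimensions("input", nvinfer1::OptProfileSelector::kMIN, dims4(1, 3, 224, 224));
//     profile->setDimensions("input", nvinfer1::OptProfileSelector::kOPT, dims4(8, 3, 224, 224));
//     profile->setDimensions("input", nvinfer1::OptProfileSelector::kMAX, dims4(32, 3, 224, 224));
//     assert(profile->isValid()); // min <= opt <= max must hold per dimension
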
//!
//! \class ICudaEngine
//!
//! \brief An engine for executing inference on a built network, with functionally unsafe features.
//!
class ICudaEngine
{
public:
    //! Get the number of binding indices.
    virtual int getNbBindings() const noexcept = 0;

    //! Retrieve the binding index for a named tensor.
    virtual int getBindingIndex(const char* name) const noexcept = 0;

    //! Retrieve the name corresponding to a binding index.
    virtual const char* getBindingName(int bindingIndex) const noexcept = 0;

    //! Determine whether a binding is an input binding.
    virtual bool bindingIsInput(int bindingIndex) const noexcept = 0;

    //! Get the dimensions of a binding.
    virtual Dims getBindingDimensions(int bindingIndex) const noexcept = 0;

    //! Determine the required data type for a buffer from its binding index.
    virtual DataType getBindingDataType(int bindingIndex) const noexcept = 0;

    //! Get the maximum batch size which can be used for inference.
    virtual int getMaxBatchSize() const noexcept = 0;

    //! Get the number of layers in the network.
    virtual int getNbLayers() const noexcept = 0;

    //! \deprecated Get the amount of workspace the engine uses.
    TRT_DEPRECATED
    virtual std::size_t getWorkspaceSize() const noexcept = 0;

    //! Serialize the network to a stream.
    virtual IHostMemory* serialize() const noexcept = 0;

    //! Create an execution context.
    virtual IExecutionContext* createExecutionContext() noexcept = 0;

    //! Destroy this object.
    virtual void destroy() noexcept = 0;

    //! Get the location of a binding.
    virtual TensorLocation getLocation(int bindingIndex) const noexcept = 0;

protected:
    virtual ~ICudaEngine() {}

public:
    //! Create an execution context without any device memory allocated.
    virtual IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept = 0;

    //! Return the amount of device memory required by an execution context.
    virtual size_t getDeviceMemorySize() const noexcept = 0;

    //! Return true if the engine can be refit.
    virtual bool isRefittable() const noexcept = 0;

    //! Return the number of bytes per component of an element.
    virtual int getBindingBytesPerComponent(int bindingIndex) const noexcept = 0;

    //! Return the number of components included in one element.
    virtual int getBindingComponentsPerElement(int bindingIndex) const noexcept = 0;

    //! Return the binding format.
    virtual TensorFormat getBindingFormat(int bindingIndex) const noexcept = 0;

    //! Return a human readable description of the tensor format.
    virtual const char* getBindingFormatDesc(int bindingIndex) const noexcept = 0;

    //! Return the dimension index along which the buffer is vectorized.
    virtual int getBindingVectorizedDim(int bindingIndex) const noexcept = 0;

    //! Return the name of the network associated with the engine.
    virtual const char* getName() const noexcept = 0;

    //! Get the number of optimization profiles defined for this engine.
    virtual int getNbOptimizationProfiles() const noexcept = 0;

    //! Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile.
    virtual Dims getProfileDimensions(int bindingIndex, int profileIndex, OptProfileSelector select) const noexcept = 0;

    //! Get the minimum / optimum / maximum values for an input shape binding under an optimization profile.
    virtual const int32_t* getProfileShapeValues(int profileIndex, int inputIndex, OptProfileSelector select) const noexcept = 0;

    //! True if the tensor is required as input for shape calculations or output from them.
    virtual bool isShapeBinding(int bindingIndex) const noexcept = 0;

    //! True if a pointer to tensor data is required for the execution phase.
    virtual bool isExecutionBinding(int bindingIndex) const noexcept = 0;

    //! Determine what execution capability this engine has.
    virtual EngineCapability getEngineCapability() const noexcept = 0;

    //! Set the ErrorRecorder for this interface.
    virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;

    //! Get the ErrorRecorder assigned to this interface.
    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
};

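// A short introspection sketch over a deserialized engine; the tensor name
// "input" and the output path are illustrative assumptions:
//
//     int index = engine->getBindingIndex("input");
//     if (index >= 0)
//     {
//         nvinfer1::Dims dims = engine->getBindingDimensions(index); // -1 marks runtime dims
//         nvinfer1::DataType type = engine->getBindingDataType(index);
//         bool isInput = engine->bindingIsInput(index);
//     }
//     nvinfer1::IHostMemory* plan = engine->serialize(); // persist for later runs
//     // ... write plan->data(), plan->size() to disk ...
//     plan->destroy();
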
//!
//! \class IExecutionContext
//!
//! \brief Context for executing inference using an engine, with functionally unsafe features.
//!
class IExecutionContext
{
public:
    //! Synchronously execute inference on a batch.
    virtual bool execute(int batchSize, void** bindings) noexcept = 0;

    //! Asynchronously execute inference on a batch.
    virtual bool enqueue(int batchSize, void** bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept = 0;

    //! Set the debug sync flag.
    virtual void setDebugSync(bool sync) noexcept = 0;

    //! Get the debug sync flag.
    virtual bool getDebugSync() const noexcept = 0;

    //! Set the profiler.
    virtual void setProfiler(IProfiler*) noexcept = 0;

    //! Get the profiler.
    virtual IProfiler* getProfiler() const noexcept = 0;

    //! Get the associated engine.
    virtual const ICudaEngine& getEngine() const noexcept = 0;

    //! Destroy this object.
    virtual void destroy() noexcept = 0;

protected:
    virtual ~IExecutionContext() noexcept {}

public:
    //! Set the name of the execution context.
    virtual void setName(const char* name) noexcept = 0;

    //! Return the name of the execution context.
    virtual const char* getName() const noexcept = 0;

    //! Set the device memory for use by this execution context.
    virtual void setDeviceMemory(void* memory) noexcept = 0;

    //! Return the strides of the buffer for the given binding.
    virtual Dims getStrides(int bindingIndex) const noexcept = 0;

public:
    //! Select an optimization profile for the current context.
    virtual bool setOptimizationProfile(int profileIndex) noexcept = 0;

    //! Get the index of the currently selected optimization profile.
    virtual int getOptimizationProfile() const noexcept = 0;

    //! Set the dynamic dimensions of a binding.
    virtual bool setBindingDimensions(int bindingIndex, Dims dimensions) noexcept = 0;

    //! Get the dynamic dimensions of a binding.
    virtual Dims getBindingDimensions(int bindingIndex) const noexcept = 0;

    //! Set the values of an input tensor required by shape calculations.
    virtual bool setInputShapeBinding(int bindingIndex, const int32_t* data) noexcept = 0;

    //! Get the values of an input tensor required for shape calculations or an output tensor produced by them.
    virtual bool getShapeBinding(int bindingIndex, int32_t* data) const noexcept = 0;

    //! Whether all dynamic dimensions of input tensors have been specified.
    virtual bool allInputDimensionsSpecified() const noexcept = 0;

    //! Whether all input shape bindings have been specified.
    virtual bool allInputShapesSpecified() const noexcept = 0;

    //! Set the ErrorRecorder for this interface.
    virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;

    //! Get the ErrorRecorder assigned to this interface.
    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;

    //! Synchronously execute inference on a network with explicit batch dimensions.
    virtual bool executeV2(void** bindings) noexcept = 0;

    //! Asynchronously execute inference on a network with explicit batch dimensions.
    virtual bool enqueueV2(void** bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept = 0;
};
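
// A hedged sketch of the dynamic-shape execution path: select a profile, fix
// the runtime dimensions, then launch with enqueueV2(). The device pointers,
// stream, and binding order are assumed application state:
//
//     context->setOptimizationProfile(0);  // must precede setBindingDimensions
//     nvinfer1::Dims d{};
//     d.nbDims = 4;
//     d.d[0] = 8; d.d[1] = 3; d.d[2] = 224; d.d[3] = 224;
//     context->setBindingDimensions(0, d); // binding 0 assumed to be the input
//     if (context->allInputDimensionsSpecified())
//     {
//         void* bindings[] = {inputDevPtr, outputDevPtr}; // ordered by binding index
//         context->enqueueV2(bindings, stream, nullptr);
//         cudaStreamSynchronize(stream);
//     }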
} // namespace nvinfer1

//! Internal C entry point for creating IRuntime.
extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int version);

//! Internal C entry point for creating IRefitter.
extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int version);

namespace nvinfer1
{
namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this header.
{

//! Create an instance of an IRuntime class.
inline IRuntime* createInferRuntime(ILogger& logger)
{
    return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
}

//! Create an instance of an IRefitter class.
inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger)
{
    return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
}

} // unnamed namespace
} // namespace nvinfer1

#endif // NV_INFER_RUNTIME_H
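
// Putting the pieces together: a minimal, hedged end-to-end sketch that loads
// a serialized plan and runs the implicit-batch execute() path. gLogger,
// planData, planSize, and the device buffers are assumed application state:
//
//     nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
//     nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(planData, planSize);
//     nvinfer1::IExecutionContext* context = engine->createExecutionContext();
//
//     void* bindings[] = {inputDevPtr, outputDevPtr}; // ordered by binding index
//     context->execute(1, bindings);                  // synchronous, batch size 1
//
//     context->destroy();                             // release in reverse order
//     engine->destroy();
//     runtime->destroy();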