TensorRT 11.0.0
NvInferImpl.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_IMPL_H
19#define NV_INFER_IMPL_H
20
21#include "NvInferLegacyDims.h"
23
24// @cond SuppressDoxyWarnings
25
26namespace nvinfer1
27{
28
29namespace v_1_0
30{
31class ILogger;
33} // namespace v_1_0
36
37namespace v_1_0
38{
39class IProfiler;
40} // namespace v_1_0
42
43namespace v_1_0
44{
46} // namespace v_1_0
48
49namespace v_1_0
50{
51class IDebugListener;
52} // namespace v_1_0
54
55class IActivationLayer;
56class IAssertionLayer;
57class IAttention;
58class IBuilder;
59class IBuilderConfig;
60class IConcatenationLayer;
61class IConditionLayer;
62class IConstantLayer;
63class IConvolutionLayer;
64class ICudaEngine;
65class ICumulativeLayer;
66class IDeconvolutionLayer;
67class IDequantizeLayer;
68class IDimensionExpr;
69class IDynamicQuantizeLayer;
70class IEinsumLayer;
71class IElementWiseLayer;
72class IEngineInspector;
73class IExecutionContext;
74class IFillLayer;
75class IGatherLayer;
76class IGridSampleLayer;
77class IHostMemory;
78class IIdentityLayer;
79class ICastLayer;
80class IIfConditional;
81class IIfConditionalInputLayer;
82class IIfConditionalOutputLayer;
83class IIteratorLayer;
84class IKVCacheUpdateLayer;
85class ILayer;
86class ILoop;
87class ILoopOutputLayer;
88class ILRNLayer;
89class IMatrixMultiplyLayer;
90class IMoELayer;
91class IDistCollectiveLayer;
92class INetworkDefinition;
93class INormalizationLayer;
94class INMSLayer;
95class INonZeroLayer;
96class IOneHotLayer;
97class IOptimizationProfile;
98class IPaddingLayer;
99class IParametricReLULayer;
100class IPlugin;
101class IPluginExt;
102class IPluginFactory;
103class IPluginLayer;
104class IPluginRegistry;
105class IPluginV2Layer;
106class IRotaryEmbeddingLayer;
107class IRuntimeConfig;
108
109namespace v_1_0
110{
111class IPluginV3;
112} // namespace v_1_0
114
115namespace v_1_0
116{
117class IStreamReader;
118class IStreamWriter;
119} // namespace v_1_0
122namespace v_1_0
123{
124class IStreamReaderV2;
125} // namespace v_1_0
127
128class IPluginV3Layer;
129class IPoolingLayer;
130class IQuantizeLayer;
131class IRaggedSoftMaxLayer;
132class IRecurrenceLayer;
133class IReduceLayer;
134class IRefitter;
135class IResizeLayer;
136class IReverseSequenceLayer;
137class IRuntime;
138class IScaleLayer;
139class IScatterLayer;
140class ISelectLayer;
141class ISerializationConfig;
142class IShapeLayer;
143class IShuffleLayer;
144class ISliceLayer;
145class ISoftMaxLayer;
146class ISqueezeLayer;
147class ITensor;
148
// Forward-declare the timing-cache key/value structs inside the versioned
// v_1_0 namespace, then re-export both names into the enclosing namespace via
// alias declarations so later code can refer to them unqualified.
149namespace v_1_0
150{
151struct TimingCacheKey;
152struct TimingCacheValue;
153} // namespace v_1_0
154using TimingCacheKey = v_1_0::TimingCacheKey;
155using TimingCacheValue = v_1_0::TimingCacheValue;
156
157class ITimingCache;
158class ITopKLayer;
159class ITripLimitLayer;
160class IUnaryLayer;
161class IUnsqueezeLayer;
162struct Permutation;
163class Weights;
164
// Opaque declarations of the scoped enums used in the interface signatures
// below. Each one fixes its underlying type to int32_t, which is what makes
// the enum a complete type here (usable by value as a parameter or return
// type) even though no enumerators are listed; the enumerators are defined
// elsewhere.
// NOTE(review): int32_t must already be declared at this point — presumably
// via NvInferLegacyDims.h; confirm.
165enum class ActivationType : int32_t;
166enum class AttentionIOForm : int32_t;
167enum class AttentionNormalizationOp : int32_t;
168enum class BoundingBoxFormat : int32_t;
169enum class CausalMaskKind : int32_t;
170enum class BuilderFlag : int32_t;
171enum class CumulativeOperation : int32_t;
172enum class DeviceType : int32_t;
173enum class DimensionOperation : int32_t;
174enum class ElementWiseOperation : int32_t;
175enum class EngineCapability : int32_t;
176enum class FillOperation : int32_t;
177enum class GatherMode : int32_t;
178enum class KVCacheMode : int32_t;
179enum class LayerInformationFormat : int32_t;
180enum class LayerType : int32_t;
181enum class LoopOutput : int32_t;
182enum class MatrixOperation : int32_t;
183enum class MemoryPoolType : int32_t;
184enum class MoEActType : int32_t;
185enum class NetworkDefinitionCreationFlag : int32_t;
186enum class OptProfileSelector : int32_t;
187enum class PaddingMode : int32_t;
188enum class PoolingType : int32_t;
189enum class ProfilingVerbosity : int32_t;
190enum class ReduceOperation : int32_t;
191enum class CollectiveOperation : int32_t;
192enum class ResizeCoordinateTransformation : int32_t;
193enum class InterpolationMode : int32_t;
194enum class ResizeRoundMode : int32_t;
195enum class ResizeSelector : int32_t;
196enum class ScaleMode : int32_t;
197enum class ScatterMode : int32_t;
198enum class SampleMode : int32_t;
199enum class SerializationFlag : int32_t;
200enum class TensorIOMode : int32_t;
201enum class TensorLocation : int32_t;
202enum class TopKOperation : int32_t;
203enum class TripLimit : int32_t;
204enum class UnaryOperation : int32_t;
205enum class WeightsRole : int32_t;
206enum class PreviewFeature : int32_t;
207enum class HardwareCompatibilityLevel : int32_t;
208enum class ExecutionContextAllocationStrategy : int32_t;
209enum class RuntimePlatform : int32_t;
210enum class TilingOptimizationLevel : int32_t;
211enum class EngineStat : int32_t;
212
213
// 32-bit unsigned aliases used as parameter/return types in the interfaces
// below (e.g. TacticSources, TensorFormats, TempfileControlFlags).
// NOTE(review): the plural names suggest bitmasks built from the matching
// enums (BuilderFlag -> BuilderFlags, SerializationFlag -> SerializationFlags,
// ...) — confirm against the enum definitions, which are not in this file.
214using TacticSources = uint32_t;
215using TensorFormats = uint32_t;
216using BuilderFlags = uint32_t;
217using NetworkDefinitionCreationFlags = uint32_t;
218using TempfileControlFlags = uint32_t;
219using SerializationFlags = uint32_t;
220
228
229namespace apiv
230{
231
// Common polymorphic base for every V* interface class declared in this
// namespace. It contributes no operations of its own; its only job is to give
// the hierarchy a virtual, non-throwing destructor so that an object owned
// through a VRoot* (or any derived interface pointer) is destroyed through
// its most-derived type.
class VRoot
{
public:
    // Defaulted: the base holds no state. Declared noexcept to match the
    // noexcept convention used by the derived interfaces.
    virtual ~VRoot() noexcept = default;
};
237
// Abstract, read-only view of a memory buffer: all three accessors are const,
// noexcept and pure virtual.
// NOTE(review): presumably the implementation interface behind IHostMemory
// (forward-declared earlier in this file) — confirm.
238class VHostMemory : public VRoot
239{
240public:
// Address of the buffer (untyped).
241 virtual void* data() const noexcept = 0;
// Size of the buffer. NOTE(review): the unit (bytes vs. elements of type())
// is not visible from this declaration — confirm.
242 virtual std::size_t size() const noexcept = 0;
// Data type associated with the buffer contents.
243 virtual DataType type() const noexcept = 0;
244};
245
// Abstract query interface for a dimension expression.
// Unlike most interfaces in this file, these members are NOT declared
// noexcept; adding noexcept here would make existing overriders without it
// ill-formed, so the difference must be preserved.
// NOTE(review): presumably backs IDimensionExpr (forward-declared earlier in
// this file) — confirm.
246class VDimensionExpr : public VRoot
247{
248public:
249 virtual bool isConstant() const = 0;
// NOTE(review): presumably only meaningful when isConstant() is true — the
// precondition is not visible here; confirm.
250 virtual int64_t getConstantValue() const = 0;
251 virtual bool isSizeTensor() const = 0;
252};
253
// Abstract factory interface producing IDimensionExpr objects. Every factory
// returns a pointer-to-const expression; like VDimensionExpr, the members
// are not declared noexcept.
// NOTE(review): ownership/lifetime of the returned expressions is not
// expressed by these signatures — confirm with the implementation.
254class VExprBuilder : public VRoot
255{
256public:
// Expression for the literal `value`.
257 virtual IDimensionExpr const* constant(int64_t value) = 0;
// Expression combining `first` and `second` with the binary operation `op`.
258 virtual IDimensionExpr const* operation(
259 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second)
260 = 0;
// Declares a size tensor for output `outputIndex`, described by the `opt`
// and `upper` expressions. NOTE(review): exact meaning of opt/upper
// (presumably optimum and upper bound) is not visible here — confirm.
261 virtual IDimensionExpr const* declareSizeTensor(
262 int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
263 = 0;
264};
265
// Abstract runtime interface: engine deserialization, DLA core selection,
// allocator / error-recorder / logger access, thread limit, temporary-file
// settings, plugin registry access and engine-host-code permission.
// All members are noexcept and pure virtual.
// NOTE(review): getPImpl() returns an IRuntime*, presumably the public API
// object paired with this implementation — confirm.
// NOTE(review): declaration order determines vtable slot order, and the
// version-tagged member sits at the end of the class; presumably new
// virtuals must only be appended. Do not reorder.
266class VRuntime : public VRoot
267{
268public:
269 virtual IRuntime* getPImpl() noexcept = 0;
// Builds an engine from a serialized blob of `size` bytes.
270 virtual nvinfer1::ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept = 0;
271 virtual void setDLACore(int32_t dlaCore) noexcept = 0;
272 virtual int32_t getDLACore() const noexcept = 0;
273 virtual int32_t getNbDLACores() const noexcept = 0;
274 virtual void setGpuAllocator(IGpuAllocator* allocator) noexcept = 0;
275 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
276 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
277 virtual ILogger* getLogger() const noexcept = 0;
278 virtual bool setMaxThreads(int32_t maxThreads) noexcept = 0;
279 virtual int32_t getMaxThreads() const noexcept = 0;
280 virtual void setTemporaryDirectory(char const*) noexcept = 0;
281 virtual char const* getTemporaryDirectory() const noexcept = 0;
282 virtual void setTempfileControlFlags(TempfileControlFlags) noexcept = 0;
283 virtual TempfileControlFlags getTempfileControlFlags() const noexcept = 0;
284 virtual IPluginRegistry& getPluginRegistry() noexcept = 0;
285 virtual void setPluginRegistryParent(IPluginRegistry* parent) noexcept = 0;
286 virtual IRuntime* loadRuntime(char const* path) noexcept = 0;
287 virtual void setEngineHostCodeAllowed(bool allowed) noexcept = 0;
288 virtual bool getEngineHostCodeAllowed() const noexcept = 0;
289 // Added in TensorRT version 10.7
// Stream-based variant of deserializeCudaEngine(): reads through an
// IStreamReaderV2 instead of taking an in-memory blob.
290 virtual nvinfer1::ICudaEngine* deserializeCudaEngineV2(IStreamReaderV2& streamReader) noexcept = 0;
291};
292
// Abstract refitter interface: supply replacement weights (by layer + role
// or by weights name), query which weights are missing / available, and
// trigger a refit either synchronously or on a CUDA stream.
// All members are noexcept and pure virtual.
// NOTE(review): getPImpl() returns an IRefitter*, presumably the public API
// object paired with this implementation — confirm.
293class VRefitter : public VRoot
294{
295public:
296 virtual IRefitter* getPImpl() noexcept = 0;
297 virtual bool setWeights(char const* layerName, WeightsRole role, const Weights weights) noexcept = 0;
298 virtual bool refitCudaEngine() noexcept = 0;
// The two queries below fill caller-provided arrays with capacity `size`.
// NOTE(review): presumably the return value is the total count available —
// confirm.
299 virtual int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept = 0;
300 virtual int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept = 0;
301 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
302 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
// Name-keyed counterparts of setWeights / getMissing / getAll.
303 virtual bool setNamedWeights(char const* name, Weights weights) noexcept = 0;
304 virtual int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept = 0;
305 virtual int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept = 0;
306 virtual ILogger* getLogger() const noexcept = 0;
307 virtual bool setMaxThreads(int32_t maxThreads) noexcept = 0;
308 virtual int32_t getMaxThreads() const noexcept = 0;
// Same as setNamedWeights, with an explicit TensorLocation for the weights.
309 virtual bool setNamedWeightsWithLocation(char const* name, Weights weights, TensorLocation location) noexcept = 0;
310 virtual Weights getNamedWeights(char const* weightsName) const noexcept = 0;
311 virtual TensorLocation getWeightsLocation(char const* weightsName) const noexcept = 0;
312 virtual bool unsetNamedWeights(char const* weightsName) noexcept = 0;
313 virtual void setWeightsValidation(bool weightsValidation) noexcept = 0;
314 virtual bool getWeightsValidation() const noexcept = 0;
// Stream-taking variant of refitCudaEngine().
315 virtual bool refitCudaEngineAsync(cudaStream_t stream) noexcept = 0;
316 virtual Weights getWeightsPrototype(char const* weightsName) const noexcept = 0;
317};
318
// Abstract optimization-profile interface: per-input dimensions and shape
// values keyed by input name and OptProfileSelector, an extra-memory target,
// and a validity check. All members are noexcept and pure virtual.
// NOTE(review): presumably backs IOptimizationProfile (forward-declared
// earlier in this file) — confirm.
319class VOptimizationProfile : public VRoot
320{
321public:
322 virtual bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept = 0;
323 virtual Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept = 0;
324 virtual int32_t getNbShapeValues(char const* inputName) const noexcept = 0;
325 virtual bool setExtraMemoryTarget(float target) noexcept = 0;
326 virtual float getExtraMemoryTarget() const noexcept = 0;
327 virtual bool isValid() const noexcept = 0;
328 // Added in TensorRT 10.11
// 64-bit shape-value accessors; both are marked TRT_NODISCARD so callers are
// expected to consume the result.
329 TRT_NODISCARD virtual bool setShapeValuesV2(
330 char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept = 0;
331 TRT_NODISCARD virtual int64_t const* getShapeValuesV2(
332 char const* inputName, OptProfileSelector select) const noexcept = 0;
333};
334
// Abstract engine interface. Groups, in declaration order:
//   - identity / serialization / execution-context creation,
//   - per-tensor queries keyed by tensor name (shape, data type, location,
//     I/O mode, format, vectorization),
//   - "V2" variants of the format queries that additionally take a profile
//     index,
//   - serialization config, refitter creation, weight-streaming and
//     device-memory size queries,
//   - later additions tagged with the TensorRT version that introduced them.
// All members are noexcept and pure virtual.
// NOTE(review): getPImpl() returns an ICudaEngine*, presumably the public API
// object paired with this implementation — confirm.
// NOTE(review): declaration order determines vtable slot order and the
// version-tagged groups appear at the end; presumably new virtuals must only
// be appended. Do not reorder.
335class VCudaEngine : public VRoot
336{
337public:
338 virtual ICudaEngine* getPImpl() noexcept = 0;
339 virtual int32_t getNbLayers() const noexcept = 0;
340 virtual IHostMemory* serialize() const noexcept = 0;
341 virtual IExecutionContext* createExecutionContext(ExecutionContextAllocationStrategy strategy) noexcept = 0;
342 virtual bool isRefittable() const noexcept = 0;
343 virtual char const* getName() const noexcept = 0;
344 virtual int32_t getNbOptimizationProfiles() const noexcept = 0;
345 virtual EngineCapability getEngineCapability() const noexcept = 0;
346 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
347 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
348 virtual TacticSources getTacticSources() const noexcept = 0;
349 virtual ProfilingVerbosity getProfilingVerbosity() const noexcept = 0;
350 virtual IEngineInspector* createEngineInspector() const noexcept = 0;
// Per-tensor queries, keyed by tensor name.
351 virtual Dims getTensorShape(char const* tensorName) const noexcept = 0;
352 virtual DataType getTensorDataType(char const* tensorName) const noexcept = 0;
353 virtual TensorLocation getTensorLocation(char const* tensorName) const noexcept = 0;
354 virtual bool isShapeInferenceIO(char const* tensorName) const noexcept = 0;
355 virtual TensorIOMode getTensorIOMode(char const* tensorName) const noexcept = 0;
356 virtual int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept = 0;
357 virtual int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept = 0;
358 virtual TensorFormat getTensorFormat(char const* tensorName) const noexcept = 0;
359 virtual char const* getTensorFormatDesc(char const* tensorName) const noexcept = 0;
360 virtual int32_t getTensorVectorizedDim(char const* tensorName) const noexcept = 0;
361 virtual Dims getProfileShape(
362 char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept = 0;
363 virtual int32_t getNbIOTensors() const noexcept = 0;
364 virtual char const* getIOTensorName(int32_t index) const noexcept = 0;
365 virtual HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept = 0;
366 virtual int32_t getNbAuxStreams() const noexcept = 0;
367
// Profile-aware variants of the five format queries above.
368 virtual int32_t getTensorBytesPerComponentV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
369 virtual int32_t getTensorComponentsPerElementV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
370 virtual TensorFormat getTensorFormatV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
371 virtual char const* getTensorFormatDescV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
372 virtual int32_t getTensorVectorizedDimV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
373
374 virtual ISerializationConfig* createSerializationConfig() noexcept = 0;
375 virtual IHostMemory* serializeWithConfig(ISerializationConfig& config) const noexcept = 0;
376
377 virtual IRefitter* createRefitter(ILogger& logger) noexcept = 0;
378
379 virtual int64_t getStreamableWeightsSize() const noexcept = 0;
380
381 virtual bool isDebugTensor(char const* name) const noexcept = 0;
382
383 // Added in TensorRT 10.1
384 virtual bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept = 0;
385 virtual int64_t getWeightStreamingBudgetV2() const noexcept = 0;
386 virtual int64_t getWeightStreamingAutomaticBudget() const noexcept = 0;
387 virtual int64_t getWeightStreamingScratchMemorySize() const noexcept = 0;
388 virtual int64_t getDeviceMemorySizeV2() const noexcept = 0;
389 virtual int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept = 0;
390 // Added in TensorRT 10.11
391 TRT_NODISCARD virtual int64_t const* getProfileTensorValuesV2(
392 char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept = 0;
393 TRT_NODISCARD virtual IExecutionContext* createExecutionContextWithRuntimeConfig(
394 IRuntimeConfig* runtimeConfig) noexcept = 0;
395 TRT_NODISCARD virtual IRuntimeConfig* createRuntimeConfig() noexcept = 0;
396 TRT_NODISCARD virtual int64_t getEngineStat(EngineStat stat) const noexcept = 0;
397 // Added in TensorRT 10.15
398 TRT_NODISCARD virtual char const* getAliasedInputTensor(char const* tensorName) const noexcept = 0;
399};
400
// Abstract execution-context interface: profiler / debug settings, device
// memory binding, optimization-profile selection, input shapes and tensor
// addresses, output allocators, execution (executeV2 / enqueueV3), auxiliary
// streams and per-tensor debug state. All members are noexcept and pure
// virtual.
// NOTE(review): getPImpl() returns an IExecutionContext*, presumably the
// public API object paired with this implementation — confirm.
// NOTE(review): declaration order determines vtable slot order; do not
// reorder.
401class VExecutionContext : public VRoot
402{
403public:
404 virtual IExecutionContext* getPImpl() noexcept = 0;
405 virtual void setDebugSync(bool sync) noexcept = 0;
406 virtual bool getDebugSync() const noexcept = 0;
407 virtual void setProfiler(IProfiler*) noexcept = 0;
408 virtual IProfiler* getProfiler() const noexcept = 0;
409 virtual ICudaEngine const& getEngine() const noexcept = 0;
410 virtual void setName(char const* name) noexcept = 0;
411 virtual char const* getName() const noexcept = 0;
412 virtual void setDeviceMemory(void* memory) noexcept = 0;
413 virtual int32_t getOptimizationProfile() const noexcept = 0;
414 virtual bool allInputDimensionsSpecified() const noexcept = 0;
415 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
416 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
// Execution taking an array of binding pointers (no stream argument).
417 virtual bool executeV2(void* const* bindings) noexcept = 0;
418 virtual bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept = 0;
419 virtual void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept = 0;
420 virtual bool getEnqueueEmitsProfile() const noexcept = 0;
421 virtual bool reportToProfiler() const noexcept = 0;
// Name-keyed tensor shape / address configuration.
422 virtual bool setInputShape(char const* tensorName, Dims const& dims) noexcept = 0;
423 virtual Dims getTensorShape(char const* tensorName) const noexcept = 0;
424 virtual Dims getTensorStrides(char const* tensorName) const noexcept = 0;
425 virtual bool setTensorAddress(char const* tensorName, void* data) noexcept = 0;
426 virtual void const* getTensorAddress(char const* tensorName) const noexcept = 0;
427 virtual bool setInputTensorAddress(char const* tensorName, void const* data) noexcept = 0;
428 virtual bool setOutputTensorAddress(char const* tensorName, void* data) noexcept = 0;
429 virtual int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept = 0;
430 virtual bool setInputConsumedEvent(cudaEvent_t event) noexcept = 0;
431 virtual cudaEvent_t getInputConsumedEvent() const noexcept = 0;
432 virtual void* getOutputTensorAddress(char const* tensorName) const noexcept = 0;
433 virtual bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept = 0;
434 virtual IOutputAllocator* getOutputAllocator(char const* name) noexcept = 0;
435 virtual int64_t getMaxOutputSize(char const* tensorName) const noexcept = 0;
436 virtual bool setTemporaryStorageAllocator(IGpuAllocator* allocator) noexcept = 0;
437 virtual IGpuAllocator* getTemporaryStorageAllocator() const noexcept = 0;
// Execution on a CUDA stream; tensor addresses are supplied beforehand via
// the setters above rather than as arguments.
438 virtual bool enqueueV3(cudaStream_t stream) noexcept = 0;
439 virtual void setPersistentCacheLimit(size_t size) noexcept = 0;
440 virtual size_t getPersistentCacheLimit() const noexcept = 0;
441 virtual bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept = 0;
442 virtual ProfilingVerbosity getNvtxVerbosity() const noexcept = 0;
443 virtual void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept = 0;
444 virtual bool setDebugListener(IDebugListener* listener) noexcept = 0;
445 virtual IDebugListener* getDebugListener() noexcept = 0;
446 virtual bool setTensorDebugState(char const* name, bool flag) noexcept = 0;
447 virtual bool getDebugState(char const* name) const noexcept = 0;
448 virtual bool setAllTensorsDebugState(bool flag) noexcept = 0;
449 virtual size_t updateDeviceMemorySizeForShapes() noexcept = 0;
// Variant of setDeviceMemory() that also passes the buffer size.
450 virtual void setDeviceMemoryV2(void* memory, int64_t size) noexcept = 0;
451 TRT_NODISCARD virtual IRuntimeConfig* getRuntimeConfig() const noexcept = 0;
452 virtual bool setUnfusedTensorsDebugState(bool flag) noexcept = 0;
453 virtual bool getUnfusedTensorsDebugState() const noexcept = 0;
// NOTE(review): `communicator` is an opaque pointer; its expected concrete
// type is not visible from this declaration — confirm.
454 virtual bool setCommunicator(void* communicator) noexcept = 0;
455};
456
// Abstract engine-inspector interface: attach an execution context and
// retrieve textual information for one layer or for the whole engine in a
// requested LayerInformationFormat. All members are noexcept and pure
// virtual.
// NOTE(review): getPImpl() returns an IEngineInspector*, presumably the
// public API object paired with this implementation — confirm.
457class VEngineInspector : public VRoot
458{
459public:
460 virtual IEngineInspector* getPImpl() noexcept = 0;
461 virtual bool setExecutionContext(IExecutionContext const* context) noexcept = 0;
462 virtual IExecutionContext const* getExecutionContext() const noexcept = 0;
// NOTE(review): lifetime of the returned C strings is not expressed by the
// signatures — confirm before caching the pointers.
463 virtual char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept = 0;
464 virtual char const* getEngineInformation(LayerInformationFormat format) const noexcept = 0;
465 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
466 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
467};
468
// Abstract tensor interface: name, dimensions, data type, network
// input/output status, location, allowed formats, shape/execution tensor
// classification and per-dimension names. All members are noexcept and pure
// virtual.
// NOTE(review): presumably backs ITensor (forward-declared earlier in this
// file) — confirm.
469class VTensor : public VRoot
470{
471public:
472 virtual void setName(char const* name) noexcept = 0;
473 virtual char const* getName() const noexcept = 0;
474 virtual void setDimensions(Dims const& dimensions) noexcept = 0;
475 virtual Dims getDimensions() const noexcept = 0;
// Read-only here: this interface declares no matching type setter.
476 virtual DataType getType() const noexcept = 0;
477 virtual bool isNetworkInput() const noexcept = 0;
478 virtual bool isNetworkOutput() const noexcept = 0;
479 virtual void setBroadcastAcrossBatch(bool broadcastAcrossBatch) noexcept = 0;
480 virtual bool getBroadcastAcrossBatch() const noexcept = 0;
481 virtual TensorLocation getLocation() const noexcept = 0;
482 virtual void setLocation(TensorLocation location) noexcept = 0;
// `formats` is a TensorFormats value (alias of uint32_t in this file).
483 virtual void setAllowedFormats(TensorFormats formats) noexcept = 0;
484 virtual TensorFormats getAllowedFormats() const noexcept = 0;
485 virtual bool isShapeTensor() const noexcept = 0;
486 virtual bool isExecutionTensor() const noexcept = 0;
487 virtual void setDimensionName(int32_t index, char const* name) noexcept = 0;
488 virtual char const* getDimensionName(int32_t index) const noexcept = 0;
489};
490
// Abstract interface for the operations common to every layer: type, name,
// indexed inputs/outputs, output data type, metadata string and rank count.
// All members are noexcept and pure virtual.
// NOTE(review): presumably backs ILayer (forward-declared earlier in this
// file) — confirm.
491class VLayer : public VRoot
492{
493public:
494 virtual LayerType getType() const noexcept = 0;
495 virtual void setName(char const* name) noexcept = 0;
496 virtual char const* getName() const noexcept = 0;
497 virtual int32_t getNbInputs() const noexcept = 0;
498 virtual ITensor* getInput(int32_t index) const noexcept = 0;
499 virtual int32_t getNbOutputs() const noexcept = 0;
500 virtual ITensor* getOutput(int32_t index) const noexcept = 0;
501 virtual void setInput(int32_t index, ITensor& tensor) noexcept = 0;
502 virtual DataType getOutputType(int32_t index) const noexcept = 0;
503 virtual void setMetadata(char const* docString) noexcept = 0;
504 virtual char const* getMetadata() const noexcept = 0;
// NOTE(review): "ranks" is not defined in this file (presumably
// multi-device participant count, not tensor rank) — confirm.
505 virtual bool setNbRanks(int32_t nbRanks) noexcept = 0;
506 virtual int32_t getNbRanks() const noexcept = 0;
507};
508
// Abstract convolution-layer interface: output-map and group counts, kernel
// and bias weights, pre/post padding and padding mode, and the N-d kernel
// size, stride, padding and dilation. Every setter has a matching getter;
// all members are noexcept and pure virtual.
// The member list mirrors VDeconvolutionLayer exactly.
// NOTE(review): presumably backs IConvolutionLayer (forward-declared earlier
// in this file) — confirm.
509class VConvolutionLayer : public VRoot
510{
511public:
512 virtual void setNbOutputMaps(int64_t nbOutputMaps) noexcept = 0;
513 virtual int64_t getNbOutputMaps() const noexcept = 0;
514 virtual void setNbGroups(int64_t nbGroups) noexcept = 0;
515 virtual int64_t getNbGroups() const noexcept = 0;
// Weights is passed and returned by value.
516 virtual void setKernelWeights(Weights weights) noexcept = 0;
517 virtual Weights getKernelWeights() const noexcept = 0;
518 virtual void setBiasWeights(Weights weights) noexcept = 0;
519 virtual Weights getBiasWeights() const noexcept = 0;
520 virtual void setPrePadding(Dims const& padding) noexcept = 0;
521 virtual Dims getPrePadding() const noexcept = 0;
522 virtual void setPostPadding(Dims const& padding) noexcept = 0;
523 virtual Dims getPostPadding() const noexcept = 0;
524 virtual void setPaddingMode(PaddingMode paddingMode) noexcept = 0;
525 virtual PaddingMode getPaddingMode() const noexcept = 0;
526 virtual void setKernelSizeNd(Dims const& kernelSize) noexcept = 0;
527 virtual Dims getKernelSizeNd() const noexcept = 0;
528 virtual void setStrideNd(Dims const& stride) noexcept = 0;
529 virtual Dims getStrideNd() const noexcept = 0;
530 virtual void setPaddingNd(Dims const& padding) noexcept = 0;
531 virtual Dims getPaddingNd() const noexcept = 0;
532 virtual void setDilationNd(Dims const& dilation) noexcept = 0;
533 virtual Dims getDilationNd() const noexcept = 0;
534};
535
// Abstract activation-layer interface: the activation type plus two float
// parameters, alpha and beta. All members are noexcept and pure virtual.
// Note the declaration order: both setters (alpha, beta) come before both
// getters; the order fixes the vtable layout and must be kept.
// NOTE(review): how alpha/beta are interpreted presumably depends on the
// ActivationType — not visible here; confirm.
536class VActivationLayer : public VRoot
537{
538public:
539 virtual void setActivationType(ActivationType type) noexcept = 0;
540 virtual ActivationType getActivationType() const noexcept = 0;
541 virtual void setAlpha(float alpha) noexcept = 0;
542 virtual void setBeta(float beta) noexcept = 0;
543 virtual float getAlpha() const noexcept = 0;
544 virtual float getBeta() const noexcept = 0;
545};
546
// Abstract pooling-layer interface: pooling type, blend factor, whether the
// average count excludes padding, pre/post padding and padding mode, and the
// N-d window size, stride and padding. Every setter has a matching getter;
// all members are noexcept and pure virtual.
// NOTE(review): presumably backs IPoolingLayer (forward-declared earlier in
// this file) — confirm.
547class VPoolingLayer : public VRoot
548{
549public:
550 virtual void setPoolingType(PoolingType type) noexcept = 0;
551 virtual PoolingType getPoolingType() const noexcept = 0;
552 virtual void setBlendFactor(float blendFactor) noexcept = 0;
553 virtual float getBlendFactor() const noexcept = 0;
554 virtual void setAverageCountExcludesPadding(bool exclusive) noexcept = 0;
555 virtual bool getAverageCountExcludesPadding() const noexcept = 0;
556 virtual void setPrePadding(Dims const& padding) noexcept = 0;
557 virtual Dims getPrePadding() const noexcept = 0;
558 virtual void setPostPadding(Dims const& padding) noexcept = 0;
559 virtual Dims getPostPadding() const noexcept = 0;
560 virtual void setPaddingMode(PaddingMode paddingMode) noexcept = 0;
561 virtual PaddingMode getPaddingMode() const noexcept = 0;
562 virtual void setWindowSizeNd(Dims const& windowSize) noexcept = 0;
563 virtual Dims getWindowSizeNd() const noexcept = 0;
564 virtual void setStrideNd(Dims const& stride) noexcept = 0;
565 virtual Dims getStrideNd() const noexcept = 0;
566 virtual void setPaddingNd(Dims const& padding) noexcept = 0;
567 virtual Dims getPaddingNd() const noexcept = 0;
568};
569
// Abstract LRN-layer interface: an integer window size and three float
// parameters (alpha, beta, k), each with a setter/getter pair. All members
// are noexcept and pure virtual.
// NOTE(review): presumably backs ILRNLayer (forward-declared earlier in this
// file) — confirm.
570class VLRNLayer : public VRoot
571{
572public:
573 virtual void setWindowSize(int64_t windowSize) noexcept = 0;
574 virtual int64_t getWindowSize() const noexcept = 0;
575 virtual void setAlpha(float alpha) noexcept = 0;
576 virtual float getAlpha() const noexcept = 0;
577 virtual void setBeta(float beta) noexcept = 0;
578 virtual float getBeta() const noexcept = 0;
579 virtual void setK(float k) noexcept = 0;
580 virtual float getK() const noexcept = 0;
581};
582
// Abstract scale-layer interface: scale mode, three Weights parameters
// (shift, scale, power) and the channel axis. All members are noexcept and
// pure virtual.
// Note the channel-axis pair is declared getter-first, unlike the other
// pairs; the order fixes the vtable layout and must be kept.
// NOTE(review): presumably backs IScaleLayer (forward-declared earlier in
// this file) — confirm.
583class VScaleLayer : public VRoot
584{
585public:
586 virtual void setMode(ScaleMode mode) noexcept = 0;
587 virtual ScaleMode getMode() const noexcept = 0;
588 virtual void setShift(Weights shift) noexcept = 0;
589 virtual Weights getShift() const noexcept = 0;
590 virtual void setScale(Weights scale) noexcept = 0;
591 virtual Weights getScale() const noexcept = 0;
592 virtual void setPower(Weights power) noexcept = 0;
593 virtual Weights getPower() const noexcept = 0;
594 virtual int32_t getChannelAxis() const noexcept = 0;
595 virtual void setChannelAxis(int32_t channelAxis) noexcept = 0;
596};
597
// Abstract softmax-layer interface: a single `axes` property stored as a
// uint32_t. Both members are noexcept and pure virtual.
// NOTE(review): `axes` is presumably a bitmask with one bit per dimension —
// the encoding is not visible here; confirm.
598class VSoftMaxLayer : public VRoot
599{
600public:
601 virtual void setAxes(uint32_t axes) noexcept = 0;
602 virtual uint32_t getAxes() const noexcept = 0;
603};
604
// Abstract concatenation-layer interface: a single signed `axis` property
// with a setter/getter pair. Both members are noexcept and pure virtual.
// NOTE(review): presumably backs IConcatenationLayer (forward-declared
// earlier in this file) — confirm.
605class VConcatenationLayer : public VRoot
606{
607public:
608 virtual void setAxis(int32_t axis) noexcept = 0;
609 virtual int32_t getAxis() const noexcept = 0;
610};
611
// Abstract deconvolution-layer interface: output-map and group counts,
// kernel and bias weights, pre/post padding and padding mode, and the N-d
// kernel size, stride, padding and dilation. Every setter has a matching
// getter; all members are noexcept and pure virtual.
// The member list mirrors VConvolutionLayer exactly.
// NOTE(review): presumably backs IDeconvolutionLayer (forward-declared
// earlier in this file) — confirm.
612class VDeconvolutionLayer : public VRoot
613{
614public:
615 virtual void setNbOutputMaps(int64_t nbOutputMaps) noexcept = 0;
616 virtual int64_t getNbOutputMaps() const noexcept = 0;
617 virtual void setNbGroups(int64_t nbGroups) noexcept = 0;
618 virtual int64_t getNbGroups() const noexcept = 0;
// Weights is passed and returned by value.
619 virtual void setKernelWeights(Weights weights) noexcept = 0;
620 virtual Weights getKernelWeights() const noexcept = 0;
621 virtual void setBiasWeights(Weights weights) noexcept = 0;
622 virtual Weights getBiasWeights() const noexcept = 0;
623 virtual void setPrePadding(Dims const& padding) noexcept = 0;
624 virtual Dims getPrePadding() const noexcept = 0;
625 virtual void setPostPadding(Dims const& padding) noexcept = 0;
626 virtual Dims getPostPadding() const noexcept = 0;
627 virtual void setPaddingMode(PaddingMode paddingMode) noexcept = 0;
628 virtual PaddingMode getPaddingMode() const noexcept = 0;
629 virtual void setKernelSizeNd(Dims const& kernelSize) noexcept = 0;
630 virtual Dims getKernelSizeNd() const noexcept = 0;
631 virtual void setStrideNd(Dims const& stride) noexcept = 0;
632 virtual Dims getStrideNd() const noexcept = 0;
633 virtual void setPaddingNd(Dims const& padding) noexcept = 0;
634 virtual Dims getPaddingNd() const noexcept = 0;
635 virtual void setDilationNd(Dims const& dilation) noexcept = 0;
636 virtual Dims getDilationNd() const noexcept = 0;
637};
638
// Abstract element-wise-layer interface: a single ElementWiseOperation
// property with a setter/getter pair. Both members are noexcept and pure
// virtual.
// NOTE(review): presumably backs IElementWiseLayer (forward-declared earlier
// in this file) — confirm.
639class VElementWiseLayer : public VRoot
640{
641public:
642 virtual void setOperation(ElementWiseOperation op) noexcept = 0;
643 virtual ElementWiseOperation getOperation() const noexcept = 0;
644};
645
// Abstract gather-layer interface: gather axis, number of element-wise
// dimensions and GatherMode, each with a setter/getter pair. All members are
// noexcept and pure virtual.
// NOTE(review): presumably backs IGatherLayer (forward-declared earlier in
// this file) — confirm.
646class VGatherLayer : public VRoot
647{
648public:
649 virtual void setGatherAxis(int32_t axis) noexcept = 0;
650 virtual int32_t getGatherAxis() const noexcept = 0;
651 virtual void setNbElementWiseDims(int32_t k) noexcept = 0;
652 virtual int32_t getNbElementWiseDims() const noexcept = 0;
653 virtual void setMode(GatherMode mode) noexcept = 0;
654 virtual GatherMode getMode() const noexcept = 0;
655};
656
// Abstract plugin-layer interface exposing the layer's plugin as a mutable
// IPlugin reference (so it is never null). noexcept and pure virtual.
// Sibling interfaces VPluginV2Layer / VPluginV3Layer do the same for
// IPluginV2 / IPluginV3.
657class VPluginLayer : public VRoot
658{
659public:
660 virtual IPlugin& getPlugin() noexcept = 0;
661};
662
// Abstract plugin-layer interface exposing the layer's plugin as a mutable
// IPluginV2 reference (so it is never null). noexcept and pure virtual.
// NOTE(review): IPluginV2 is not declared in the visible part of this file;
// presumably it comes from an included header — confirm.
663class VPluginV2Layer : public VRoot
664{
665public:
666 virtual IPluginV2& getPlugin() noexcept = 0;
667};
668
// Abstract plugin-layer interface exposing the layer's plugin as a mutable
// IPluginV3 reference (so it is never null). noexcept and pure virtual.
// NOTE(review): IPluginV3 is forward-declared in v_1_0 earlier in this file;
// the unqualified name used here presumably relies on an alias that is not
// visible in this listing — confirm.
669class VPluginV3Layer : public VRoot
670{
671public:
672 virtual IPluginV3& getPlugin() noexcept = 0;
673};
674
// Abstract unary-layer interface: a single UnaryOperation property with a
// setter/getter pair. Both members are noexcept and pure virtual.
// NOTE(review): presumably backs IUnaryLayer (forward-declared earlier in
// this file) — confirm.
675class VUnaryLayer : public VRoot
676{
677public:
678 virtual void setOperation(UnaryOperation op) noexcept = 0;
679 virtual UnaryOperation getOperation() const noexcept = 0;
680};
681
// Abstract reduce-layer interface: the ReduceOperation, the axes to reduce
// over (as a uint32_t) and whether reduced dimensions are kept. All members
// are noexcept and pure virtual.
// NOTE(review): `reduceAxes` is presumably a bitmask with one bit per
// dimension — the encoding is not visible here; confirm.
682class VReduceLayer : public VRoot
683{
684public:
685 virtual void setOperation(ReduceOperation op) noexcept = 0;
686 virtual ReduceOperation getOperation() const noexcept = 0;
687 virtual void setReduceAxes(uint32_t reduceAxes) noexcept = 0;
688 virtual uint32_t getReduceAxes() const noexcept = 0;
689 virtual void setKeepDimensions(bool keepDimensions) noexcept = 0;
690 virtual bool getKeepDimensions() const noexcept = 0;
691};
692
// Abstract padding-layer interface: N-d pre-padding and post-padding, each a
// Dims value with a setter/getter pair. All members are noexcept and pure
// virtual.
// NOTE(review): presumably backs IPaddingLayer (forward-declared earlier in
// this file) — confirm.
693class VPaddingLayer : public VRoot
694{
695public:
696 virtual void setPrePaddingNd(Dims const& padding) noexcept = 0;
697 virtual Dims getPrePaddingNd() const noexcept = 0;
698 virtual void setPostPaddingNd(Dims const& padding) noexcept = 0;
699 virtual Dims getPostPaddingNd() const noexcept = 0;
700};
701
// Abstract shuffle-layer interface: a first transpose, reshape dimensions, a
// second transpose, and a flag controlling whether zero is treated as a
// placeholder. All members are noexcept and pure virtual.
// The transpose getters return `Permutation const&`, i.e. a reference to an
// object that must outlive the call.
// NOTE(review): the referenced Permutation is presumably owned by the layer
// — confirm before holding the reference.
702class VShuffleLayer : public VRoot
703{
704public:
705 virtual void setFirstTranspose(Permutation const& permutation) noexcept = 0;
706 virtual Permutation const& getFirstTranspose() const noexcept = 0;
707 virtual void setReshapeDimensions(Dims const& dimensions) noexcept = 0;
708 virtual Dims getReshapeDimensions() const noexcept = 0;
709 virtual void setSecondTranspose(Permutation const& permutation) noexcept = 0;
710 virtual Permutation const& getSecondTranspose() const noexcept = 0;
711 virtual void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept = 0;
712 virtual bool getZeroIsPlaceholder() const noexcept = 0;
713};
714
// Abstract slice-layer interface: start, size, stride and axes (all Dims
// values) plus a SampleMode, each with a setter/getter pair. All members are
// noexcept and pure virtual.
// NOTE(review): presumably backs ISliceLayer (forward-declared earlier in
// this file) — confirm.
715class VSliceLayer : public VRoot
716{
717public:
718 virtual void setStart(Dims const& start) noexcept = 0;
719 virtual Dims getStart() const noexcept = 0;
720 virtual void setSize(Dims const& size) noexcept = 0;
721 virtual Dims getSize() const noexcept = 0;
722 virtual void setStride(Dims const& stride) noexcept = 0;
723 virtual Dims getStride() const noexcept = 0;
724 virtual void setMode(SampleMode mode) noexcept = 0;
725 virtual SampleMode getMode() const noexcept = 0;
726 virtual void setAxes(Dims const& axes) noexcept = 0;
727 virtual Dims getAxes() const noexcept = 0;
728};
729
// Shape-layer interface. Intentionally empty: it declares no members of its
// own and only inherits VRoot's virtual destructor, giving the layer kind a
// distinct interface type.
730class VShapeLayer : public VRoot
731{
732public:
733};
734
// Abstract top-k-layer interface: the TopKOperation, k, the axes to reduce
// over (as a uint32_t) and the data type of the indices output. All members
// are noexcept and pure virtual.
// setIndicesType() is the only setter here that returns bool rather than
// void.
// NOTE(review): the bool presumably signals whether the type was accepted —
// confirm.
735class VTopKLayer : public VRoot
736{
737public:
738 virtual void setOperation(TopKOperation op) noexcept = 0;
739 virtual TopKOperation getOperation() const noexcept = 0;
740 virtual void setK(int32_t k) noexcept = 0;
741 virtual int32_t getK() const noexcept = 0;
742 virtual void setReduceAxes(uint32_t reduceAxes) noexcept = 0;
743 virtual uint32_t getReduceAxes() const noexcept = 0;
744 virtual bool setIndicesType(DataType type) noexcept = 0;
745 virtual DataType getIndicesType() const noexcept = 0;
746};
747
// Abstract matrix-multiply-layer interface: a MatrixOperation stored per
// index, with an indexed setter/getter pair. Both members are noexcept and
// pure virtual.
// NOTE(review): `index` presumably selects the input operand (0 or 1) —
// the valid range is not visible here; confirm.
748class VMatrixMultiplyLayer : public VRoot
749{
750public:
751 virtual void setOperation(int32_t index, MatrixOperation op) noexcept = 0;
752 virtual MatrixOperation getOperation(int32_t index) const noexcept = 0;
753};
754
// Abstract non-zero-layer interface: the data type used for the indices
// output. Both members are noexcept and pure virtual; the signatures match
// the indices-type pair on VTopKLayer.
// NOTE(review): the bool returned by the setter presumably signals whether
// the type was accepted — confirm.
755class VNonZeroLayer : public VRoot
756{
757public:
758 virtual bool setIndicesType(DataType type) noexcept = 0;
759 virtual DataType getIndicesType() const noexcept = 0;
760};
761
// Ragged-softmax-layer interface. Intentionally empty: it declares no
// members of its own and only inherits VRoot's virtual destructor, giving
// the layer kind a distinct interface type.
762class VRaggedSoftMaxLayer : public VRoot
763{
764public:
765};
766
// Identity-layer interface. Intentionally empty: it declares no members of
// its own and only inherits VRoot's virtual destructor, giving the layer
// kind a distinct interface type.
767class VIdentityLayer : public VRoot
768{
769public:
770};
771
// Abstract cast-layer interface: the target DataType of the cast, with a
// setter/getter pair. Both members are noexcept and pure virtual.
// NOTE(review): presumably backs ICastLayer (forward-declared earlier in
// this file) — confirm.
772class VCastLayer : public VRoot
773{
774public:
775 virtual void setToType(DataType toType) noexcept = 0;
776 virtual DataType getToType() const noexcept = 0;
777};
778
// Abstract constant-layer interface: the constant's Weights and its Dims,
// each with a setter/getter pair. All members are noexcept and pure virtual.
// NOTE(review): Weights is passed by value; whether the underlying buffer is
// copied or merely referenced is not visible here — confirm before freeing
// caller-side storage.
779class VConstantLayer : public VRoot
780{
781public:
782 virtual void setWeights(Weights weights) noexcept = 0;
783 virtual Weights getWeights() const noexcept = 0;
784 virtual void setDimensions(Dims const& dimensions) noexcept = 0;
785 virtual Dims getDimensions() const noexcept = 0;
786};
787
788class VParametricReLULayer : public VRoot
789{
790public:
791};
792
793class VResizeLayer : public VRoot
794{
795public:
796 virtual void setOutputDimensions(Dims const& dimensions) noexcept = 0;
797 virtual Dims getOutputDimensions() const noexcept = 0;
798 virtual void setScales(float const* scales, int32_t nbScales) noexcept = 0;
799 virtual int32_t getScales(int32_t size, float* scales) const noexcept = 0;
800 virtual void setResizeMode(InterpolationMode interpolationMode) noexcept = 0;
801 virtual InterpolationMode getResizeMode() const noexcept = 0;
802 virtual void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform) noexcept = 0;
803 virtual ResizeCoordinateTransformation getCoordinateTransformation() const noexcept = 0;
804 virtual void setSelectorForSinglePixel(ResizeSelector selector) noexcept = 0;
805 virtual ResizeSelector getSelectorForSinglePixel() const noexcept = 0;
806 virtual void setNearestRounding(ResizeRoundMode value) noexcept = 0;
807 virtual ResizeRoundMode getNearestRounding() const noexcept = 0;
808 virtual void setCubicCoeff(float value) noexcept = 0;
809 virtual float getCubicCoeff() const noexcept = 0;
810 virtual void setExcludeOutside(bool value) noexcept = 0;
811 virtual bool getExcludeOutside() const noexcept = 0;
812};
813
814class VLoopBoundaryLayer : public VRoot
815{
816public:
817 virtual ILoop* getLoop() const noexcept = 0;
818};
819
820class VRecurrenceLayer : public VRoot
821{
822public:
823};
824
825class VLoopOutputLayer : public VRoot
826{
827public:
828 virtual LoopOutput getLoopOutput() const noexcept = 0;
829 virtual void setAxis(int32_t axis) noexcept = 0;
830 virtual int32_t getAxis() const noexcept = 0;
831};
832
833class VTripLimitLayer : public VRoot
834{
835public:
836 virtual TripLimit getTripLimit() const noexcept = 0;
837};
838
839class VIteratorLayer : public VRoot
840{
841public:
842 virtual void setAxis(int32_t axis) noexcept = 0;
843 virtual int32_t getAxis() const noexcept = 0;
844 virtual void setReverse(bool reverse) noexcept = 0;
845 virtual bool getReverse() const noexcept = 0;
846};
847class VLoop : public VRoot
848{
849public:
850 virtual IRecurrenceLayer* addRecurrence(ITensor& initialValue) noexcept = 0;
851 virtual ITripLimitLayer* addTripLimit(ITensor& tensor, TripLimit limit) noexcept = 0;
852 virtual IIteratorLayer* addIterator(ITensor& tensor, int32_t axis = 0, bool reverse = false) noexcept = 0;
853 virtual ILoopOutputLayer* addLoopOutput(ITensor& tensor, LoopOutput outputKind, int32_t axis = 0) noexcept = 0;
854 virtual void setName(char const* name) noexcept = 0;
855 virtual char const* getName() const noexcept = 0;
856};
857
858class VConditionalBoundaryLayer : public VRoot
859{
860public:
861 virtual IIfConditional* getConditional() const noexcept = 0;
862};
863
864class VConditionLayer : public VRoot
865{
866public:
867};
868
869class VConditionalInputLayer : public VRoot
870{
871public:
872};
873
874class VConditionalOutputLayer : public VRoot
875{
876public:
877};
878
879class VIfConditional : public VRoot
880{
881public:
882 virtual IConditionLayer* setCondition(ITensor& tensor) noexcept = 0;
883 virtual IIfConditionalInputLayer* addInput(ITensor& tensor) noexcept = 0;
884 virtual IIfConditionalOutputLayer* addOutput(ITensor& trueTensor, ITensor& falseTensor) noexcept = 0;
885 virtual void setName(char const* name) noexcept = 0;
886 virtual char const* getName() const noexcept = 0;
887};
888
889class VAttentionBoundaryLayer : public VRoot
890{
891public:
892 virtual IAttention* getAttention() const noexcept = 0;
893};
894
895class VAttentionInputLayer : public VRoot
896{
897public:
898};
899
900class VAttentionOutputLayer : public VRoot
901{
902public:
903};
904
905class VAttention : public VRoot
906{
907public:
908 TRT_NODISCARD virtual bool setInput(int32_t index, ITensor& input) noexcept = 0;
909 TRT_NODISCARD virtual int32_t getNbInputs() const noexcept = 0;
910 TRT_NODISCARD virtual ITensor* getInput(int32_t index) const noexcept = 0;
911 TRT_NODISCARD virtual int32_t getNbOutputs() const noexcept = 0;
912 TRT_NODISCARD virtual ITensor* getOutput(int32_t index) const noexcept = 0;
913 TRT_NODISCARD virtual bool setName(char const* name) noexcept = 0;
914 TRT_NODISCARD virtual char const* getName() const noexcept = 0;
915 TRT_NODISCARD virtual bool setNormalizationOperation(AttentionNormalizationOp op) noexcept = 0;
916 TRT_NODISCARD virtual AttentionNormalizationOp getNormalizationOperation() const noexcept = 0;
917 TRT_DEPRECATED virtual bool setCausal(bool isCausal) noexcept = 0;
918 TRT_DEPRECATED virtual bool getCausal() const noexcept = 0;
919 TRT_NODISCARD virtual bool setMask(ITensor& mask) noexcept = 0;
920 TRT_NODISCARD virtual ITensor* getMask() const noexcept = 0;
921 TRT_NODISCARD virtual bool setDecomposable(bool decomposable) noexcept = 0;
922 TRT_NODISCARD virtual bool getDecomposable() const noexcept = 0;
923 TRT_NODISCARD virtual bool setNormalizationQuantizeScale(ITensor& tensor) noexcept = 0;
924 TRT_NODISCARD virtual ITensor* getNormalizationQuantizeScale() const noexcept = 0;
925 TRT_NODISCARD virtual bool setNormalizationQuantizeToType(DataType type) noexcept = 0;
926 TRT_NODISCARD virtual DataType getNormalizationQuantizeToType() const noexcept = 0;
927 TRT_NODISCARD virtual bool setMetadata(char const* docString) noexcept = 0;
928 TRT_NODISCARD virtual char const* getMetadata() const noexcept = 0;
929 TRT_NODISCARD virtual bool setNbRanks(int32_t nbRanks) noexcept = 0;
930 TRT_NODISCARD virtual int32_t getNbRanks() const noexcept = 0;
931 TRT_NODISCARD virtual bool setCausalKind(CausalMaskKind kind) noexcept = 0;
932 TRT_NODISCARD virtual CausalMaskKind getCausalKind() const noexcept = 0;
933 TRT_NODISCARD virtual bool setQueryForm(AttentionIOForm form) noexcept = 0;
934 TRT_NODISCARD virtual AttentionIOForm getQueryForm() const noexcept = 0;
935 TRT_NODISCARD virtual bool setKeyValueForm(AttentionIOForm form) noexcept = 0;
936 TRT_NODISCARD virtual AttentionIOForm getKeyValueForm() const noexcept = 0;
937 TRT_NODISCARD virtual bool setQueryLengths(ITensor* lengths) noexcept = 0;
938 TRT_NODISCARD virtual ITensor* getQueryLengths() const noexcept = 0;
939 TRT_NODISCARD virtual bool setKeyValueLengths(ITensor* lengths) noexcept = 0;
940 TRT_NODISCARD virtual ITensor* getKeyValueLengths() const noexcept = 0;
941}; // class VAttention
942
943class VSelectLayer : public VRoot
944{
945};
946
947class VAssertionLayer : public VRoot
948{
949public:
950 virtual void setMessage(char const* message) noexcept = 0;
951 virtual char const* getMessage() const noexcept = 0;
952};
953
954class VFillLayer : public VRoot
955{
956public:
957 virtual void setDimensions(Dims const& dimensions) noexcept = 0;
958 virtual Dims getDimensions() const noexcept = 0;
959 virtual void setOperation(FillOperation op) noexcept = 0;
960 virtual FillOperation getOperation() const noexcept = 0;
961 virtual void setAlpha(double alpha) noexcept = 0;
962 virtual double getAlpha() const noexcept = 0;
963 virtual void setBeta(double beta) noexcept = 0;
964 virtual double getBeta() const noexcept = 0;
965 virtual void setAlphaInt64(int64_t alpha) noexcept = 0;
966 virtual int64_t getAlphaInt64() const noexcept = 0;
967 virtual void setBetaInt64(int64_t beta) noexcept = 0;
968 virtual int64_t getBetaInt64() const noexcept = 0;
969 virtual bool isAlphaBetaInt64() const noexcept = 0;
970 virtual DataType getToType() const noexcept = 0;
971 virtual void setToType(DataType toType) noexcept = 0;
972};
973
974class VQuantizeLayer : public VRoot
975{
976public:
977 virtual int32_t getAxis() const noexcept = 0;
978 virtual void setAxis(int32_t axis) noexcept = 0;
979 virtual DataType getToType() const noexcept = 0;
980 virtual void setToType(DataType toType) noexcept = 0;
981 virtual Dims getBlockShape() const noexcept = 0;
982 virtual bool setBlockShape(Dims const& blockShape) noexcept = 0;
983};
984
985class VDequantizeLayer : public VRoot
986{
987public:
988 virtual int32_t getAxis() const noexcept = 0;
989 virtual void setAxis(int32_t axis) noexcept = 0;
990 virtual DataType getToType() const noexcept = 0;
991 virtual void setToType(DataType toType) noexcept = 0;
992 virtual Dims getBlockShape() const noexcept = 0;
993 virtual bool setBlockShape(Dims const& blockShape) noexcept = 0;
994};
995
996class VDynamicQuantizeLayer : public VRoot
997{
998public:
999 TRT_DEPRECATED virtual int32_t getAxis() const noexcept = 0;
1000 TRT_DEPRECATED virtual void setAxis(int32_t axis) noexcept = 0;
1001 TRT_DEPRECATED virtual int32_t getBlockSize() const noexcept = 0;
1002 TRT_DEPRECATED virtual void setBlockSize(int32_t axis) noexcept = 0;
1003 virtual DataType getScaleType() const noexcept = 0;
1004 virtual void setScaleType(DataType axis) noexcept = 0;
1005 virtual DataType getToType() const noexcept = 0;
1006 virtual void setToType(DataType toType) noexcept = 0;
1007 virtual Dims getBlockShape() const noexcept = 0;
1008 virtual void setBlockShape(Dims const& blockShape) noexcept = 0;
1009};
1010
1011class VScatterLayer : public VRoot
1012{
1013public:
1014 virtual void setMode(ScatterMode mode) noexcept = 0;
1015 virtual ScatterMode getMode() const noexcept = 0;
1016 virtual void setAxis(int32_t axis) noexcept = 0;
1017 virtual int32_t getAxis() const noexcept = 0;
1018}; // class VScatterLayer
1019
1020class VEinsumLayer : public VRoot
1021{
1022public:
1023 virtual bool setEquation(char const* equation) noexcept = 0;
1024 virtual char const* getEquation() const noexcept = 0;
1025};
1026
1027class VOneHotLayer : public VRoot
1028{
1029public:
1030 virtual int32_t getAxis() const noexcept = 0;
1031 virtual void setAxis(int32_t axis) noexcept = 0;
1032}; // class VOneHotLayer
1033
1034class VGridSampleLayer : public VRoot
1035{
1036public:
1037 virtual void setInterpolationMode(InterpolationMode mode) noexcept = 0;
1038 virtual InterpolationMode getInterpolationMode() const noexcept = 0;
1039 virtual void setAlignCorners(bool alignCorners) noexcept = 0;
1040 virtual bool getAlignCorners() const noexcept = 0;
1041 virtual bool setSampleMode(SampleMode mode) noexcept = 0;
1042 virtual SampleMode getSampleMode() const noexcept = 0;
1043}; // class VGridSampleLayer
1044
1045class VNMSLayer : public VRoot
1046{
1047public:
1048 virtual void setBoundingBoxFormat(BoundingBoxFormat fmt) noexcept = 0;
1049 virtual BoundingBoxFormat getBoundingBoxFormat() const noexcept = 0;
1050 virtual void setTopKBoxLimit(int32_t limit) noexcept = 0;
1051 virtual int32_t getTopKBoxLimit() const noexcept = 0;
1052 virtual bool setIndicesType(DataType type) noexcept = 0;
1053 virtual DataType getIndicesType() const noexcept = 0;
1054}; // class VNMSLayer
1055
1056class VReverseSequenceLayer : public VRoot
1057{
1058public:
1059 virtual void setBatchAxis(int32_t batchAxis) noexcept = 0;
1060 virtual int32_t getBatchAxis() const noexcept = 0;
1061
1062 virtual void setSequenceAxis(int32_t sequenceAxis) noexcept = 0;
1063 virtual int32_t getSequenceAxis() const noexcept = 0;
1064}; // class VReverseSequenceLayer
1065
1066class VNormalizationLayer : public VRoot
1067{
1068public:
1069 virtual void setEpsilon(float eps) noexcept = 0;
1070 virtual float getEpsilon() const noexcept = 0;
1071 virtual void setAxes(uint32_t axesMask) noexcept = 0;
1072 virtual uint32_t getAxes() const noexcept = 0;
1073 virtual void setNbGroups(int64_t nbGroups) noexcept = 0;
1074 virtual int64_t getNbGroups() const noexcept = 0;
1075 virtual bool isV2() const noexcept = 0;
1076}; // class VNormalizationLayer
1077
1078class VSqueezeLayer : public VRoot
1079{
1080};
1081
1082class VUnsqueezeLayer : public VRoot
1083{
1084};
1085
1086class VCumulativeLayer : public VRoot
1087{
1088public:
1089 virtual bool setOperation(CumulativeOperation op) noexcept = 0;
1090 virtual CumulativeOperation getOperation() const noexcept = 0;
1091 virtual void setExclusive(bool exclusive) noexcept = 0;
1092 virtual bool getExclusive() const noexcept = 0;
1093 virtual void setReverse(bool reverse) noexcept = 0;
1094 virtual bool getReverse() const noexcept = 0;
1095}; // class VCumulativeLayer
1096
1097class VRotaryEmbeddingLayer : public VRoot
1098{
1099public:
1100 virtual void setInterleaved(bool interleaved) noexcept = 0;
1101 virtual bool getInterleaved() const noexcept = 0;
1102 virtual bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept = 0;
1103 virtual int32_t getRotaryEmbeddingDim() const noexcept = 0;
1104 virtual void setInput(int32_t index, ITensor& input) noexcept = 0;
1105}; // class VRotaryEmbeddingLayer
1106
1107class VKVCacheUpdateLayer : public VRoot
1108{
1109public:
1110 TRT_NODISCARD virtual bool setCacheMode(KVCacheMode cacheMode) noexcept = 0;
1111 TRT_NODISCARD virtual KVCacheMode getCacheMode() const noexcept = 0;
1112 TRT_NODISCARD virtual bool setUpdateForm(AttentionIOForm form) noexcept = 0;
1113 TRT_NODISCARD virtual AttentionIOForm getUpdateForm() const noexcept = 0;
1114 TRT_NODISCARD virtual bool setUpdateLengths(ITensor* lengths) noexcept = 0;
1115 TRT_NODISCARD virtual ITensor* getUpdateLengths() const noexcept = 0;
1116}; // class VKVCacheUpdateLayer
1117
1118class VMoELayer : public VRoot
1119{
1120public:
1121 virtual void setGatedWeights(ITensor& fcGateWeights, ITensor& fcUpWeights, ITensor& fcDownWeights, MoEActType activationType) noexcept = 0;
1122 virtual void setGatedBiases(ITensor& fcGateBiases, ITensor& fcUpBiases, ITensor& fcDownBiases) noexcept = 0;
1123 virtual void setActivationType(MoEActType activationType) noexcept = 0;
1124 virtual MoEActType getActivationType() const noexcept = 0;
1125 virtual void setQuantizationStatic(ITensor& fcDownActivationScale, DataType dataType) noexcept = 0;
1126 virtual void setQuantizationDynamicDblQ(ITensor& fcDownActivationDblQScale, DataType dataType, Dims const& blockShape, DataType dynQOutputScaleType) noexcept = 0;
1127 virtual void setQuantizationToType(DataType type) noexcept = 0;
1128 virtual DataType getQuantizationToType() const noexcept = 0;
1129 virtual void setQuantizationBlockShape(Dims const& blockShape) noexcept = 0;
1130 virtual Dims getQuantizationBlockShape() const noexcept = 0;
1131 virtual void setDynQOutputScaleType(DataType type) noexcept = 0;
1132 virtual DataType getDynQOutputScaleType() const noexcept = 0;
1133 virtual void setSwigluParams(float limit, float alpha, float beta) noexcept = 0;
1134 virtual void setSwigluParamLimit(float limit) noexcept = 0;
1135 virtual float getSwigluParamLimit() const noexcept = 0;
1136 virtual void setSwigluParamAlpha(float alpha) noexcept = 0;
1137 virtual float getSwigluParamAlpha() const noexcept = 0;
1138 virtual void setSwigluParamBeta(float beta) noexcept = 0;
1139 virtual float getSwigluParamBeta() const noexcept = 0;
1140 virtual void setInput(int32_t index, ITensor& tensor) noexcept = 0;
1141}; // class VMoELayer
1142
1143
1144class VNetworkDefinition : public VRoot
1145{
1146public:
1147 virtual ITensor* addInput(char const* name, DataType type, Dims const& dimensions) noexcept = 0;
1148 virtual void markOutput(ITensor& tensor) noexcept = 0;
1149 virtual IActivationLayer* addActivation(ITensor& input, ActivationType type) noexcept = 0;
1150 virtual ILRNLayer* addLRN(ITensor& input, int64_t window, float alpha, float beta, float k) noexcept = 0;
1151 virtual IScaleLayer* addScale(
1152 ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept = 0;
1153 virtual ISoftMaxLayer* addSoftMax(ITensor& input) noexcept = 0;
1154 virtual IConcatenationLayer* addConcatenation(ITensor* const* inputs, int32_t nbInputs) noexcept = 0;
1155 virtual IElementWiseLayer* addElementWise(ITensor& input1, ITensor& input2, ElementWiseOperation op) noexcept = 0;
1156 virtual IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) noexcept = 0;
1157 virtual IShuffleLayer* addShuffle(ITensor& input) noexcept = 0;
1158 virtual int32_t getNbLayers() const noexcept = 0;
1159 virtual ILayer* getLayer(int32_t index) const noexcept = 0;
1160 virtual int32_t getNbInputs() const noexcept = 0;
1161 virtual ITensor* getInput(int32_t index) const noexcept = 0;
1162 virtual int32_t getNbOutputs() const noexcept = 0;
1163 virtual ITensor* getOutput(int32_t index) const noexcept = 0;
1164 virtual IReduceLayer* addReduce(
1165 ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
1166 = 0;
1167 virtual ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept = 0;
1168 virtual IGatherLayer* addGather(ITensor& data, ITensor& indices, int32_t axis) noexcept = 0;
1169 virtual IRaggedSoftMaxLayer* addRaggedSoftMax(ITensor& input, ITensor& bounds) noexcept = 0;
1170 virtual IMatrixMultiplyLayer* addMatrixMultiply(
1171 ITensor& input0, MatrixOperation op0, ITensor& input1, MatrixOperation op1) noexcept = 0;
1172 virtual IConstantLayer* addConstant(Dims const& dimensions, Weights weights) noexcept = 0;
1173 virtual IIdentityLayer* addIdentity(ITensor& input) noexcept = 0;
1174 virtual void removeTensor(ITensor& tensor) noexcept = 0;
1175 virtual void unmarkOutput(ITensor& tensor) noexcept = 0;
1176 virtual IPluginV2Layer* addPluginV2(ITensor* const* inputs, int32_t nbInputs, IPluginV2& plugin) noexcept = 0;
1177 virtual IPluginV3Layer* addPluginV3(ITensor* const* inputs, int32_t nbInputs, ITensor* const* shapeInputs,
1178 int32_t nbShapeInputs, IPluginV3& plugin) noexcept = 0;
1179 virtual ISliceLayer* addSlice(ITensor& input, Dims const& start, Dims const& size, Dims const& stride) noexcept = 0;
1180 virtual void setName(char const* name) noexcept = 0;
1181 virtual char const* getName() const noexcept = 0;
1182 virtual IShapeLayer* addShape(ITensor& input) noexcept = 0;
1183 virtual bool hasImplicitBatchDimension() const noexcept = 0;
1184 virtual bool markOutputForShapes(ITensor& tensor) noexcept = 0;
1185 virtual bool unmarkOutputForShapes(ITensor& tensor) noexcept = 0;
1186 virtual IParametricReLULayer* addParametricReLU(ITensor& input, ITensor& slope) noexcept = 0;
1187 virtual IConvolutionLayer* addConvolutionNd(
1188 ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
1189 = 0;
1190 virtual IPoolingLayer* addPoolingNd(ITensor& input, PoolingType type, Dims const& windowSize) noexcept = 0;
1191 virtual IDeconvolutionLayer* addDeconvolutionNd(
1192 ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
1193 = 0;
1194 virtual IScaleLayer* addScaleNd(
1195 ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept = 0;
1196 virtual IResizeLayer* addResize(ITensor& input) noexcept = 0;
1197 virtual ILoop* addLoop() noexcept = 0;
1198 virtual ISelectLayer* addSelect(ITensor& condition, ITensor& thenInput, ITensor& elseInput) noexcept = 0;
1199 virtual IPaddingLayer* addPaddingNd(ITensor& input, Dims const& prePadding, Dims const& postPadding) noexcept = 0;
1200 virtual bool setWeightsName(Weights weights, char const* name) noexcept = 0;
1201 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
1202 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
1203 virtual IGatherLayer* addGatherV2(ITensor& data, ITensor& indices, GatherMode mode) noexcept = 0;
1204 virtual IIfConditional* addIfConditional() noexcept = 0;
1205 virtual IScatterLayer* addScatter(ITensor& data, ITensor& indices, ITensor& updates, ScatterMode mode) noexcept = 0;
1206 virtual IEinsumLayer* addEinsum(ITensor* const* inputs, int32_t nbInputs, char const* equation) noexcept = 0;
1207 virtual IAssertionLayer* addAssertion(ITensor& condition, char const* message) noexcept = 0;
1208 virtual IOneHotLayer* addOneHot(ITensor& indices, ITensor& values, ITensor& depth, int32_t axis) noexcept = 0;
1209 virtual INonZeroLayer* addNonZero(ITensor& input) noexcept = 0;
1210 virtual IGridSampleLayer* addGridSample(ITensor& input, ITensor& grid) noexcept = 0;
1211 virtual INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass) noexcept = 0;
1212 virtual IReverseSequenceLayer* addReverseSequence(ITensor& input, ITensor& sequenceLens) noexcept = 0;
1213 virtual INormalizationLayer* addNormalization(
1214 ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept = 0;
1215 virtual ICastLayer* addCast(ITensor& input, DataType toType) noexcept = 0;
1216 virtual IBuilder& getBuilder() const noexcept = 0;
1217 virtual NetworkDefinitionCreationFlags getFlags() const noexcept = 0;
1218 virtual bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept = 0;
1219 virtual IQuantizeLayer* addQuantizeV2(ITensor& input, ITensor& scale, DataType outputType) noexcept = 0;
1220 virtual IDequantizeLayer* addDequantizeV2(ITensor& input, ITensor& scale, DataType outputType) noexcept = 0;
1221 virtual IFillLayer* addFillV2(Dims const& dimensions, FillOperation op, DataType outputType) noexcept = 0;
1222 virtual bool markDebug(ITensor& tensor) noexcept = 0;
1223 virtual bool unmarkDebug(ITensor& tensor) noexcept = 0;
1224 virtual bool isDebugTensor(ITensor const& tensor) const noexcept = 0;
1225 virtual bool markWeightsRefittable(char const* name) noexcept = 0;
1226 virtual bool unmarkWeightsRefittable(char const* name) noexcept = 0;
1227 virtual bool areWeightsMarkedRefittable(char const* name) const noexcept = 0;
1228 virtual ISqueezeLayer* addSqueeze(ITensor& input, ITensor& axes) noexcept = 0;
1229 virtual IUnsqueezeLayer* addUnsqueeze(ITensor& input, ITensor& axes) noexcept = 0;
1230 virtual IDynamicQuantizeLayer* addDynamicQuantize(
1231 ITensor& input, int32_t axis, int32_t blockSize, DataType toType, DataType scaleType) noexcept = 0;
1232 virtual ICumulativeLayer* addCumulative(
1233 ITensor& input, ITensor& axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept = 0;
1234 virtual bool markUnfusedTensorsAsDebugTensors() noexcept = 0;
1235 virtual bool unmarkUnfusedTensorsAsDebugTensors() noexcept = 0;
1236 virtual ITopKLayer* addTopKV2(
1237 ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes, DataType indicesType) noexcept = 0;
1238 virtual INonZeroLayer* addNonZeroV2(ITensor& input, DataType indicesType) noexcept = 0;
1239 virtual INMSLayer* addNMSV2(
1240 ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass, DataType indicesType) noexcept = 0;
1241 TRT_DEPRECATED virtual IAttention* addAttention(
1242 ITensor& query, ITensor& key, ITensor& value, AttentionNormalizationOp normOp, bool isCausal) noexcept = 0;
1243 virtual IRotaryEmbeddingLayer* addRotaryEmbedding(ITensor& input, ITensor& cosCache, ITensor& sinCache,
1244 bool interleaved, int32_t rotaryEmbeddingDim) noexcept = 0;
1245 virtual IDynamicQuantizeLayer* addDynamicQuantizeV2(
1246 ITensor& input, Dims const& blockShape, DataType toType, DataType scaleType) noexcept = 0;
1247 virtual IKVCacheUpdateLayer* addKVCacheUpdate(
1248 ITensor& cache, ITensor& update, ITensor& writeIndices, KVCacheMode cacheMode) noexcept = 0;
1249 virtual INormalizationLayer* addNormalizationV2(
1250 ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept = 0;
1251 virtual IMoELayer* addMoE(
1252 ITensor& hiddenStates, ITensor& selectedExpertsForTokens, ITensor& scoresForSelectedExperts) noexcept = 0;
1253 virtual IDistCollectiveLayer* addDistCollective(ITensor& input, CollectiveOperation distCollectiveOp,
1254 ReduceOperation reduceOp, int64_t root, int64_t* groups, int64_t groupSize) noexcept = 0;
1255 virtual IAttention* addAttentionV2(ITensor& query, ITensor& key, ITensor& value, AttentionNormalizationOp normOp,
1256 CausalMaskKind causalKind) noexcept = 0;
1257};
1258
1259class VTimingCache : public VRoot
1260{
1261public:
1262 virtual nvinfer1::IHostMemory* serialize() const noexcept = 0;
1263 virtual bool combine(ITimingCache const& inputCache, bool ignoreMismatch) noexcept = 0;
1264 virtual bool reset() noexcept = 0;
1265 virtual int64_t queryKeys(TimingCacheKey* keyBuffer, int64_t capacity) const noexcept = 0;
1266 virtual TimingCacheValue query(TimingCacheKey const& key) const noexcept = 0;
1267 virtual bool update(TimingCacheKey const& key, TimingCacheValue const& value) noexcept = 0;
1268};
1269
1270class VBuilderConfig : public VRoot
1271{
1272public:
1273 virtual void setAvgTimingIterations(int32_t avgTiming) noexcept = 0;
1274 virtual int32_t getAvgTimingIterations() const noexcept = 0;
1275 virtual void setEngineCapability(EngineCapability capability) noexcept = 0;
1276 virtual EngineCapability getEngineCapability() const noexcept = 0;
1277 virtual void setFlags(BuilderFlags builderFlags) noexcept = 0;
1278 virtual BuilderFlags getFlags() const noexcept = 0;
1279 virtual void clearFlag(BuilderFlag builderFlag) noexcept = 0;
1280 virtual void setFlag(BuilderFlag builderFlag) noexcept = 0;
1281 virtual bool getFlag(BuilderFlag builderFlag) const noexcept = 0;
1282 virtual void setDeviceType(ILayer const* layer, DeviceType deviceType) noexcept = 0;
1283 virtual DeviceType getDeviceType(ILayer const* layer) const noexcept = 0;
1284 virtual bool isDeviceTypeSet(ILayer const* layer) const noexcept = 0;
1285 virtual void resetDeviceType(ILayer const* layer) noexcept = 0;
1286 virtual bool canRunOnDLA(ILayer const* layer) const noexcept = 0;
1287 virtual void setDLACore(int32_t dlaCore) noexcept = 0;
1288 virtual int32_t getDLACore() const noexcept = 0;
1289 virtual void setDefaultDeviceType(DeviceType deviceType) noexcept = 0;
1290 virtual DeviceType getDefaultDeviceType() const noexcept = 0;
1291 virtual void reset() noexcept = 0;
1292 virtual void setProfileStream(const cudaStream_t stream) noexcept = 0;
1293 virtual cudaStream_t getProfileStream() const noexcept = 0;
1294 virtual int32_t addOptimizationProfile(IOptimizationProfile const* profile) noexcept = 0;
1295 virtual int32_t getNbOptimizationProfiles() const noexcept = 0;
1296 virtual void setProfilingVerbosity(ProfilingVerbosity verbosity) noexcept = 0;
1297 virtual ProfilingVerbosity getProfilingVerbosity() const noexcept = 0;
1298 virtual bool setTacticSources(TacticSources tacticSources) noexcept = 0;
1299 virtual TacticSources getTacticSources() const noexcept = 0;
1300 virtual nvinfer1::ITimingCache* createTimingCache(void const* blob, std::size_t size) const noexcept = 0;
1301 virtual bool setTimingCache(ITimingCache const& cache, bool ignoreMismatch) noexcept = 0;
1302 virtual nvinfer1::ITimingCache const* getTimingCache() const noexcept = 0;
1303 virtual void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept = 0;
1304 virtual std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept = 0;
1305 virtual void setPreviewFeature(PreviewFeature feature, bool enable) noexcept = 0;
1306 virtual bool getPreviewFeature(PreviewFeature feature) const noexcept = 0;
1307 virtual void setBuilderOptimizationLevel(int32_t level) noexcept = 0;
1308 virtual int32_t getBuilderOptimizationLevel() const noexcept = 0;
1309 virtual void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept = 0;
1310 virtual HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept = 0;
1311 virtual void setPluginsToSerialize(char const* const* paths, int32_t nbPaths) noexcept = 0;
1312 virtual char const* getPluginToSerialize(int32_t index) const noexcept = 0;
1313 virtual int32_t getNbPluginsToSerialize() const noexcept = 0;
1314 virtual bool setMaxAuxStreams(int32_t nbStreams) noexcept = 0;
1315 virtual int32_t getMaxAuxStreams() const noexcept = 0;
1316 virtual void setProgressMonitor(IProgressMonitor* monitor) noexcept = 0;
1317 virtual IProgressMonitor* getProgressMonitor() const noexcept = 0;
1318 virtual void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept = 0;
1319 virtual RuntimePlatform getRuntimePlatform() const noexcept = 0;
1320 virtual void setMaxNbTactics(int32_t maxTactics) noexcept = 0;
1321 virtual int32_t getMaxNbTactics() const noexcept = 0;
1322 virtual bool setTilingOptimizationLevel(TilingOptimizationLevel level) noexcept = 0;
1323 virtual TilingOptimizationLevel getTilingOptimizationLevel() const noexcept = 0;
1324 virtual bool setL2LimitForTiling(int64_t size) noexcept = 0;
1325 virtual int64_t getL2LimitForTiling() const noexcept = 0;
1326 virtual bool setRemoteAutoTuningConfig(char const* config) noexcept = 0;
1327 virtual char const* getRemoteAutoTuningConfig() const noexcept = 0;
1328};
1329
1330class VSerializationConfig : public VRoot
1331{
1332public:
1333 virtual bool setFlags(SerializationFlags serializationFlags) noexcept = 0;
1334 virtual SerializationFlags getFlags() const noexcept = 0;
1335 virtual bool clearFlag(SerializationFlag serializationFlag) noexcept = 0;
1336 virtual bool setFlag(SerializationFlag serializationFlag) noexcept = 0;
1337 virtual bool getFlag(SerializationFlag serializationFlag) const noexcept = 0;
1338};
1339
1340class VBuilder : public VRoot
1341{
1342public:
1343 virtual int32_t getMaxDLABatchSize() const noexcept = 0;
1344 virtual int32_t getNbDLACores() const noexcept = 0;
1345 virtual void setGpuAllocator(IGpuAllocator* allocator) noexcept = 0;
1346 virtual nvinfer1::IBuilderConfig* createBuilderConfig() noexcept = 0;
1347 virtual nvinfer1::INetworkDefinition* createNetworkV2(NetworkDefinitionCreationFlags flags) noexcept = 0;
1348 virtual nvinfer1::IOptimizationProfile* createOptimizationProfile() noexcept = 0;
1349 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
1350 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
1351 virtual void reset() noexcept = 0;
1352 virtual nvinfer1::IHostMemory* buildSerializedNetwork(
1353 INetworkDefinition& network, IBuilderConfig& config) noexcept = 0;
1354 virtual bool isNetworkSupported(INetworkDefinition const& network, IBuilderConfig const& config) const noexcept = 0;
1355 virtual ILogger* getLogger() const noexcept = 0;
1356 virtual bool setMaxThreads(int32_t maxThreads) noexcept = 0;
1357 virtual int32_t getMaxThreads() const noexcept = 0;
1358 virtual IPluginRegistry& getPluginRegistry() noexcept = 0;
1359 virtual ICudaEngine* buildEngineWithConfig(INetworkDefinition& network, IBuilderConfig& config) noexcept = 0;
1360 virtual bool buildSerializedNetworkToStream(
1361 INetworkDefinition& network, IBuilderConfig& config, IStreamWriter& writer) noexcept = 0;
1362 virtual nvinfer1::IHostMemory* buildSerializedNetworkWithKernelText(
1363 INetworkDefinition& network, IBuilderConfig& config, IHostMemory*& kernelText) noexcept
1364 = 0;
1365};
1366
1367class VRuntimeConfig : public VRoot
1368{
1369public:
1370 virtual IRuntimeConfig* getPImpl() noexcept = 0;
1371 virtual void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept = 0;
1372 virtual ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept = 0;
1373};
1374
1375
1376class VDistCollectiveLayer : public VRoot
1377{
1378}; // VDistCollectiveLayer
1379
1380} // namespace apiv
1381} // namespace nvinfer1
1382
1383// @endcond
1384
1385#endif // NV_INFER_IMPL_H
TENSORRTAPI nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
TENSORRTAPI nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
#define TRT_NODISCARD
A stand-in for [[nodiscard]] and [[nodiscard(REASON)]] that works with older compilers.
Definition: NvInferRuntimeBase.h:57
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
Structure to define the dimensions of a tensor.
User-implemented callback for notification when value of a debug tensor is updated.
Reference counted application-implemented error reporting interface for TensorRT objects.
Application-implemented class for controlling allocation on the GPU.
Callback from ExecutionContext::enqueueV3()
Plugin class for the V3 generation of user-implemented layers.
Application-implemented interface for profiling.
Application-implemented progress reporting interface for TensorRT.
Application-implemented class for reading data in a stream-based manner.
Application-implemented class for reading data in a stream-based manner asynchronously....
Application-implemented class for writing data in a stream-based manner.
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:139
The TensorRT API version 1 namespace.
Definition: NvInferSafePlugin.h:33
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2780
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:3903
ResizeSelector
The coordinate selector when resize to single pixel output.
Definition: NvInfer.h:3980
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:178
MemoryPoolType
The type for memory pools used by TensorRT.
Definition: NvInfer.h:10289
AttentionIOForm
Enumerates the layout of the input/output tensors in an Attention layer.
Definition: NvInfer.h:6791
ScaleMode
Controls how shift, scale and power are applied in a Scale layer.
Definition: NvInfer.h:1650
RuntimePlatform
Describes the intended runtime platform (operating system and CPU architecture) for the execution of ...
Definition: NvInfer.h:9920
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:662
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:10397
CumulativeOperation
Enumerates the cumulative operations that may be performed by a Cumulative layer.
Definition: NvInfer.h:6599
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2821
BoundingBoxFormat
Representation of bounding box data used for the Boxes input tensor in INMSLayer.
Definition: NvInfer.h:6166
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:2608
v_1_0::IStreamWriter IStreamWriter
Definition: NvInferRuntime.h:699
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1309
v_1_0::IStreamReaderV2 IStreamReaderV2
Definition: NvInferRuntime.h:776
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:143
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1387
FillOperation
Enumerates the tensor fill operations that may be performed by a fill layer.
Definition: NvInfer.h:4983
EngineStat
The kind of engine statistics that can be queried from the ICudaEngine.
Definition: NvInferRuntime.h:2990
ResizeRoundMode
The rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4007
v_1_0::ILogger ILogger
Definition: NvInferRuntimeBase.h:125
CausalMaskKind
Enumerates the causal mask alignment orientation for the attention.
Definition: NvInfer.h:6763
PaddingMode
Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer,...
Definition: NvInfer.h:826
TripLimit
Enum that describes kinds of trip limits.
Definition: NvInfer.h:4365
uint32_t NetworkDefinitionCreationFlags
Represents one or more NetworkDefinitionCreationFlag flags using binary OR operations....
Definition: NvInfer.h:11380
PreviewFeature
Define preview features.
Definition: NvInfer.h:10363
TilingOptimizationLevel
Define the optimization levels for Tiling.
Definition: NvInfer.h:10447
DataType
The type of weights and tensors. The datatypes other than kBOOL, kINT32, and kINT64 are "activation d...
Definition: NvInferRuntimeBase.h:149
uint32_t BuilderFlags
Represents one or more BuilderFlag values using binary OR operations, e.g., 1U << BuilderFlag::kDEBUG...
Definition: NvInfer.h:9949
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1341
LayerType
The type values of layer classes.
Definition: NvInfer.h:58
SampleMode
Controls how ISliceLayer and IGridSampleLayer handle out-of-bounds coordinates.
Definition: NvInfer.h:3110
GatherMode
Control form of IGatherLayer.
Definition: NvInfer.h:2349
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:3944
MoEActType
Enumerates the activation type for the MoE layer.
Definition: NvInfer.h:7680
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1319
uint32_t TensorFormats
It is capable of representing one or more TensorFormat by binary OR operations, e....
Definition: NvInfer.h:135
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2792
NetworkDefinitionCreationFlag
List of immutable network properties expressed at network creation time. NetworkDefinitionCreationFla...
Definition: NvInfer.h:11391
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:2260
CollectiveOperation
Enumerates the collective operations that may be performed by a DistCollective layer.
Definition: NvInfer.h:2738
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2811
InterpolationMode
Enumerates various modes of interpolation.
Definition: NvInfer.h:3904
BuilderFlag
List of valid modes that the builder can enable when creating an engine from a network definition.
Definition: NvInfer.h:9959
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntime.h:1421
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:2928
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:3395
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:2710
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:4878
ScatterMode
Control form of IScatterLayer.
Definition: NvInfer.h:5893
MatrixOperation
Enumerates the operations that may be performed on a tensor by IMatrixMultiplyLayer before multiplica...
Definition: NvInfer.h:3550
ResizeCoordinateTransformation
The resize coordinate transformation function.
Definition: NvInfer.h:3929
LoopOutput
Enum that describes kinds of loop outputs.
Definition: NvInfer.h:4337
KVCacheMode
Enumerates the KVCache modes that may be performed by a KVCacheUpdate layer.
Definition: NvInfer.h:7527
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntime.h:689
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1263
v_1_0::IProgressMonitor IProgressMonitor
Definition: NvInfer.h:10560
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:203
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2539
AttentionNormalizationOp
Enumerates the operations that may be performed by the normalization in the attention subgraph.
Definition: NvInfer.h:6731
nvinfer1::IPluginV3 IPluginV3
Definition: NvInferForwardDecl.h:47

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact