TensorRT 10.16.0
NvInferImpl.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_IMPL_H
19#define NV_INFER_IMPL_H
20
21#include "NvInferLegacyDims.h"
23
24// @cond SuppressDoxyWarnings
25
26namespace nvinfer1
27{
28
// Forward declarations only: ILogger at nvinfer1 scope, then IProfiler and
// IDebugListener inside the nested v_1_0 namespace.
// NOTE(review): three of the v_1_0 namespace blocks below are empty in this
// listing and the embedded numbering has gaps; their contents were presumably
// dropped by the extraction -- confirm against the original header.
29class ILogger;
30
31namespace v_1_0
32{
34} // namespace v_1_0
36
37namespace v_1_0
38{
40} // namespace v_1_0
42
43namespace v_1_0
44{
45class IProfiler;
46} // namespace v_1_0
48
49namespace v_1_0
50{
52} // namespace v_1_0
54
55namespace v_1_0
56{
57class IDebugListener;
58} // namespace v_1_0
60
// Forward declarations of nvinfer1 classes. Nothing is defined here; the
// declarations let the interfaces later in this file mention these types by
// pointer or reference without their full definitions.
61class IActivationLayer;
62class IAlgorithm;
63class IAlgorithmContext;
64class IAlgorithmIOInfo;
65class IAlgorithmVariant;
66class IAssertionLayer;
67class IAttention;
68class IBuilder;
69class IBuilderConfig;
70class IConcatenationLayer;
71class IConditionLayer;
72class IConstantLayer;
73class IConvolutionLayer;
74class ICudaEngine;
75class ICumulativeLayer;
76class IDeconvolutionLayer;
77class IDequantizeLayer;
78class IDimensionExpr;
79class IDynamicQuantizeLayer;
80class IEinsumLayer;
81class IElementWiseLayer;
82class IEngineInspector;
83class IExecutionContext;
84class IFillLayer;
85class IGatherLayer;
86class IGridSampleLayer;
87class IHostMemory;
88class IIdentityLayer;
89class ICastLayer;
90class IIfConditional;
91class IIfConditionalInputLayer;
92class IIfConditionalOutputLayer;
93class IInt8Calibrator;
94class IIteratorLayer;
95class IKVCacheUpdateLayer;
96class ILayer;
97class ILoop;
98class ILoopOutputLayer;
99class ILRNLayer;
100class IMatrixMultiplyLayer;
101class IMoELayer;
102class IDistCollectiveLayer;
103class INetworkDefinition;
104class INormalizationLayer;
105class INMSLayer;
106class INonZeroLayer;
107class IOneHotLayer;
108class IOptimizationProfile;
109class IPaddingLayer;
110class IParametricReLULayer;
111class IPlugin;
112class IPluginExt;
113class IPluginFactory;
114class IPluginLayer;
115class IPluginRegistry;
116class IPluginV2Layer;
117class IRotaryEmbeddingLayer;
118class IRuntimeConfig;
119
// Forward declarations placed in the nested v_1_0 namespace: IPluginV3,
// IStreamReader, IStreamWriter and IStreamReaderV2.
// NOTE(review): no using-declarations for these names are visible in this
// part of the file, yet later interfaces refer to them unqualified
// (e.g. IPluginV3&, IStreamReader&) -- presumably aliases are provided by
// another header; confirm.
120namespace v_1_0
121{
122class IPluginV3;
123} // namespace v_1_0
125
126namespace v_1_0
127{
128class IStreamReader;
129class IStreamWriter;
130} // namespace v_1_0
133namespace v_1_0
134{
135class IStreamReaderV2;
136} // namespace v_1_0
138
// Second run of nvinfer1 class forward declarations (declarations only,
// no definitions).
139class IPluginV3Layer;
140class IPoolingLayer;
141class IQuantizeLayer;
142class IRaggedSoftMaxLayer;
143class IRecurrenceLayer;
144class IReduceLayer;
145class IRefitter;
146class IResizeLayer;
147class IReverseSequenceLayer;
148class IRuntime;
149class IScaleLayer;
150class IScatterLayer;
151class ISelectLayer;
152class ISerializationConfig;
153class IShapeLayer;
154class IShuffleLayer;
155class ISliceLayer;
156class ISoftMaxLayer;
157class ISqueezeLayer;
158class ITensor;
159
// Forward-declares the TimingCacheKey and TimingCacheValue structs inside
// v_1_0, then re-exports both names into the enclosing namespace through
// using-aliases so they can be spelled without the v_1_0:: qualifier.
160namespace v_1_0
161{
162struct TimingCacheKey;
163struct TimingCacheValue;
164} // namespace v_1_0
165using TimingCacheKey = v_1_0::TimingCacheKey;
166using TimingCacheValue = v_1_0::TimingCacheValue;
167
// Remaining forward declarations. Note that Permutation is declared as a
// struct while the other names are declared as classes.
168class ITimingCache;
169class ITopKLayer;
170class ITripLimitLayer;
171class IUnaryLayer;
172class IUnsqueezeLayer;
173struct Permutation;
174class Weights;
175
// Opaque declarations of scoped enumerations. Each one fixes the underlying
// type to int32_t, which is what allows the enum to be named (and passed by
// value in the interfaces later in this file) before its enumerators are
// defined. The enumerators themselves are not visible here.
176enum class ActivationType : int32_t;
177enum class AttentionNormalizationOp : int32_t;
178enum class BoundingBoxFormat : int32_t;
179enum class BuilderFlag : int32_t;
180enum class CalibrationAlgoType : int32_t;
181enum class CumulativeOperation : int32_t;
182enum class DeviceType : int32_t;
183enum class DimensionOperation : int32_t;
184enum class ElementWiseOperation : int32_t;
185enum class EngineCapability : int32_t;
186enum class FillOperation : int32_t;
187enum class GatherMode : int32_t;
188enum class KVCacheMode : int32_t;
189enum class LayerInformationFormat : int32_t;
190enum class LayerType : int32_t;
191enum class LoopOutput : int32_t;
192enum class MatrixOperation : int32_t;
193enum class MemoryPoolType : int32_t;
194enum class MoEActType : int32_t;
195enum class NetworkDefinitionCreationFlag : int32_t;
196enum class OptProfileSelector : int32_t;
197enum class PaddingMode : int32_t;
198enum class PoolingType : int32_t;
199enum class ProfilingVerbosity : int32_t;
200enum class QuantizationFlag : int32_t;
201enum class ReduceOperation : int32_t;
202enum class CollectiveOperation : int32_t;
203enum class ResizeCoordinateTransformation : int32_t;
204enum class InterpolationMode : int32_t;
205enum class ResizeRoundMode : int32_t;
206enum class ResizeSelector : int32_t;
207enum class ScaleMode : int32_t;
208enum class ScatterMode : int32_t;
209enum class SampleMode : int32_t;
210enum class SerializationFlag : int32_t;
211enum class TensorIOMode : int32_t;
212enum class TensorLocation : int32_t;
213enum class TopKOperation : int32_t;
214enum class TripLimit : int32_t;
215enum class UnaryOperation : int32_t;
216enum class WeightsRole : int32_t;
217enum class PreviewFeature : int32_t;
218enum class HardwareCompatibilityLevel : int32_t;
219enum class ExecutionContextAllocationStrategy : int32_t;
220enum class RuntimePlatform : int32_t;
221enum class TilingOptimizationLevel : int32_t;
222enum class EngineStat : int32_t;
223
224
// Type aliases; every one is a plain uint32_t.
// NOTE(review): the plural names suggest these are bitmasks combining values
// of the similarly named enums -- the encoding is not shown here; confirm.
225using TacticSources = uint32_t;
226using TensorFormats = uint32_t;
227using BuilderFlags = uint32_t;
228using NetworkDefinitionCreationFlags = uint32_t;
229using QuantizationFlags = uint32_t;
230using TempfileControlFlags = uint32_t;
231using SerializationFlags = uint32_t;
232
240
241namespace apiv
242{
243
// Common base of the V* interface classes in this namespace. It declares
// nothing except a defaulted, virtual, noexcept destructor, so objects of
// derived types can be destroyed through a VRoot pointer.
244class VRoot
245{
246public:
247 virtual ~VRoot() noexcept = default;
248};
249
// Pure-virtual interface exposing three read-only queries on a memory
// object: a data pointer, a size and a DataType. All members are const and
// noexcept.
// NOTE(review): the unit of size() (bytes vs. elements) is not stated in
// this file -- confirm before relying on it.
250class VHostMemory : public VRoot
251{
252public:
253 virtual void* data() const noexcept = 0;
254 virtual std::size_t size() const noexcept = 0;
255 virtual DataType type() const noexcept = 0;
256};
257
// Pure-virtual interface with three const queries on a dimension
// expression: whether it is constant, its constant value, and whether it is
// a size tensor.
// Unlike most interfaces in this file, these members are NOT declared
// noexcept; overriders therefore need not be noexcept either.
258class VDimensionExpr : public VRoot
259{
260public:
261 virtual bool isConstant() const = 0;
262 virtual int64_t getConstantValue() const = 0;
263 virtual bool isSizeTensor() const = 0;
264};
265
// Pure-virtual interface that produces IDimensionExpr objects. Every member
// returns a pointer-to-const IDimensionExpr; none is declared noexcept.
// NOTE(review): ownership and lifetime of the returned pointers are not
// expressed in these signatures -- confirm with the implementation.
266class VExprBuilder : public VRoot
267{
268public:
    // Builds an expression from a literal 64-bit value.
269 virtual IDimensionExpr const* constant(int64_t value) = 0;
    // Builds an expression from an operation code and two operand expressions.
270 virtual IDimensionExpr const* operation(
271 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second)
272 = 0;
    // Takes an output index plus two expressions named opt and upper.
273 virtual IDimensionExpr const* declareSizeTensor(
274 int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
275 = 0;
276};
277
// Pure-virtual, all-noexcept interface covering runtime-level operations:
// engine deserialization (from a memory blob, an IStreamReader or an
// IStreamReaderV2), DLA core selection, allocator / error-recorder / logger
// access, thread limits, temporary-directory and tempfile flags, plugin
// registry access, loading another runtime from a path, and the
// engine-host-code permission flag.
// NOTE(review): new members are appended at the end (see the version
// comment below); presumably this keeps existing virtual-table slots stable
// -- confirm before inserting or reordering anything.
278class VRuntime : public VRoot
279{
280public:
    // Returns a pointer typed as the public IRuntime class.
281 virtual IRuntime* getPImpl() noexcept = 0;
282 virtual nvinfer1::ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept = 0;
283 virtual nvinfer1::ICudaEngine* deserializeCudaEngine(IStreamReader& streamReader) noexcept = 0;
284 virtual void setDLACore(int32_t dlaCore) noexcept = 0;
285 virtual int32_t getDLACore() const noexcept = 0;
286 virtual int32_t getNbDLACores() const noexcept = 0;
287 virtual void setGpuAllocator(IGpuAllocator* allocator) noexcept = 0;
288 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
289 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
290 virtual ILogger* getLogger() const noexcept = 0;
291 virtual bool setMaxThreads(int32_t maxThreads) noexcept = 0;
292 virtual int32_t getMaxThreads() const noexcept = 0;
293 virtual void setTemporaryDirectory(char const*) noexcept = 0;
294 virtual char const* getTemporaryDirectory() const noexcept = 0;
295 virtual void setTempfileControlFlags(TempfileControlFlags) noexcept = 0;
296 virtual TempfileControlFlags getTempfileControlFlags() const noexcept = 0;
297 virtual IPluginRegistry& getPluginRegistry() noexcept = 0;
298 virtual void setPluginRegistryParent(IPluginRegistry* parent) noexcept = 0;
299 virtual IRuntime* loadRuntime(char const* path) noexcept = 0;
300 virtual void setEngineHostCodeAllowed(bool allowed) noexcept = 0;
301 virtual bool getEngineHostCodeAllowed() const noexcept = 0;
302 // Added in TensorRT version 10.7
303 virtual nvinfer1::ICudaEngine* deserializeCudaEngineV2(IStreamReaderV2& streamReader) noexcept = 0;
304};
305
// Pure-virtual, all-noexcept interface for refitting: setting weights by
// layer name + role or by weights name, listing missing / all weights,
// dynamic-range accessors, error recorder and logger access, thread limits,
// weights location and validation settings, and synchronous or
// stream-based refit entry points.
306class VRefitter : public VRoot
307{
308public:
    // Returns a pointer typed as the public IRefitter class.
309 virtual IRefitter* getPImpl() noexcept = 0;
    // The top-level const on the by-value `weights` parameter does not change
    // the function's type; sibling setters take plain `Weights`.
310 virtual bool setWeights(char const* layerName, WeightsRole role, const Weights weights) noexcept = 0;
311 virtual bool refitCudaEngine() noexcept = 0;
    // The listing functions below take a caller-supplied array length and
    // output arrays, and return an int32_t.
    // NOTE(review): presumably the return value is a count -- confirm.
312 virtual int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept = 0;
313 virtual int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept = 0;
314 virtual bool setDynamicRange(char const* tensorName, float min, float max) noexcept = 0;
315 virtual float getDynamicRangeMin(char const* tensorName) const noexcept = 0;
316 virtual float getDynamicRangeMax(char const* tensorName) const noexcept = 0;
317 virtual int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept = 0;
318 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
319 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
320 virtual bool setNamedWeights(char const* name, Weights weights) noexcept = 0;
321 virtual int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept = 0;
322 virtual int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept = 0;
323 virtual ILogger* getLogger() const noexcept = 0;
324 virtual bool setMaxThreads(int32_t maxThreads) noexcept = 0;
325 virtual int32_t getMaxThreads() const noexcept = 0;
326 virtual bool setNamedWeightsWithLocation(char const* name, Weights weights, TensorLocation location) noexcept = 0;
327 virtual Weights getNamedWeights(char const* weightsName) const noexcept = 0;
328 virtual TensorLocation getWeightsLocation(char const* weightsName) const noexcept = 0;
329 virtual bool unsetNamedWeights(char const* weightsName) noexcept = 0;
330 virtual void setWeightsValidation(bool weightsValidation) noexcept = 0;
331 virtual bool getWeightsValidation() const noexcept = 0;
    // Variant of the refit call that takes a CUDA stream handle.
332 virtual bool refitCudaEngineAsync(cudaStream_t stream) noexcept = 0;
333 virtual Weights getWeightsPrototype(char const* weightsName) const noexcept = 0;
334};
335
// Pure-virtual, all-noexcept interface for an optimization profile:
// per-input dimensions and shape values keyed by input name and an
// OptProfileSelector, an extra-memory target, and a validity check.
// The V2 shape-value members use int64_t values where the original members
// use int32_t, and are marked TRT_NODISCARD.
336class VOptimizationProfile : public VRoot
337{
338public:
339 virtual bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept = 0;
340 virtual Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept = 0;
341 virtual bool setShapeValues(
342 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept = 0;
343 virtual int32_t getNbShapeValues(char const* inputName) const noexcept = 0;
344 virtual int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept = 0;
345 virtual bool setExtraMemoryTarget(float target) noexcept = 0;
346 virtual float getExtraMemoryTarget() const noexcept = 0;
347 virtual bool isValid() const noexcept = 0;
348 // Added in TensorRT 10.11
349 TRT_NODISCARD virtual bool setShapeValuesV2(
350 char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept = 0;
351 TRT_NODISCARD virtual int64_t const* getShapeValuesV2(
352 char const* inputName, OptProfileSelector select) const noexcept = 0;
353};
354
// Pure-virtual, all-noexcept interface for an engine object. Members fall
// into these groups: serialization, execution-context creation, device
// memory sizing, per-tensor metadata queries keyed by tensor name,
// optimization-profile queries, weight-streaming budgets, and factory
// functions for inspectors, refitters and configs.
// Members are grouped by the release in which they were appended (see the
// "Added in" comments).
// NOTE(review): presumably the declaration order must be preserved for
// binary compatibility -- confirm before inserting or reordering.
355class VCudaEngine : public VRoot
356{
357public:
    // Returns a pointer typed as the public ICudaEngine class.
358 virtual ICudaEngine* getPImpl() noexcept = 0;
359 virtual int32_t getNbLayers() const noexcept = 0;
360 virtual IHostMemory* serialize() const noexcept = 0;
361 virtual IExecutionContext* createExecutionContext(ExecutionContextAllocationStrategy strategy) noexcept = 0;
362 virtual IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept = 0;
363 virtual size_t getDeviceMemorySize() const noexcept = 0;
364 virtual bool isRefittable() const noexcept = 0;
365 virtual char const* getName() const noexcept = 0;
366 virtual int32_t getNbOptimizationProfiles() const noexcept = 0;
367 virtual int32_t const* getProfileTensorValues(
368 char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept = 0;
369 virtual EngineCapability getEngineCapability() const noexcept = 0;
370 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
371 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
372 virtual bool hasImplicitBatchDimension() const noexcept = 0;
373 virtual TacticSources getTacticSources() const noexcept = 0;
374 virtual ProfilingVerbosity getProfilingVerbosity() const noexcept = 0;
375 virtual IEngineInspector* createEngineInspector() const noexcept = 0;
    // Per-tensor queries keyed by tensor name only.
376 virtual Dims getTensorShape(char const* tensorName) const noexcept = 0;
377 virtual DataType getTensorDataType(char const* tensorName) const noexcept = 0;
378 virtual TensorLocation getTensorLocation(char const* tensorName) const noexcept = 0;
379 virtual bool isShapeInferenceIO(char const* tensorName) const noexcept = 0;
380 virtual TensorIOMode getTensorIOMode(char const* tensorName) const noexcept = 0;
381 virtual int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept = 0;
382 virtual int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept = 0;
383 virtual TensorFormat getTensorFormat(char const* tensorName) const noexcept = 0;
384 virtual char const* getTensorFormatDesc(char const* tensorName) const noexcept = 0;
385 virtual int32_t getTensorVectorizedDim(char const* tensorName) const noexcept = 0;
386 virtual Dims getProfileShape(
387 char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept = 0;
388 virtual int32_t getNbIOTensors() const noexcept = 0;
389 virtual char const* getIOTensorName(int32_t index) const noexcept = 0;
390 virtual HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept = 0;
391 virtual int32_t getNbAuxStreams() const noexcept = 0;
392
    // V2 variants of the format queries above; each adds a profileIndex argument.
393 virtual int32_t getTensorBytesPerComponentV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
394 virtual int32_t getTensorComponentsPerElementV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
395 virtual TensorFormat getTensorFormatV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
396 virtual char const* getTensorFormatDescV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
397 virtual int32_t getTensorVectorizedDimV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
398
399 virtual ISerializationConfig* createSerializationConfig() noexcept = 0;
400 virtual IHostMemory* serializeWithConfig(ISerializationConfig& config) const noexcept = 0;
401
402 virtual size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept = 0;
403 virtual IRefitter* createRefitter(ILogger& logger) noexcept = 0;
404
405 virtual bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept = 0;
406 virtual int64_t getWeightStreamingBudget() const noexcept = 0;
407 virtual int64_t getMinimumWeightStreamingBudget() const noexcept = 0;
408 virtual int64_t getStreamableWeightsSize() const noexcept = 0;
409
410 virtual bool isDebugTensor(char const* name) const noexcept = 0;
411
412 // Added in TensorRT 10.1
413 virtual bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept = 0;
414 virtual int64_t getWeightStreamingBudgetV2() const noexcept = 0;
415 virtual int64_t getWeightStreamingAutomaticBudget() const noexcept = 0;
416 virtual int64_t getWeightStreamingScratchMemorySize() const noexcept = 0;
    // The V2 size queries return int64_t where the originals return size_t.
417 virtual int64_t getDeviceMemorySizeV2() const noexcept = 0;
418 virtual int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept = 0;
419 // Added in TensorRT 10.11
420 TRT_NODISCARD virtual int64_t const* getProfileTensorValuesV2(
421 char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept = 0;
422 TRT_NODISCARD virtual IExecutionContext* createExecutionContextWithRuntimeConfig(
423 IRuntimeConfig* runtimeConfig) noexcept = 0;
424 TRT_NODISCARD virtual IRuntimeConfig* createRuntimeConfig() noexcept = 0;
425 TRT_NODISCARD virtual int64_t getEngineStat(EngineStat stat) const noexcept = 0;
426 // Added in TensorRT 10.15
427 TRT_NODISCARD virtual char const* getAliasedInputTensor(char const* tensorName) const noexcept = 0;
428};
429
// Pure-virtual, all-noexcept interface for an execution context. Members
// fall into these groups: debug and profiling settings, device-memory
// assignment, optimization-profile selection, input shapes and tensor
// addresses keyed by tensor name, output allocators, execution entry points
// (executeV2 with a bindings array, enqueueV3 with a CUDA stream), auxiliary
// streams, per-tensor debug state, and a communicator handle.
430class VExecutionContext : public VRoot
431{
432public:
    // Returns a pointer typed as the public IExecutionContext class.
433 virtual IExecutionContext* getPImpl() noexcept = 0;
434 virtual void setDebugSync(bool sync) noexcept = 0;
435 virtual bool getDebugSync() const noexcept = 0;
436 virtual void setProfiler(IProfiler*) noexcept = 0;
437 virtual IProfiler* getProfiler() const noexcept = 0;
438 virtual ICudaEngine const& getEngine() const noexcept = 0;
439 virtual void setName(char const* name) noexcept = 0;
440 virtual char const* getName() const noexcept = 0;
441 virtual void setDeviceMemory(void* memory) noexcept = 0;
442 virtual int32_t getOptimizationProfile() const noexcept = 0;
443 virtual bool allInputDimensionsSpecified() const noexcept = 0;
444 virtual bool allInputShapesSpecified() const noexcept = 0;
445 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
446 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
447 virtual bool executeV2(void* const* bindings) noexcept = 0;
448 virtual bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept = 0;
449 virtual void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept = 0;
450 virtual bool getEnqueueEmitsProfile() const noexcept = 0;
451 virtual bool reportToProfiler() const noexcept = 0;
452 virtual bool setInputShape(char const* tensorName, Dims const& dims) noexcept = 0;
453 virtual Dims getTensorShape(char const* tensorName) const noexcept = 0;
454 virtual Dims getTensorStrides(char const* tensorName) const noexcept = 0;
    // Address setters: the generic and output forms take a mutable pointer,
    // the input form takes a pointer-to-const.
455 virtual bool setTensorAddress(char const* tensorName, void* data) noexcept = 0;
456 virtual void const* getTensorAddress(char const* tensorName) const noexcept = 0;
457 virtual bool setInputTensorAddress(char const* tensorName, void const* data) noexcept = 0;
458 virtual bool setOutputTensorAddress(char const* tensorName, void* data) noexcept = 0;
459 virtual int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept = 0;
460 virtual bool setInputConsumedEvent(cudaEvent_t event) noexcept = 0;
461 virtual cudaEvent_t getInputConsumedEvent() const noexcept = 0;
462 virtual void* getOutputTensorAddress(char const* tensorName) const noexcept = 0;
463 virtual bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept = 0;
464 virtual IOutputAllocator* getOutputAllocator(char const* name) noexcept = 0;
465 virtual int64_t getMaxOutputSize(char const* tensorName) const noexcept = 0;
466 virtual bool setTemporaryStorageAllocator(IGpuAllocator* allocator) noexcept = 0;
467 virtual IGpuAllocator* getTemporaryStorageAllocator() const noexcept = 0;
468 virtual bool enqueueV3(cudaStream_t stream) noexcept = 0;
469 virtual void setPersistentCacheLimit(size_t size) noexcept = 0;
470 virtual size_t getPersistentCacheLimit() const noexcept = 0;
471 virtual bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept = 0;
472 virtual ProfilingVerbosity getNvtxVerbosity() const noexcept = 0;
473 virtual void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept = 0;
474 virtual bool setDebugListener(IDebugListener* listener) noexcept = 0;
475 virtual IDebugListener* getDebugListener() noexcept = 0;
476 virtual bool setTensorDebugState(char const* name, bool flag) noexcept = 0;
477 virtual bool getDebugState(char const* name) const noexcept = 0;
478 virtual bool setAllTensorsDebugState(bool flag) noexcept = 0;
479 virtual size_t updateDeviceMemorySizeForShapes() noexcept = 0;
    // V2 of setDeviceMemory: additionally takes the size as int64_t.
480 virtual void setDeviceMemoryV2(void* memory, int64_t size) noexcept = 0;
481 TRT_NODISCARD virtual IRuntimeConfig* getRuntimeConfig() const noexcept = 0;
482 virtual bool setUnfusedTensorsDebugState(bool flag) noexcept = 0;
483 virtual bool getUnfusedTensorsDebugState() const noexcept = 0;
    // isStreamCapturable is declared only when the macro below evaluates to
    // non-zero (an undefined macro evaluates to 0 inside #if).
    // NOTE(review): a conditionally declared virtual member that precedes
    // setCommunicator presumably shifts that member's virtual-table slot, so
    // every translation unit would need the same macro setting -- confirm.
484#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
485 virtual bool isStreamCapturable(cudaStream_t stream) const noexcept = 0;
486#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
    // Takes an untyped pointer; the expected pointee type is not visible here.
487 virtual bool setCommunicator(void* communicator) noexcept = 0;
488};
489
// Pure-virtual, all-noexcept interface for an engine inspector: attach or
// query an execution context, fetch per-layer or whole-engine information
// as a C string in a requested LayerInformationFormat, and access the error
// recorder.
// NOTE(review): the lifetime of the returned C strings is not expressed in
// these signatures -- confirm.
490class VEngineInspector : public VRoot
491{
492public:
    // Returns a pointer typed as the public IEngineInspector class.
493 virtual IEngineInspector* getPImpl() noexcept = 0;
494 virtual bool setExecutionContext(IExecutionContext const* context) noexcept = 0;
495 virtual IExecutionContext const* getExecutionContext() const noexcept = 0;
496 virtual char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept = 0;
497 virtual char const* getEngineInformation(LayerInformationFormat format) const noexcept = 0;
498 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
499 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
500};
501
// Pure-virtual, all-noexcept interface for a network tensor. It provides
// accessors for name, dimensions, data type, dynamic range, location,
// allowed formats and per-dimension names, plus predicates for network
// input / output, shape tensor and execution tensor.
502class VTensor : public VRoot
503{
504public:
505 virtual void setName(char const* name) noexcept = 0;
506 virtual char const* getName() const noexcept = 0;
507 virtual void setDimensions(Dims const& dimensions) noexcept = 0;
508 virtual Dims getDimensions() const noexcept = 0;
509 virtual void setType(DataType type) noexcept = 0;
510 virtual DataType getType() const noexcept = 0;
    // Dynamic range is a (min, max) pair of floats; see also the
    // dynamicRangeIsSet / resetDynamicRange / getDynamicRange* members below.
511 virtual bool setDynamicRange(float min, float max) noexcept = 0;
512 virtual bool isNetworkInput() const noexcept = 0;
513 virtual bool isNetworkOutput() const noexcept = 0;
514 virtual void setBroadcastAcrossBatch(bool broadcastAcrossBatch) noexcept = 0;
515 virtual bool getBroadcastAcrossBatch() const noexcept = 0;
516 virtual TensorLocation getLocation() const noexcept = 0;
517 virtual void setLocation(TensorLocation location) noexcept = 0;
518 virtual bool dynamicRangeIsSet() const noexcept = 0;
519 virtual void resetDynamicRange() noexcept = 0;
520 virtual float getDynamicRangeMin() const noexcept = 0;
521 virtual float getDynamicRangeMax() const noexcept = 0;
    // TensorFormats is a uint32_t alias declared earlier in this file.
522 virtual void setAllowedFormats(TensorFormats formats) noexcept = 0;
523 virtual TensorFormats getAllowedFormats() const noexcept = 0;
524 virtual bool isShapeTensor() const noexcept = 0;
525 virtual bool isExecutionTensor() const noexcept = 0;
526 virtual void setDimensionName(int32_t index, char const* name) noexcept = 0;
527 virtual char const* getDimensionName(int32_t index) const noexcept = 0;
528};
529
// Pure-virtual, all-noexcept interface with the operations common to
// layers: type and name, indexed access to input and output tensors,
// precision and per-output type overrides (each with set / get / is-set /
// reset members), a metadata string, and a rank count.
530class VLayer : public VRoot
531{
532public:
533 virtual LayerType getType() const noexcept = 0;
534 virtual void setName(char const* name) noexcept = 0;
535 virtual char const* getName() const noexcept = 0;
536 virtual int32_t getNbInputs() const noexcept = 0;
537 virtual ITensor* getInput(int32_t index) const noexcept = 0;
538 virtual int32_t getNbOutputs() const noexcept = 0;
539 virtual ITensor* getOutput(int32_t index) const noexcept = 0;
540 virtual void setInput(int32_t index, ITensor& tensor) noexcept = 0;
541 virtual void setPrecision(DataType dataType) noexcept = 0;
542 virtual DataType getPrecision() const noexcept = 0;
543 virtual bool precisionIsSet() const noexcept = 0;
544 virtual void resetPrecision() noexcept = 0;
545 virtual void setOutputType(int32_t index, DataType dataType) noexcept = 0;
546 virtual DataType getOutputType(int32_t index) const noexcept = 0;
547 virtual bool outputTypeIsSet(int32_t index) const noexcept = 0;
548 virtual void resetOutputType(int32_t index) noexcept = 0;
    // The setter's parameter is named docString while the accessor pair is
    // named "Metadata".
549 virtual void setMetadata(char const* docString) noexcept = 0;
550 virtual char const* getMetadata() const noexcept = 0;
551 virtual bool setNbRanks(int32_t nbRanks) noexcept = 0;
552 virtual int32_t getNbRanks() const noexcept = 0;
553};
554
// Pure-virtual, all-noexcept interface for convolution-layer parameters:
// output-map and group counts (int64_t), kernel and bias weights, pre/post
// padding and padding mode, and N-dimensional kernel size, stride, padding
// and dilation expressed as Dims. Every setter has a matching getter.
// The member list matches VDeconvolutionLayer's declaration for
// declaration.
555class VConvolutionLayer : public VRoot
556{
557public:
558 virtual void setNbOutputMaps(int64_t nbOutputMaps) noexcept = 0;
559 virtual int64_t getNbOutputMaps() const noexcept = 0;
560 virtual void setNbGroups(int64_t nbGroups) noexcept = 0;
561 virtual int64_t getNbGroups() const noexcept = 0;
562 virtual void setKernelWeights(Weights weights) noexcept = 0;
563 virtual Weights getKernelWeights() const noexcept = 0;
564 virtual void setBiasWeights(Weights weights) noexcept = 0;
565 virtual Weights getBiasWeights() const noexcept = 0;
566 virtual void setPrePadding(Dims const& padding) noexcept = 0;
567 virtual Dims getPrePadding() const noexcept = 0;
568 virtual void setPostPadding(Dims const& padding) noexcept = 0;
569 virtual Dims getPostPadding() const noexcept = 0;
570 virtual void setPaddingMode(PaddingMode paddingMode) noexcept = 0;
571 virtual PaddingMode getPaddingMode() const noexcept = 0;
572 virtual void setKernelSizeNd(Dims const& kernelSize) noexcept = 0;
573 virtual Dims getKernelSizeNd() const noexcept = 0;
574 virtual void setStrideNd(Dims const& stride) noexcept = 0;
575 virtual Dims getStrideNd() const noexcept = 0;
576 virtual void setPaddingNd(Dims const& padding) noexcept = 0;
577 virtual Dims getPaddingNd() const noexcept = 0;
578 virtual void setDilationNd(Dims const& dilation) noexcept = 0;
579 virtual Dims getDilationNd() const noexcept = 0;
580};
581
// Pure-virtual, all-noexcept interface for activation-layer parameters:
// the ActivationType plus two float parameters, alpha and beta.
// Note the declaration order: both setters (alpha, beta) come before both
// getters.
582class VActivationLayer : public VRoot
583{
584public:
585 virtual void setActivationType(ActivationType type) noexcept = 0;
586 virtual ActivationType getActivationType() const noexcept = 0;
587 virtual void setAlpha(float alpha) noexcept = 0;
588 virtual void setBeta(float beta) noexcept = 0;
589 virtual float getAlpha() const noexcept = 0;
590 virtual float getBeta() const noexcept = 0;
591};
592
// Pure-virtual, all-noexcept interface for pooling-layer parameters:
// pooling type, a float blend factor, a flag controlling whether the
// average count excludes padding, pre/post padding and padding mode, and
// N-dimensional window size, stride and padding expressed as Dims.
593class VPoolingLayer : public VRoot
594{
595public:
596 virtual void setPoolingType(PoolingType type) noexcept = 0;
597 virtual PoolingType getPoolingType() const noexcept = 0;
598 virtual void setBlendFactor(float blendFactor) noexcept = 0;
599 virtual float getBlendFactor() const noexcept = 0;
600 virtual void setAverageCountExcludesPadding(bool exclusive) noexcept = 0;
601 virtual bool getAverageCountExcludesPadding() const noexcept = 0;
602 virtual void setPrePadding(Dims const& padding) noexcept = 0;
603 virtual Dims getPrePadding() const noexcept = 0;
604 virtual void setPostPadding(Dims const& padding) noexcept = 0;
605 virtual Dims getPostPadding() const noexcept = 0;
606 virtual void setPaddingMode(PaddingMode paddingMode) noexcept = 0;
607 virtual PaddingMode getPaddingMode() const noexcept = 0;
608 virtual void setWindowSizeNd(Dims const& windowSize) noexcept = 0;
609 virtual Dims getWindowSizeNd() const noexcept = 0;
610 virtual void setStrideNd(Dims const& stride) noexcept = 0;
611 virtual Dims getStrideNd() const noexcept = 0;
612 virtual void setPaddingNd(Dims const& padding) noexcept = 0;
613 virtual Dims getPaddingNd() const noexcept = 0;
614};
615
// Pure-virtual, all-noexcept interface for LRN-layer parameters: an int64_t
// window size and three float parameters (alpha, beta, k), each with a
// setter / getter pair.
616class VLRNLayer : public VRoot
617{
618public:
619 virtual void setWindowSize(int64_t windowSize) noexcept = 0;
620 virtual int64_t getWindowSize() const noexcept = 0;
621 virtual void setAlpha(float alpha) noexcept = 0;
622 virtual float getAlpha() const noexcept = 0;
623 virtual void setBeta(float beta) noexcept = 0;
624 virtual float getBeta() const noexcept = 0;
625 virtual void setK(float k) noexcept = 0;
626 virtual float getK() const noexcept = 0;
627};
628
// Pure-virtual, all-noexcept interface for scale-layer parameters: the
// ScaleMode, three Weights values (shift, scale, power) and an int32_t
// channel axis.
// Note the channel-axis pair is declared getter first, unlike the other
// pairs in this class.
629class VScaleLayer : public VRoot
630{
631public:
632 virtual void setMode(ScaleMode mode) noexcept = 0;
633 virtual ScaleMode getMode() const noexcept = 0;
634 virtual void setShift(Weights shift) noexcept = 0;
635 virtual Weights getShift() const noexcept = 0;
636 virtual void setScale(Weights scale) noexcept = 0;
637 virtual Weights getScale() const noexcept = 0;
638 virtual void setPower(Weights power) noexcept = 0;
639 virtual Weights getPower() const noexcept = 0;
640 virtual int32_t getChannelAxis() const noexcept = 0;
641 virtual void setChannelAxis(int32_t channelAxis) noexcept = 0;
642};
643
// Pure-virtual, all-noexcept interface with a single setter / getter pair
// for the softmax axes, carried as a uint32_t.
// NOTE(review): presumably a bitmask with one bit per axis -- the encoding
// is not shown in this file; confirm.
644class VSoftMaxLayer : public VRoot
645{
646public:
647 virtual void setAxes(uint32_t axes) noexcept = 0;
648 virtual uint32_t getAxes() const noexcept = 0;
649};
650
// Pure-virtual, all-noexcept interface with a single setter / getter pair
// for the concatenation axis, carried as an int32_t.
651class VConcatenationLayer : public VRoot
652{
653public:
654 virtual void setAxis(int32_t axis) noexcept = 0;
655 virtual int32_t getAxis() const noexcept = 0;
656};
657
// Pure-virtual, all-noexcept interface for deconvolution-layer parameters.
// The member list matches VConvolutionLayer's declaration for declaration:
// output-map and group counts, kernel and bias weights, pre/post padding
// and padding mode, and N-dimensional kernel size, stride, padding and
// dilation.
658class VDeconvolutionLayer : public VRoot
659{
660public:
661 virtual void setNbOutputMaps(int64_t nbOutputMaps) noexcept = 0;
662 virtual int64_t getNbOutputMaps() const noexcept = 0;
663 virtual void setNbGroups(int64_t nbGroups) noexcept = 0;
664 virtual int64_t getNbGroups() const noexcept = 0;
665 virtual void setKernelWeights(Weights weights) noexcept = 0;
666 virtual Weights getKernelWeights() const noexcept = 0;
667 virtual void setBiasWeights(Weights weights) noexcept = 0;
668 virtual Weights getBiasWeights() const noexcept = 0;
669 virtual void setPrePadding(Dims const& padding) noexcept = 0;
670 virtual Dims getPrePadding() const noexcept = 0;
671 virtual void setPostPadding(Dims const& padding) noexcept = 0;
672 virtual Dims getPostPadding() const noexcept = 0;
673 virtual void setPaddingMode(PaddingMode paddingMode) noexcept = 0;
674 virtual PaddingMode getPaddingMode() const noexcept = 0;
675 virtual void setKernelSizeNd(Dims const& kernelSize) noexcept = 0;
676 virtual Dims getKernelSizeNd() const noexcept = 0;
677 virtual void setStrideNd(Dims const& stride) noexcept = 0;
678 virtual Dims getStrideNd() const noexcept = 0;
679 virtual void setPaddingNd(Dims const& padding) noexcept = 0;
680 virtual Dims getPaddingNd() const noexcept = 0;
681 virtual void setDilationNd(Dims const& dilation) noexcept = 0;
682 virtual Dims getDilationNd() const noexcept = 0;
683};
684
// Pure-virtual, all-noexcept interface with a single setter / getter pair
// for the layer's ElementWiseOperation.
685class VElementWiseLayer : public VRoot
686{
687public:
688 virtual void setOperation(ElementWiseOperation op) noexcept = 0;
689 virtual ElementWiseOperation getOperation() const noexcept = 0;
690};
691
// Pure-virtual, all-noexcept interface for gather-layer parameters: the
// gather axis and the number of element-wise dimensions (both int32_t),
// and the GatherMode.
692class VGatherLayer : public VRoot
693{
694public:
695 virtual void setGatherAxis(int32_t axis) noexcept = 0;
696 virtual int32_t getGatherAxis() const noexcept = 0;
697 virtual void setNbElementWiseDims(int32_t k) noexcept = 0;
698 virtual int32_t getNbElementWiseDims() const noexcept = 0;
699 virtual void setMode(GatherMode mode) noexcept = 0;
700 virtual GatherMode getMode() const noexcept = 0;
701};
702
// Pure-virtual interface exposing one noexcept accessor that returns a
// mutable reference to an IPlugin.
703class VPluginLayer : public VRoot
704{
705public:
706 virtual IPlugin& getPlugin() noexcept = 0;
707};
708
// Pure-virtual interface exposing one noexcept accessor that returns a
// mutable reference to an IPluginV2.
// NOTE(review): IPluginV2 is not forward-declared in the visible part of
// this file; presumably it comes from an included header -- confirm.
709class VPluginV2Layer : public VRoot
710{
711public:
712 virtual IPluginV2& getPlugin() noexcept = 0;
713};
714
// Pure-virtual interface exposing one noexcept accessor that returns a
// mutable reference to an IPluginV3.
// NOTE(review): this file forward-declares IPluginV3 only inside v_1_0, yet
// it is named unqualified here; presumably an alias is supplied by another
// header -- confirm.
715class VPluginV3Layer : public VRoot
716{
717public:
718 virtual IPluginV3& getPlugin() noexcept = 0;
719};
720
// Pure-virtual, all-noexcept interface with a single setter / getter pair
// for the layer's UnaryOperation.
721class VUnaryLayer : public VRoot
722{
723public:
724 virtual void setOperation(UnaryOperation op) noexcept = 0;
725 virtual UnaryOperation getOperation() const noexcept = 0;
726};
727
// Pure-virtual, all-noexcept interface for reduce-layer parameters: the
// ReduceOperation, the reduce axes as a uint32_t, and a keep-dimensions
// flag.
// NOTE(review): reduceAxes is presumably a per-axis bitmask -- the encoding
// is not shown in this file; confirm.
728class VReduceLayer : public VRoot
729{
730public:
731 virtual void setOperation(ReduceOperation op) noexcept = 0;
732 virtual ReduceOperation getOperation() const noexcept = 0;
733 virtual void setReduceAxes(uint32_t reduceAxes) noexcept = 0;
734 virtual uint32_t getReduceAxes() const noexcept = 0;
735 virtual void setKeepDimensions(bool keepDimensions) noexcept = 0;
736 virtual bool getKeepDimensions() const noexcept = 0;
737};
738
// Pure-virtual, all-noexcept interface for padding-layer parameters: pre-
// and post-padding, each set from a Dims const& and returned by value.
739class VPaddingLayer : public VRoot
740{
741public:
742 virtual void setPrePaddingNd(Dims const& padding) noexcept = 0;
743 virtual Dims getPrePaddingNd() const noexcept = 0;
744 virtual void setPostPaddingNd(Dims const& padding) noexcept = 0;
745 virtual Dims getPostPaddingNd() const noexcept = 0;
746};
747
// Pure-virtual, all-noexcept interface for shuffle-layer parameters: a
// first transpose, reshape dimensions, a second transpose, and a
// zero-is-placeholder flag.
// The transpose getters return a reference to const Permutation, whereas
// the Dims getter returns by value.
// NOTE(review): the lifetime of the referenced Permutation is not
// expressed here; presumably it is tied to the layer -- confirm.
748class VShuffleLayer : public VRoot
749{
750public:
751 virtual void setFirstTranspose(Permutation const& permutation) noexcept = 0;
752 virtual Permutation const& getFirstTranspose() const noexcept = 0;
753 virtual void setReshapeDimensions(Dims const& dimensions) noexcept = 0;
754 virtual Dims getReshapeDimensions() const noexcept = 0;
755 virtual void setSecondTranspose(Permutation const& permutation) noexcept = 0;
756 virtual Permutation const& getSecondTranspose() const noexcept = 0;
757 virtual void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept = 0;
758 virtual bool getZeroIsPlaceholder() const noexcept = 0;
759};
760
// Abstract interface (derived from VRoot) for a slice layer implementation.
// Declares setter/getter pairs for start, size, stride and axes (all Dims) and for the SampleMode.
// Setters take Dims by const reference; getters return Dims by value.
761class VSliceLayer : public VRoot
762{
763public:
764 virtual void setStart(Dims const& start) noexcept = 0;
765 virtual Dims getStart() const noexcept = 0;
766 virtual void setSize(Dims const& size) noexcept = 0;
767 virtual Dims getSize() const noexcept = 0;
768 virtual void setStride(Dims const& stride) noexcept = 0;
769 virtual Dims getStride() const noexcept = 0;
770 virtual void setMode(SampleMode mode) noexcept = 0;
771 virtual SampleMode getMode() const noexcept = 0;
772 virtual void setAxes(Dims const& axes) noexcept = 0;
773 virtual Dims getAxes() const noexcept = 0;
774};
775
// Abstract interface type for a shape layer implementation.
// Declares no members of its own; it only introduces a distinct type derived from VRoot.
776class VShapeLayer : public VRoot
777{
778public:
779};
780
// Abstract interface (derived from VRoot) for a TopK layer implementation.
// Declares setter/getter pairs for the TopKOperation, K, the reduce axes and the
// data type of the indices. All members are pure virtual and noexcept.
781class VTopKLayer : public VRoot
782{
783public:
784 virtual void setOperation(TopKOperation op) noexcept = 0;
785 virtual TopKOperation getOperation() const noexcept = 0;
786 virtual void setK(int32_t k) noexcept = 0;
787 virtual int32_t getK() const noexcept = 0;
788 virtual void setReduceAxes(uint32_t reduceAxes) noexcept = 0;
789 virtual uint32_t getReduceAxes() const noexcept = 0;
    // Unlike the void setters above, this one returns bool.
    // NOTE(review): presumably false signals a rejected type — confirm against ITopKLayer.
790 virtual bool setIndicesType(DataType type) noexcept = 0;
791 virtual DataType getIndicesType() const noexcept = 0;
792};
793
// Abstract interface (derived from VRoot) for a matrix-multiply layer implementation.
// The MatrixOperation is set and queried per `index`.
// NOTE(review): `index` presumably selects the input operand (0 or 1) — confirm against IMatrixMultiplyLayer.
794class VMatrixMultiplyLayer : public VRoot
795{
796public:
797 virtual void setOperation(int32_t index, MatrixOperation op) noexcept = 0;
798 virtual MatrixOperation getOperation(int32_t index) const noexcept = 0;
799};
800
// Abstract interface (derived from VRoot) for a non-zero layer implementation.
// Declares a setter/getter pair for the data type of the indices; the setter returns bool.
801class VNonZeroLayer : public VRoot
802{
803public:
804 virtual bool setIndicesType(DataType type) noexcept = 0;
805 virtual DataType getIndicesType() const noexcept = 0;
806};
807
// Abstract interface type for a ragged soft-max layer implementation.
// Declares no members of its own; it only introduces a distinct type derived from VRoot.
808class VRaggedSoftMaxLayer : public VRoot
809{
810public:
811};
812
// Abstract interface type for an identity layer implementation.
// Declares no members of its own; it only introduces a distinct type derived from VRoot.
813class VIdentityLayer : public VRoot
814{
815public:
816};
817
// Abstract interface (derived from VRoot) for a cast layer implementation.
// Declares a setter/getter pair for the target DataType of the cast.
818class VCastLayer : public VRoot
819{
820public:
821 virtual void setToType(DataType toType) noexcept = 0;
822 virtual DataType getToType() const noexcept = 0;
823};
824
// Abstract interface (derived from VRoot) for a constant layer implementation.
// Declares setter/getter pairs for the layer's Weights and its dimensions.
// Weights are passed and returned by value; Dims are set by const reference and returned by value.
825class VConstantLayer : public VRoot
826{
827public:
828 virtual void setWeights(Weights weights) noexcept = 0;
829 virtual Weights getWeights() const noexcept = 0;
830 virtual void setDimensions(Dims const& dimensions) noexcept = 0;
831 virtual Dims getDimensions() const noexcept = 0;
832};
833
// Abstract interface type for a parametric ReLU layer implementation.
// Declares no members of its own; it only introduces a distinct type derived from VRoot.
834class VParametricReLULayer : public VRoot
835{
836public:
837};
838
// Abstract interface (derived from VRoot) for a resize layer implementation.
// Declares setter/getter pairs for: output dimensions, scales, interpolation (resize) mode,
// coordinate transformation, single-pixel selector, nearest rounding mode, cubic coefficient
// and the exclude-outside flag. All members are pure virtual and noexcept.
839class VResizeLayer : public VRoot
840{
841public:
842 virtual void setOutputDimensions(Dims const& dimensions) noexcept = 0;
843 virtual Dims getOutputDimensions() const noexcept = 0;
    // Scales are supplied as a caller-provided array of nbScales floats.
844 virtual void setScales(float const* scales, int32_t nbScales) noexcept = 0;
    // NOTE(review): presumably writes up to `size` values into `scales` and returns a count —
    // confirm the exact return contract against IResizeLayer::getScales.
845 virtual int32_t getScales(int32_t size, float* scales) const noexcept = 0;
846 virtual void setResizeMode(InterpolationMode interpolationMode) noexcept = 0;
847 virtual InterpolationMode getResizeMode() const noexcept = 0;
848 virtual void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform) noexcept = 0;
849 virtual ResizeCoordinateTransformation getCoordinateTransformation() const noexcept = 0;
850 virtual void setSelectorForSinglePixel(ResizeSelector selector) noexcept = 0;
851 virtual ResizeSelector getSelectorForSinglePixel() const noexcept = 0;
852 virtual void setNearestRounding(ResizeRoundMode value) noexcept = 0;
853 virtual ResizeRoundMode getNearestRounding() const noexcept = 0;
854 virtual void setCubicCoeff(float value) noexcept = 0;
855 virtual float getCubicCoeff() const noexcept = 0;
856 virtual void setExcludeOutside(bool value) noexcept = 0;
857 virtual bool getExcludeOutside() const noexcept = 0;
858};
859
// Abstract interface (derived from VRoot) for a loop boundary layer implementation.
// Its single member returns a pointer to the associated ILoop.
860class VLoopBoundaryLayer : public VRoot
861{
862public:
863 virtual ILoop* getLoop() const noexcept = 0;
864};
865
// Abstract interface type for a recurrence layer implementation.
// Declares no members of its own; it only introduces a distinct type derived from VRoot.
866class VRecurrenceLayer : public VRoot
867{
868public:
869};
870
// Abstract interface (derived from VRoot) for a loop output layer implementation.
// The LoopOutput kind is read-only here (getter only); the axis has a setter/getter pair.
871class VLoopOutputLayer : public VRoot
872{
873public:
874 virtual LoopOutput getLoopOutput() const noexcept = 0;
875 virtual void setAxis(int32_t axis) noexcept = 0;
876 virtual int32_t getAxis() const noexcept = 0;
877};
878
// Abstract interface (derived from VRoot) for a trip-limit layer implementation.
// The TripLimit is read-only here: only a getter is declared.
879class VTripLimitLayer : public VRoot
880{
881public:
882 virtual TripLimit getTripLimit() const noexcept = 0;
883};
884
// Abstract interface (derived from VRoot) for an iterator layer implementation.
// Declares setter/getter pairs for the iteration axis and the reverse flag.
885class VIteratorLayer : public VRoot
886{
887public:
888 virtual void setAxis(int32_t axis) noexcept = 0;
889 virtual int32_t getAxis() const noexcept = 0;
890 virtual void setReverse(bool reverse) noexcept = 0;
891 virtual bool getReverse() const noexcept = 0;
892};

// Abstract interface (derived from VRoot) for a loop implementation.
// Declares factory members that add recurrence, trip-limit, iterator and loop-output layers
// (each returning a pointer to the created layer interface), plus a name setter/getter.
893class VLoop : public VRoot
894{
895public:
896 virtual IRecurrenceLayer* addRecurrence(ITensor& initialValue) noexcept = 0;
897 virtual ITripLimitLayer* addTripLimit(ITensor& tensor, TripLimit limit) noexcept = 0;
    // NOTE(review): the two declarations below carry default arguments on virtual functions.
    // In C++ a default argument is chosen from the static type at the call site, not from the
    // overrider, so overriders should not redeclare different defaults.
898 virtual IIteratorLayer* addIterator(ITensor& tensor, int32_t axis = 0, bool reverse = false) noexcept = 0;
899 virtual ILoopOutputLayer* addLoopOutput(ITensor& tensor, LoopOutput outputKind, int32_t axis = 0) noexcept = 0;
900 virtual void setName(char const* name) noexcept = 0;
901 virtual char const* getName() const noexcept = 0;
902};
903
// Abstract interface (derived from VRoot) for a conditional boundary layer implementation.
// Its single member returns a pointer to the associated IIfConditional.
904class VConditionalBoundaryLayer : public VRoot
905{
906public:
907 virtual IIfConditional* getConditional() const noexcept = 0;
908};
909
// Abstract interface type for a condition layer implementation.
// Declares no members of its own; it only introduces a distinct type derived from VRoot.
910class VConditionLayer : public VRoot
911{
912public:
913};
914
// Abstract interface type for a conditional input layer implementation.
// Declares no members of its own; it only introduces a distinct type derived from VRoot.
915class VConditionalInputLayer : public VRoot
916{
917public:
918};
919
// Abstract interface type for a conditional output layer implementation.
// Declares no members of its own; it only introduces a distinct type derived from VRoot.
920class VConditionalOutputLayer : public VRoot
921{
922public:
923};
924
// Abstract interface (derived from VRoot) for an if-conditional implementation.
// Declares members that set the condition tensor and add inputs/outputs (each returning a
// pointer to the corresponding layer interface), plus a name setter/getter.
925class VIfConditional : public VRoot
926{
927public:
928 virtual IConditionLayer* setCondition(ITensor& tensor) noexcept = 0;
929 virtual IIfConditionalInputLayer* addInput(ITensor& tensor) noexcept = 0;
    // An output is built from two tensors, one for each branch.
930 virtual IIfConditionalOutputLayer* addOutput(ITensor& trueTensor, ITensor& falseTensor) noexcept = 0;
931 virtual void setName(char const* name) noexcept = 0;
932 virtual char const* getName() const noexcept = 0;
933};
934
// Abstract interface (derived from VRoot) for an attention boundary layer implementation.
// Its single member returns a pointer to the associated IAttention.
935class VAttentionBoundaryLayer : public VRoot
936{
937public:
938 virtual IAttention* getAttention() const noexcept = 0;
939};
940
// Abstract interface type for an attention input layer implementation.
// Declares no members of its own; it only introduces a distinct type derived from VRoot.
941class VAttentionInputLayer : public VRoot
942{
943public:
944};
945
// Abstract interface type for an attention output layer implementation.
// Declares no members of its own; it only introduces a distinct type derived from VRoot.
946class VAttentionOutputLayer : public VRoot
947{
948public:
949};
950
// Abstract interface (derived from VRoot) for an attention implementation.
// Every member is marked TRT_NODISCARD, and every setter returns bool, so callers are
// expected to check the result. Declares accessors for: inputs/outputs by index, name,
// normalization operation, causal flag, mask tensor, decomposable flag, normalization
// quantize scale and target type, metadata string, and number of ranks.
951class VAttention : public VRoot
952{
953public:
954 TRT_NODISCARD virtual bool setInput(int32_t index, ITensor& input) noexcept = 0;
955 TRT_NODISCARD virtual int32_t getNbInputs() const noexcept = 0;
956 TRT_NODISCARD virtual ITensor* getInput(int32_t index) const noexcept = 0;
957 TRT_NODISCARD virtual int32_t getNbOutputs() const noexcept = 0;
958 TRT_NODISCARD virtual ITensor* getOutput(int32_t index) const noexcept = 0;
959 TRT_NODISCARD virtual bool setName(char const* name) noexcept = 0;
960 TRT_NODISCARD virtual char const* getName() const noexcept = 0;
961 TRT_NODISCARD virtual bool setNormalizationOperation(AttentionNormalizationOp op) noexcept = 0;
962 TRT_NODISCARD virtual AttentionNormalizationOp getNormalizationOperation() const noexcept = 0;
963 TRT_NODISCARD virtual bool setCausal(bool isCausal) noexcept = 0;
964 TRT_NODISCARD virtual bool getCausal() const noexcept = 0;
    // Tensor-valued properties are set by reference and read back as a pointer.
    // NOTE(review): the getters presumably return nullptr when nothing was set — confirm.
965 TRT_NODISCARD virtual bool setMask(ITensor& mask) noexcept = 0;
966 TRT_NODISCARD virtual ITensor* getMask() const noexcept = 0;
967 TRT_NODISCARD virtual bool setDecomposable(bool decomposable) noexcept = 0;
968 TRT_NODISCARD virtual bool getDecomposable() const noexcept = 0;
969 TRT_NODISCARD virtual bool setNormalizationQuantizeScale(ITensor& tensor) noexcept = 0;
970 TRT_NODISCARD virtual ITensor* getNormalizationQuantizeScale() const noexcept = 0;
971 TRT_NODISCARD virtual bool setNormalizationQuantizeToType(DataType type) noexcept = 0;
972 TRT_NODISCARD virtual DataType getNormalizationQuantizeToType() const noexcept = 0;
973 TRT_NODISCARD virtual bool setMetadata(char const* docString) noexcept = 0;
974 TRT_NODISCARD virtual char const* getMetadata() const noexcept = 0;
975 TRT_NODISCARD virtual bool setNbRanks(int32_t nbRanks) noexcept = 0;
976 TRT_NODISCARD virtual int32_t getNbRanks() const noexcept = 0;
977}; // class VAttention
978
// Abstract interface type for a select layer implementation.
// Declares no members of its own (and, unlike most sibling classes, no access specifier);
// it only introduces a distinct type derived from VRoot.
979class VSelectLayer : public VRoot
980{
981};
982
// Abstract interface (derived from VRoot) for an assertion layer implementation.
// Declares a setter/getter pair for the assertion message, passed as a C string.
983class VAssertionLayer : public VRoot
984{
985public:
986 virtual void setMessage(char const* message) noexcept = 0;
987 virtual char const* getMessage() const noexcept = 0;
988};
989
// Abstract interface (derived from VRoot) for a fill layer implementation.
// Declares setter/getter pairs for the output dimensions, the FillOperation, the alpha/beta
// parameters and the target DataType. Alpha and beta are exposed twice: as double and as
// int64_t, with isAlphaBetaInt64() reporting which representation is in effect.
990class VFillLayer : public VRoot
991{
992public:
993 virtual void setDimensions(Dims const& dimensions) noexcept = 0;
994 virtual Dims getDimensions() const noexcept = 0;
995 virtual void setOperation(FillOperation op) noexcept = 0;
996 virtual FillOperation getOperation() const noexcept = 0;
    // Floating-point variants of alpha/beta.
997 virtual void setAlpha(double alpha) noexcept = 0;
998 virtual double getAlpha() const noexcept = 0;
999 virtual void setBeta(double beta) noexcept = 0;
1000 virtual double getBeta() const noexcept = 0;
    // 64-bit integer variants of alpha/beta.
1001 virtual void setAlphaInt64(int64_t alpha) noexcept = 0;
1002 virtual int64_t getAlphaInt64() const noexcept = 0;
1003 virtual void setBetaInt64(int64_t beta) noexcept = 0;
1004 virtual int64_t getBetaInt64() const noexcept = 0;
1005 virtual bool isAlphaBetaInt64() const noexcept = 0;
    // Note: for this property the getter is declared before the setter.
1006 virtual DataType getToType() const noexcept = 0;
1007 virtual void setToType(DataType toType) noexcept = 0;
1008};
1009
// Abstract interface (derived from VRoot) for a quantize layer implementation.
// Declares getter/setter pairs for the quantization axis, the target DataType and the
// block shape. All members are pure virtual and noexcept.
1010class VQuantizeLayer : public VRoot
1011{
1012public:
1013 virtual int32_t getAxis() const noexcept = 0;
1014 virtual void setAxis(int32_t axis) noexcept = 0;
1015 virtual DataType getToType() const noexcept = 0;
1016 virtual void setToType(DataType toType) noexcept = 0;
1017 virtual Dims getBlockShape() const noexcept = 0;
    // Unlike the other setters in this class, this one returns bool.
1018 virtual bool setBlockShape(Dims const& blockShape) noexcept = 0;
1019};
1020
// Abstract interface (derived from VRoot) for a dequantize layer implementation.
// Declares getter/setter pairs for the axis, the target DataType and the block shape,
// with signatures parallel to VQuantizeLayer.
1021class VDequantizeLayer : public VRoot
1022{
1023public:
1024 virtual int32_t getAxis() const noexcept = 0;
1025 virtual void setAxis(int32_t axis) noexcept = 0;
1026 virtual DataType getToType() const noexcept = 0;
1027 virtual void setToType(DataType toType) noexcept = 0;
1028 virtual Dims getBlockShape() const noexcept = 0;
    // Unlike the other setters in this class, this one returns bool.
1029 virtual bool setBlockShape(Dims const& blockShape) noexcept = 0;
1030};
1031
// Abstract interface (derived from VRoot) for a dynamic-quantize layer implementation.
// Declares getter/setter pairs for the axis and block size (both marked TRT_DEPRECATED),
// the scale DataType, the target DataType and the block shape.
// All members are pure virtual and noexcept.
1032class VDynamicQuantizeLayer : public VRoot
1033{
1034public:
    // Deprecated accessors.
    // NOTE(review): presumably superseded by the block-shape accessors below — confirm.
1035 TRT_DEPRECATED virtual int32_t getAxis() const noexcept = 0;
1036 TRT_DEPRECATED virtual void setAxis(int32_t axis) noexcept = 0;
1037 TRT_DEPRECATED virtual int32_t getBlockSize() const noexcept = 0;
    // The parameter is the block size; it was previously mis-named `axis` (copy-paste slip).
1038 TRT_DEPRECATED virtual void setBlockSize(int32_t blockSize) noexcept = 0;
1039 virtual DataType getScaleType() const noexcept = 0;
    // The parameter is the scale data type; it was previously mis-named `axis` (copy-paste slip).
1040 virtual void setScaleType(DataType scaleType) noexcept = 0;
1041 virtual DataType getToType() const noexcept = 0;
1042 virtual void setToType(DataType toType) noexcept = 0;
1043 virtual Dims getBlockShape() const noexcept = 0;
    // Returns void here, whereas VQuantizeLayer/VDequantizeLayer::setBlockShape return bool.
    // Left as is: changing the return type would break overriders and the vtable ABI.
1044 virtual void setBlockShape(Dims const& blockShape) noexcept = 0;
1045};
1046
// Abstract interface (derived from VRoot) for a scatter layer implementation.
// Declares setter/getter pairs for the ScatterMode and the axis.
1047class VScatterLayer : public VRoot
1048{
1049public:
1050 virtual void setMode(ScatterMode mode) noexcept = 0;
1051 virtual ScatterMode getMode() const noexcept = 0;
1052 virtual void setAxis(int32_t axis) noexcept = 0;
1053 virtual int32_t getAxis() const noexcept = 0;
1054}; // class VScatterLayer
1055
// Abstract interface (derived from VRoot) for an einsum layer implementation.
// Declares a setter/getter pair for the equation, passed as a C string.
// The setter returns bool.
// NOTE(review): presumably false means the equation was rejected — confirm against IEinsumLayer.
1056class VEinsumLayer : public VRoot
1057{
1058public:
1059 virtual bool setEquation(char const* equation) noexcept = 0;
1060 virtual char const* getEquation() const noexcept = 0;
1061};
1062
// Abstract interface (derived from VRoot) for a one-hot layer implementation.
// Declares a getter/setter pair for the axis.
1063class VOneHotLayer : public VRoot
1064{
1065public:
1066 virtual int32_t getAxis() const noexcept = 0;
1067 virtual void setAxis(int32_t axis) noexcept = 0;
1068}; // class VOneHotLayer
1069
// Abstract interface (derived from VRoot) for a grid-sample layer implementation.
// Declares setter/getter pairs for the InterpolationMode, the align-corners flag and
// the SampleMode.
1070class VGridSampleLayer : public VRoot
1071{
1072public:
1073 virtual void setInterpolationMode(InterpolationMode mode) noexcept = 0;
1074 virtual InterpolationMode getInterpolationMode() const noexcept = 0;
1075 virtual void setAlignCorners(bool alignCorners) noexcept = 0;
1076 virtual bool getAlignCorners() const noexcept = 0;
    // Unlike the two void setters above, this one returns bool.
1077 virtual bool setSampleMode(SampleMode mode) noexcept = 0;
1078 virtual SampleMode getSampleMode() const noexcept = 0;
1079}; // class VGridSampleLayer
1080
// Abstract interface (derived from VRoot) for an NMS layer implementation.
// Declares setter/getter pairs for the BoundingBoxFormat, the top-K box limit and the
// data type of the indices.
1081class VNMSLayer : public VRoot
1082{
1083public:
1084 virtual void setBoundingBoxFormat(BoundingBoxFormat fmt) noexcept = 0;
1085 virtual BoundingBoxFormat getBoundingBoxFormat() const noexcept = 0;
1086 virtual void setTopKBoxLimit(int32_t limit) noexcept = 0;
1087 virtual int32_t getTopKBoxLimit() const noexcept = 0;
    // Unlike the two void setters above, this one returns bool.
1088 virtual bool setIndicesType(DataType type) noexcept = 0;
1089 virtual DataType getIndicesType() const noexcept = 0;
1090}; // class VNMSLayer
1091
// Abstract interface (derived from VRoot) for a reverse-sequence layer implementation.
// Declares setter/getter pairs for the batch axis and the sequence axis.
1092class VReverseSequenceLayer : public VRoot
1093{
1094public:
1095 virtual void setBatchAxis(int32_t batchAxis) noexcept = 0;
1096 virtual int32_t getBatchAxis() const noexcept = 0;
1097
1098 virtual void setSequenceAxis(int32_t sequenceAxis) noexcept = 0;
1099 virtual int32_t getSequenceAxis() const noexcept = 0;
1100}; // class VReverseSequenceLayer
1101
// Abstract interface (derived from VRoot) for a normalization layer implementation.
// Declares setter/getter pairs for epsilon, the axes mask, the number of groups and the
// compute precision, plus isV2(), a read-only query with no matching setter.
1102class VNormalizationLayer : public VRoot
1103{
1104public:
1105 virtual void setEpsilon(float eps) noexcept = 0;
1106 virtual float getEpsilon() const noexcept = 0;
    // Axes are passed as a uint32_t mask (parameter name: axesMask).
1107 virtual void setAxes(uint32_t axesMask) noexcept = 0;
1108 virtual uint32_t getAxes() const noexcept = 0;
1109 virtual void setNbGroups(int64_t nbGroups) noexcept = 0;
1110 virtual int64_t getNbGroups() const noexcept = 0;
1111 virtual void setComputePrecision(DataType type) noexcept = 0;
1112 virtual DataType getComputePrecision() const noexcept = 0;
    // NOTE(review): presumably true for layers created through addNormalizationV2 — confirm.
1113 virtual bool isV2() const noexcept = 0;
1114}; // class VNormalizationLayer
1115
// Abstract interface type for a squeeze layer implementation.
// Declares no members of its own (and no access specifier); it only introduces a
// distinct type derived from VRoot.
1116class VSqueezeLayer : public VRoot
1117{
1118};
1119
// Abstract interface type for an unsqueeze layer implementation.
// Declares no members of its own (and no access specifier); it only introduces a
// distinct type derived from VRoot.
1120class VUnsqueezeLayer : public VRoot
1121{
1122};
1123
// Abstract interface (derived from VRoot) for a cumulative layer implementation.
// Declares setter/getter pairs for the CumulativeOperation, the exclusive flag and the
// reverse flag.
1124class VCumulativeLayer : public VRoot
1125{
1126public:
    // Returns bool, unlike the two void setters below.
1127 virtual bool setOperation(CumulativeOperation op) noexcept = 0;
1128 virtual CumulativeOperation getOperation() const noexcept = 0;
1129 virtual void setExclusive(bool exclusive) noexcept = 0;
1130 virtual bool getExclusive() const noexcept = 0;
1131 virtual void setReverse(bool reverse) noexcept = 0;
1132 virtual bool getReverse() const noexcept = 0;
1133}; // class VCumulativeLayer
1134
// Abstract interface (derived from VRoot) for a rotary-embedding layer implementation.
// Declares setter/getter pairs for the interleaved flag and the rotary embedding dimension,
// plus a setter that binds an input tensor by index.
1135class VRotaryEmbeddingLayer : public VRoot
1136{
1137public:
1138 virtual void setInterleaved(bool interleaved) noexcept = 0;
1139 virtual bool getInterleaved() const noexcept = 0;
    // Returns bool, unlike setInterleaved above.
1140 virtual bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept = 0;
1141 virtual int32_t getRotaryEmbeddingDim() const noexcept = 0;
1142 virtual void setInput(int32_t index, ITensor& input) noexcept = 0;
1143}; // class VRotaryEmbeddingLayer
1144
// Abstract interface (derived from VRoot) for a KV-cache-update layer implementation.
// Declares a setter/getter pair for the KVCacheMode. Both members are TRT_NODISCARD and
// the setter returns bool.
1145class VKVCacheUpdateLayer : public VRoot
1146{
1147public:
1148 TRT_NODISCARD virtual bool setCacheMode(KVCacheMode cacheMode) noexcept = 0;
1149 TRT_NODISCARD virtual KVCacheMode getCacheMode() const noexcept = 0;
1150}; // class VKVCacheUpdateLayer
1151
// Abstract interface (derived from VRoot) for a mixture-of-experts (MoE) layer implementation.
// Declares setters for the gated weights and biases, the activation type, quantization
// settings and the SwiGLU parameters, plus a setter that binds an input tensor by index.
// Getters exist only for the activation type, the quantization target type and block shape,
// the dynamic-quantization output scale type and the three SwiGLU parameters; no getters
// are declared for the weight, bias or scale tensors.
1152class VMoELayer : public VRoot
1153{
1154public:
    // Sets the three weight tensors together with the activation type in one call.
1155 virtual void setGatedWeights(ITensor& fcGateWeights, ITensor& fcUpWeights, ITensor& fcDownWeights, MoEActType activationType) noexcept = 0;
1156 virtual void setGatedBiases(ITensor& fcGateBiases, ITensor& fcUpBiases, ITensor& fcDownBiases) noexcept = 0;
1157 virtual void setActivationType(MoEActType activationType) noexcept = 0;
1158 virtual MoEActType getActivationType() const noexcept = 0;
    // Two quantization entry points: one taking an activation scale tensor and a data type,
    // and one that additionally takes a block shape and an output scale type.
1159 virtual void setQuantizationStatic(ITensor& fcDownActivationScale, DataType dataType) noexcept = 0;
1160 virtual void setQuantizationDynamicDblQ(ITensor& fcDownActivationDblQScale, DataType dataType, Dims const& blockShape, DataType dynQOutputScaleType) noexcept = 0;
1161 virtual void setQuantizationToType(DataType type) noexcept = 0;
1162 virtual DataType getQuantizationToType() const noexcept = 0;
1163 virtual void setQuantizationBlockShape(Dims const& blockShape) noexcept = 0;
1164 virtual Dims getQuantizationBlockShape() const noexcept = 0;
1165 virtual void setDynQOutputScaleType(DataType type) noexcept = 0;
1166 virtual DataType getDynQOutputScaleType() const noexcept = 0;
    // SwiGLU parameters can be set all at once or individually.
1167 virtual void setSwigluParams(float limit, float alpha, float beta) noexcept = 0;
1168 virtual void setSwigluParamLimit(float limit) noexcept = 0;
1169 virtual float getSwigluParamLimit() const noexcept = 0;
1170 virtual void setSwigluParamAlpha(float alpha) noexcept = 0;
1171 virtual float getSwigluParamAlpha() const noexcept = 0;
1172 virtual void setSwigluParamBeta(float beta) noexcept = 0;
1173 virtual float getSwigluParamBeta() const noexcept = 0;
1174 virtual void setInput(int32_t index, ITensor& tensor) noexcept = 0;
1175}; // class VMoELayer
1176
1177
// Abstract interface (derived from VRoot) for a network definition implementation.
// Most members are `add*` factories that take input tensors/parameters and return a pointer
// to the created layer (or ILoop / IIfConditional / IAttention) interface. The rest manage
// network inputs/outputs, naming, error recording, debug-tensor marking, refittable-weight
// marking and creation-flag queries. All members are pure virtual and noexcept.
// NOTE(review): the declaration order defines the vtable layout; new members appear to be
// appended at the end (see the *V2 variants), so do not reorder.
1178class VNetworkDefinition : public VRoot
1179{
1180public:
1181 virtual ITensor* addInput(char const* name, DataType type, Dims const& dimensions) noexcept = 0;
1182 virtual void markOutput(ITensor& tensor) noexcept = 0;
1183 virtual IActivationLayer* addActivation(ITensor& input, ActivationType type) noexcept = 0;
1184 virtual ILRNLayer* addLRN(ITensor& input, int64_t window, float alpha, float beta, float k) noexcept = 0;
1185 virtual IScaleLayer* addScale(
1186 ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept = 0;
1187 virtual ISoftMaxLayer* addSoftMax(ITensor& input) noexcept = 0;
1188 virtual IConcatenationLayer* addConcatenation(ITensor* const* inputs, int32_t nbInputs) noexcept = 0;
1189 virtual IElementWiseLayer* addElementWise(ITensor& input1, ITensor& input2, ElementWiseOperation op) noexcept = 0;
1190 virtual IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) noexcept = 0;
1191 virtual IShuffleLayer* addShuffle(ITensor& input) noexcept = 0;
    // Enumeration of the layers, inputs and outputs currently in the network, by index.
1192 virtual int32_t getNbLayers() const noexcept = 0;
1193 virtual ILayer* getLayer(int32_t index) const noexcept = 0;
1194 virtual int32_t getNbInputs() const noexcept = 0;
1195 virtual ITensor* getInput(int32_t index) const noexcept = 0;
1196 virtual int32_t getNbOutputs() const noexcept = 0;
1197 virtual ITensor* getOutput(int32_t index) const noexcept = 0;
1198 virtual IReduceLayer* addReduce(
1199 ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
1200 = 0;
1201 virtual ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept = 0;
1202 virtual IGatherLayer* addGather(ITensor& data, ITensor& indices, int32_t axis) noexcept = 0;
1203 virtual IRaggedSoftMaxLayer* addRaggedSoftMax(ITensor& input, ITensor& bounds) noexcept = 0;
1204 virtual IMatrixMultiplyLayer* addMatrixMultiply(
1205 ITensor& input0, MatrixOperation op0, ITensor& input1, MatrixOperation op1) noexcept = 0;
1206 virtual IConstantLayer* addConstant(Dims const& dimensions, Weights weights) noexcept = 0;
1207 virtual IIdentityLayer* addIdentity(ITensor& input) noexcept = 0;
1208 virtual void removeTensor(ITensor& tensor) noexcept = 0;
1209 virtual void unmarkOutput(ITensor& tensor) noexcept = 0;
    // Plugin layers: the V3 variant takes a second array of shape inputs.
1210 virtual IPluginV2Layer* addPluginV2(ITensor* const* inputs, int32_t nbInputs, IPluginV2& plugin) noexcept = 0;
1211 virtual IPluginV3Layer* addPluginV3(ITensor* const* inputs, int32_t nbInputs, ITensor* const* shapeInputs,
1212 int32_t nbShapeInputs, IPluginV3& plugin) noexcept = 0;
1213 virtual ISliceLayer* addSlice(ITensor& input, Dims const& start, Dims const& size, Dims const& stride) noexcept = 0;
1214 virtual void setName(char const* name) noexcept = 0;
1215 virtual char const* getName() const noexcept = 0;
1216 virtual IShapeLayer* addShape(ITensor& input) noexcept = 0;
1217 virtual bool hasImplicitBatchDimension() const noexcept = 0;
1218 virtual bool markOutputForShapes(ITensor& tensor) noexcept = 0;
1219 virtual bool unmarkOutputForShapes(ITensor& tensor) noexcept = 0;
1220 virtual IParametricReLULayer* addParametricReLU(ITensor& input, ITensor& slope) noexcept = 0;
1221 virtual IConvolutionLayer* addConvolutionNd(
1222 ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
1223 = 0;
1224 virtual IPoolingLayer* addPoolingNd(ITensor& input, PoolingType type, Dims const& windowSize) noexcept = 0;
1225 virtual IDeconvolutionLayer* addDeconvolutionNd(
1226 ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
1227 = 0;
1228 virtual IScaleLayer* addScaleNd(
1229 ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept = 0;
1230 virtual IResizeLayer* addResize(ITensor& input) noexcept = 0;
1231 virtual ILoop* addLoop() noexcept = 0;
1232 virtual ISelectLayer* addSelect(ITensor& condition, ITensor& thenInput, ITensor& elseInput) noexcept = 0;
1233 virtual IFillLayer* addFill(Dims const& dimensions, FillOperation op) noexcept = 0;
1234 virtual IPaddingLayer* addPaddingNd(ITensor& input, Dims const& prePadding, Dims const& postPadding) noexcept = 0;
1235 virtual bool setWeightsName(Weights weights, char const* name) noexcept = 0;
1236 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
1237 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
1238 virtual IDequantizeLayer* addDequantize(ITensor& input, ITensor& scale) noexcept = 0;
1239 virtual IQuantizeLayer* addQuantize(ITensor& input, ITensor& scale) noexcept = 0;
1240 virtual IGatherLayer* addGatherV2(ITensor& data, ITensor& indices, GatherMode mode) noexcept = 0;
1241 virtual IIfConditional* addIfConditional() noexcept = 0;
1242 virtual IScatterLayer* addScatter(ITensor& data, ITensor& indices, ITensor& updates, ScatterMode mode) noexcept = 0;
1243 virtual IEinsumLayer* addEinsum(ITensor* const* inputs, int32_t nbInputs, char const* equation) noexcept = 0;
1244 virtual IAssertionLayer* addAssertion(ITensor& condition, char const* message) noexcept = 0;
1245 virtual IOneHotLayer* addOneHot(ITensor& indices, ITensor& values, ITensor& depth, int32_t axis) noexcept = 0;
1246 virtual INonZeroLayer* addNonZero(ITensor& input) noexcept = 0;
1247 virtual IGridSampleLayer* addGridSample(ITensor& input, ITensor& grid) noexcept = 0;
1248 virtual INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass) noexcept = 0;
1249 virtual IReverseSequenceLayer* addReverseSequence(ITensor& input, ITensor& sequenceLens) noexcept = 0;
1250 virtual INormalizationLayer* addNormalization(
1251 ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept = 0;
1252 virtual ICastLayer* addCast(ITensor& input, DataType toType) noexcept = 0;
1253 virtual IBuilder& getBuilder() const noexcept = 0;
1254 virtual NetworkDefinitionCreationFlags getFlags() const noexcept = 0;
1255 virtual bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept = 0;
    // V2 variants of earlier factories: same inputs plus an explicit output DataType.
1256 virtual IQuantizeLayer* addQuantizeV2(ITensor& input, ITensor& scale, DataType outputType) noexcept = 0;
1257 virtual IDequantizeLayer* addDequantizeV2(ITensor& input, ITensor& scale, DataType outputType) noexcept = 0;
1258 virtual IFillLayer* addFillV2(Dims const& dimensions, FillOperation op, DataType outputType) noexcept = 0;
    // Debug-tensor marking; each call returns bool.
1259 virtual bool markDebug(ITensor& tensor) noexcept = 0;
1260 virtual bool unmarkDebug(ITensor& tensor) noexcept = 0;
1261 virtual bool isDebugTensor(ITensor const& tensor) const noexcept = 0;
    // Refittable-weight marking, keyed by weights name; each call returns bool.
1262 virtual bool markWeightsRefittable(char const* name) noexcept = 0;
1263 virtual bool unmarkWeightsRefittable(char const* name) noexcept = 0;
1264 virtual bool areWeightsMarkedRefittable(char const* name) const noexcept = 0;
1265 virtual ISqueezeLayer* addSqueeze(ITensor& input, ITensor& axes) noexcept = 0;
1266 virtual IUnsqueezeLayer* addUnsqueeze(ITensor& input, ITensor& axes) noexcept = 0;
1267 virtual IDynamicQuantizeLayer* addDynamicQuantize(
1268 ITensor& input, int32_t axis, int32_t blockSize, DataType toType, DataType scaleType) noexcept = 0;
1269 virtual ICumulativeLayer* addCumulative(
1270 ITensor& input, ITensor& axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept = 0;
1271 virtual bool markUnfusedTensorsAsDebugTensors() noexcept = 0;
1272 virtual bool unmarkUnfusedTensorsAsDebugTensors() noexcept = 0;
    // V2 variants of addTopK / addNonZero / addNMS: same inputs plus an explicit indices DataType.
1273 virtual ITopKLayer* addTopKV2(
1274 ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes, DataType indicesType) noexcept = 0;
1275 virtual INonZeroLayer* addNonZeroV2(ITensor& input, DataType indicesType) noexcept = 0;
1276 virtual INMSLayer* addNMSV2(
1277 ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass, DataType indicesType) noexcept = 0;
1278 virtual IAttention* addAttention(
1279 ITensor& query, ITensor& key, ITensor& value, AttentionNormalizationOp normOp, bool isCausal) noexcept = 0;
1280 virtual IRotaryEmbeddingLayer* addRotaryEmbedding(ITensor& input, ITensor& cosCache, ITensor& sinCache,
1281 bool interleaved, int32_t rotaryEmbeddingDim) noexcept = 0;
    // Takes a block shape (Dims) where addDynamicQuantize takes axis + blockSize.
1282 virtual IDynamicQuantizeLayer* addDynamicQuantizeV2(
1283 ITensor& input, Dims const& blockShape, DataType toType, DataType scaleType) noexcept = 0;
1284 virtual IKVCacheUpdateLayer* addKVCacheUpdate(
1285 ITensor& cache, ITensor& update, ITensor& writeIndices, KVCacheMode cacheMode) noexcept = 0;
    // Same parameter list as addNormalization above.
1286 virtual INormalizationLayer* addNormalizationV2(
1287 ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept = 0;
1288 virtual IMoELayer* addMoE(
1289 ITensor& hiddenStates, ITensor& selectedExpertsForTokens, ITensor& scoresForSelectedExperts) noexcept = 0;
    // NOTE(review): `groups` is a non-const int64_t* with an element count in `groupSize`;
    // whether the callee may write through it cannot be told from this declaration.
1290 virtual IDistCollectiveLayer* addDistCollective(ITensor& input, CollectiveOperation distCollectiveOp,
1291 ReduceOperation reduceOp, int64_t root, int64_t* groups, int64_t groupSize) noexcept = 0;
1292};
1293
// Abstract, read-only interface (derived from VRoot) describing one algorithm input/output:
// its data type, strides, vectorized dimension and components per element.
// All members are const, pure virtual and noexcept.
1294class VAlgorithmIOInfo : public VRoot
1295{
1296public:
1297 virtual DataType getDataType() const noexcept = 0;
1298 virtual Dims getStrides() const noexcept = 0;
1299 virtual int64_t getVectorizedDim() const noexcept = 0;
1300 virtual int64_t getComponentsPerElement() const noexcept = 0;
1301};
1302
// Abstract, read-only interface (derived from VRoot) identifying an algorithm variant
// by two 64-bit values: an implementation id and a tactic id.
1303class VAlgorithmVariant : public VRoot
1304{
1305public:
1306 virtual int64_t getImplementation() const noexcept = 0;
1307 virtual int64_t getTactic() const noexcept = 0;
1308};
1309
// Abstract, read-only interface (derived from VRoot) for an algorithm context: a name,
// dimensions looked up by index and OptProfileSelector, and the input/output counts.
1310class VAlgorithmContext : public VRoot
1311{
1312public:
1313 virtual char const* getName() const noexcept = 0;
1314 virtual Dims getDimensions(int32_t index, OptProfileSelector select) const noexcept = 0;
1315 virtual int32_t getNbInputs() const noexcept = 0;
1316 virtual int32_t getNbOutputs() const noexcept = 0;
1317};
1318
// Abstract, read-only interface (derived from VRoot) for an algorithm: its variant,
// timing in milliseconds, workspace size, and per-index I/O info.
1319class VAlgorithm : public VRoot
1320{
1321public:
    // Returned by const reference; the I/O info below is returned by const pointer instead.
1322 virtual IAlgorithmVariant const& getAlgorithmVariant() const noexcept = 0;
1323 virtual float getTimingMSec() const noexcept = 0;
1324 virtual std::size_t getWorkspaceSize() const noexcept = 0;
1325 virtual IAlgorithmIOInfo const* getAlgorithmIOInfoByIndex(int32_t index) const noexcept = 0;
1326};
1327
1328
// Abstract interface (derived from VRoot) for a timing cache implementation.
// Declares serialization to host memory, combining with another cache, reset, and
// key/value query and update operations. All members are pure virtual and noexcept.
1329class VTimingCache : public VRoot
1330{
1331public:
1332 virtual nvinfer1::IHostMemory* serialize() const noexcept = 0;
1333 virtual bool combine(ITimingCache const& inputCache, bool ignoreMismatch) noexcept = 0;
1334 virtual bool reset() noexcept = 0;
    // NOTE(review): presumably fills keyBuffer with up to `capacity` keys and returns a count —
    // confirm the exact return contract against ITimingCache::queryKeys.
1335 virtual int64_t queryKeys(TimingCacheKey* keyBuffer, int64_t capacity) const noexcept = 0;
1336 virtual TimingCacheValue query(TimingCacheKey const& key) const noexcept = 0;
1337 virtual bool update(TimingCacheKey const& key, TimingCacheValue const& value) noexcept = 0;
1338};
1339
// Abstract interface (derived from VRoot) for a builder configuration implementation.
// Declares setter/getter pairs for build-time options: timing iterations, engine capability,
// INT8 calibrator, builder flags, per-layer and default device types, DLA core, profile
// stream, optimization profiles, profiling verbosity, algorithm selector, calibration
// profile, quantization flags, tactic sources, timing cache, memory pool limits, preview
// features, optimization level, hardware compatibility, plugins to serialize, auxiliary
// streams, progress monitor, runtime platform, tactic count, tiling options and remote
// auto-tuning config. All members are pure virtual and noexcept.
// NOTE(review): the declaration order defines the vtable layout; do not reorder.
1340class VBuilderConfig : public VRoot
1341{
1342public:
1343 virtual void setAvgTimingIterations(int32_t avgTiming) noexcept = 0;
1344 virtual int32_t getAvgTimingIterations() const noexcept = 0;
1345 virtual void setEngineCapability(EngineCapability capability) noexcept = 0;
1346 virtual EngineCapability getEngineCapability() const noexcept = 0;
1347 virtual void setInt8Calibrator(IInt8Calibrator* calibrator) noexcept = 0;
1348 virtual IInt8Calibrator* getInt8Calibrator() const noexcept = 0;
    // Builder flags: set/get the whole BuilderFlags value, or clear/set/get a single BuilderFlag.
1349 virtual void setFlags(BuilderFlags builderFlags) noexcept = 0;
1350 virtual BuilderFlags getFlags() const noexcept = 0;
1351 virtual void clearFlag(BuilderFlag builderFlag) noexcept = 0;
1352 virtual void setFlag(BuilderFlag builderFlag) noexcept = 0;
1353 virtual bool getFlag(BuilderFlag builderFlag) const noexcept = 0;
    // Per-layer device placement, keyed by a const ILayer pointer.
1354 virtual void setDeviceType(ILayer const* layer, DeviceType deviceType) noexcept = 0;
1355 virtual DeviceType getDeviceType(ILayer const* layer) const noexcept = 0;
1356 virtual bool isDeviceTypeSet(ILayer const* layer) const noexcept = 0;
1357 virtual void resetDeviceType(ILayer const* layer) noexcept = 0;
1358 virtual bool canRunOnDLA(ILayer const* layer) const noexcept = 0;
1359 virtual void setDLACore(int32_t dlaCore) noexcept = 0;
1360 virtual int32_t getDLACore() const noexcept = 0;
1361 virtual void setDefaultDeviceType(DeviceType deviceType) noexcept = 0;
1362 virtual DeviceType getDefaultDeviceType() const noexcept = 0;
1363 virtual void reset() noexcept = 0;
1364 virtual void setProfileStream(const cudaStream_t stream) noexcept = 0;
1365 virtual cudaStream_t getProfileStream() const noexcept = 0;
1366 virtual int32_t addOptimizationProfile(IOptimizationProfile const* profile) noexcept = 0;
1367 virtual int32_t getNbOptimizationProfiles() const noexcept = 0;
1368 virtual void setProfilingVerbosity(ProfilingVerbosity verbosity) noexcept = 0;
1369 virtual ProfilingVerbosity getProfilingVerbosity() const noexcept = 0;
1370 virtual void setAlgorithmSelector(IAlgorithmSelector* selector) noexcept = 0;
1371 virtual IAlgorithmSelector* getAlgorithmSelector() const noexcept = 0;
1372 virtual bool setCalibrationProfile(IOptimizationProfile const* profile) noexcept = 0;
    // NOTE(review): unlike the other getters in this class, this one is not const-qualified.
    // Left as is: adding const would change the virtual signature and break overriders.
1373 virtual IOptimizationProfile const* getCalibrationProfile() noexcept = 0;
    // Quantization flags: same set/get/clear shape as the builder flags above.
1374 virtual void setQuantizationFlags(QuantizationFlags flags) noexcept = 0;
1375 virtual QuantizationFlags getQuantizationFlags() const noexcept = 0;
1376 virtual void clearQuantizationFlag(QuantizationFlag flag) noexcept = 0;
1377 virtual void setQuantizationFlag(QuantizationFlag flag) noexcept = 0;
1378 virtual bool getQuantizationFlag(QuantizationFlag flag) const noexcept = 0;
1379 virtual bool setTacticSources(TacticSources tacticSources) noexcept = 0;
1380 virtual TacticSources getTacticSources() const noexcept = 0;
    // Timing cache: create one from a serialized blob, attach it, and read it back.
1381 virtual nvinfer1::ITimingCache* createTimingCache(void const* blob, std::size_t size) const noexcept = 0;
1382 virtual bool setTimingCache(ITimingCache const& cache, bool ignoreMismatch) noexcept = 0;
1383 virtual nvinfer1::ITimingCache const* getTimingCache() const noexcept = 0;
1384 virtual void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept = 0;
1385 virtual std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept = 0;
1386 virtual void setPreviewFeature(PreviewFeature feature, bool enable) noexcept = 0;
1387 virtual bool getPreviewFeature(PreviewFeature feature) const noexcept = 0;
1388 virtual void setBuilderOptimizationLevel(int32_t level) noexcept = 0;
1389 virtual int32_t getBuilderOptimizationLevel() const noexcept = 0;
1390 virtual void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept = 0;
1391 virtual HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept = 0;
    // Plugins to serialize: set as an array of path strings, read back one entry at a time.
1392 virtual void setPluginsToSerialize(char const* const* paths, int32_t nbPaths) noexcept = 0;
1393 virtual char const* getPluginToSerialize(int32_t index) const noexcept = 0;
1394 virtual int32_t getNbPluginsToSerialize() const noexcept = 0;
1395 virtual void setMaxAuxStreams(int32_t nbStreams) noexcept = 0;
1396 virtual int32_t getMaxAuxStreams() const noexcept = 0;
1397 virtual void setProgressMonitor(IProgressMonitor* monitor) noexcept = 0;
1398 virtual IProgressMonitor* getProgressMonitor() const noexcept = 0;
1399 virtual void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept = 0;
1400 virtual RuntimePlatform getRuntimePlatform() const noexcept = 0;
1401 virtual void setMaxNbTactics(int32_t maxTactics) noexcept = 0;
1402 virtual int32_t getMaxNbTactics() const noexcept = 0;
    // The remaining setters return bool rather than void.
1403 virtual bool setTilingOptimizationLevel(TilingOptimizationLevel level) noexcept = 0;
1404 virtual TilingOptimizationLevel getTilingOptimizationLevel() const noexcept = 0;
1405 virtual bool setL2LimitForTiling(int64_t size) noexcept = 0;
1406 virtual int64_t getL2LimitForTiling() const noexcept = 0;
1407 virtual bool setRemoteAutoTuningConfig(char const* config) noexcept = 0;
1408 virtual char const* getRemoteAutoTuningConfig() const noexcept = 0;
1409};
1410
// Abstract interface (derived from VRoot) for a serialization configuration implementation.
// Declares set/get for the whole SerializationFlags value and clear/set/get for a single
// SerializationFlag. Every mutating member returns bool.
1411class VSerializationConfig : public VRoot
1412{
1413public:
1414 virtual bool setFlags(SerializationFlags serializationFlags) noexcept = 0;
1415 virtual SerializationFlags getFlags() const noexcept = 0;
1416 virtual bool clearFlag(SerializationFlag serializationFlag) noexcept = 0;
1417 virtual bool setFlag(SerializationFlag serializationFlag) noexcept = 0;
1418 virtual bool getFlag(SerializationFlag serializationFlag) const noexcept = 0;
1419};
1420
// Abstract interface (derived from VRoot) for a builder implementation.
// Declares platform capability queries, DLA queries, factories for builder configs,
// networks and optimization profiles, several build entry points, and accessors for the
// GPU allocator, error recorder, logger, thread limit and plugin registry.
// All members are pure virtual and noexcept.
1421class VBuilder : public VRoot
1422{
1423public:
1424 virtual bool platformHasFastFp16() const noexcept = 0;
1425 virtual bool platformHasFastInt8() const noexcept = 0;
1426 virtual int32_t getMaxDLABatchSize() const noexcept = 0;
1427 virtual int32_t getNbDLACores() const noexcept = 0;
1428 virtual void setGpuAllocator(IGpuAllocator* allocator) noexcept = 0;
    // Factories returning raw pointers.
    // NOTE(review): ownership of the returned objects is not expressed in these signatures — confirm.
1429 virtual nvinfer1::IBuilderConfig* createBuilderConfig() noexcept = 0;
1430 virtual nvinfer1::INetworkDefinition* createNetworkV2(NetworkDefinitionCreationFlags flags) noexcept = 0;
1431 virtual nvinfer1::IOptimizationProfile* createOptimizationProfile() noexcept = 0;
1432 virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
1433 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
1434 virtual void reset() noexcept = 0;
1435 virtual bool platformHasTf32() const noexcept = 0;
    // Build entry point: network + config in, serialized result returned as IHostMemory*.
1436 virtual nvinfer1::IHostMemory* buildSerializedNetwork(
1437 INetworkDefinition& network, IBuilderConfig& config) noexcept = 0;
1438 virtual bool isNetworkSupported(INetworkDefinition const& network, IBuilderConfig const& config) const noexcept = 0;
1439 virtual ILogger* getLogger() const noexcept = 0;
1440 virtual bool setMaxThreads(int32_t maxThreads) noexcept = 0;
1441 virtual int32_t getMaxThreads() const noexcept = 0;
1442 virtual IPluginRegistry& getPluginRegistry() noexcept = 0;
    // Further build entry points: one returns an ICudaEngine*, one writes to a caller-supplied
    // IStreamWriter and returns bool, and one additionally takes an IHostMemory*& kernelText.
1443 virtual ICudaEngine* buildEngineWithConfig(INetworkDefinition& network, IBuilderConfig& config) noexcept = 0;
1444 virtual bool buildSerializedNetworkToStream(
1445 INetworkDefinition& network, IBuilderConfig& config, IStreamWriter& writer) noexcept = 0;
    // NOTE(review): kernelText is a reference to a pointer, presumably an output parameter — confirm.
1446 virtual nvinfer1::IHostMemory* buildSerializedNetworkWithKernelText(
1447 INetworkDefinition& network, IBuilderConfig& config, IHostMemory*& kernelText) noexcept
1448 = 0;
1449};
1450
// Abstract interface (derived from VRoot) for a runtime configuration implementation.
// Declares an accessor returning an IRuntimeConfig pointer and a setter/getter pair for
// the ExecutionContextAllocationStrategy.
1451class VRuntimeConfig : public VRoot
1452{
1453public:
1454 virtual IRuntimeConfig* getPImpl() noexcept = 0;
1455 virtual void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept = 0;
1456 virtual ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept = 0;
1457};
1458
1459
// Abstract interface type for a distributed-collective layer implementation.
// Declares no members of its own (and no access specifier); it only introduces a
// distinct type derived from VRoot.
1460class VDistCollectiveLayer : public VRoot
1461{
1462}; // VDistCollectiveLayer
1463
1464} // namespace apiv
1465} // namespace nvinfer1
1466
1467// @endcond
1468
1469#endif // NV_INFER_IMPL_H
TENSORRTAPI nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
TENSORRTAPI nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
#define TRT_NODISCARD
A stand-in for [[nodiscard]] and [[nodiscard(REASON)]] that works with older compilers.
Definition: NvInferRuntimeBase.h:57
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
Structure to define the dimensions of a tensor.
Interface implemented by application for selecting and reporting algorithms of a layer provided by th...
User-implemented callback for notification when value of a debug tensor is updated.
Reference counted application-implemented error reporting interface for TensorRT objects.
Application-implemented class for controlling allocation on the GPU.
Callback from ExecutionContext::enqueueV3()
Plugin class for the V3 generation of user-implemented layers.
Application-implemented interface for profiling.
Application-implemented progress reporting interface for TensorRT.
Application-implemented class for reading data in a stream-based manner.
Application-implemented class for reading data in a stream-based manner asynchronously....
Application-implemented class for writing data in a stream-based manner.
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:142
The TensorRT API version 1 namespace.
Definition: NvInferSafePlugin.h:33
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2961
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:4264
ResizeSelector
The coordinate selector used when resizing to a single-pixel output.
Definition: NvInfer.h:4181
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:179
MemoryPoolType
The type for memory pools used by TensorRT.
Definition: NvInfer.h:10841
ScaleMode
Controls how shift, scale and power are applied in a Scale layer.
Definition: NvInfer.h:1889
RuntimePlatform
Describes the intended runtime platform (operating system and CPU architecture) for the execution of ...
Definition: NvInfer.h:10421
uint32_t QuantizationFlags
Represents one or more QuantizationFlag values using binary OR operations.
Definition: NvInfer.h:10373
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:659
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:10969
CumulativeOperation
Enumerates the cumulative operations that may be performed by a Cumulative layer.
Definition: NvInfer.h:6790
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:3002
BoundingBoxFormat
Representation of bounding box data used for the Boxes input tensor in INMSLayer.
Definition: NvInfer.h:6328
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:2834
v_1_0::IStreamWriter IStreamWriter
Definition: NvInferRuntime.h:720
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1318
v_1_0::IAlgorithmSelector IAlgorithmSelector
Definition: NvInfer.h:10365
v_1_0::IStreamReaderV2 IStreamReaderV2
Definition: NvInferRuntime.h:797
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:143
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1396
FillOperation
Enumerates the tensor fill operations that may be performed by a fill layer.
Definition: NvInfer.h:5162
EngineStat
The kind of engine statistics that can be queried from the ICudaEngine.
Definition: NvInferRuntime.h:3170
ResizeRoundMode
The rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4211
PaddingMode
Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer,...
Definition: NvInfer.h:1067
TripLimit
Enum that describes kinds of trip limits.
Definition: NvInfer.h:4570
uint32_t NetworkDefinitionCreationFlags
Represents one or more NetworkDefinitionCreationFlag flags using binary OR operations....
Definition: NvInfer.h:12103
PreviewFeature
Define preview features.
Definition: NvInfer.h:10916
TilingOptimizationLevel
Define the optimization levels for Tiling.
Definition: NvInfer.h:11022
DataType
The type of weights and tensors. The datatypes other than kBOOL, kINT32, and kINT64 are "activation d...
Definition: NvInferRuntimeBase.h:146
uint32_t BuilderFlags
Represents one or more BuilderFlag values using binary OR operations, e.g., 1U << BuilderFlag::kFP16 ...
Definition: NvInfer.h:10453
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1350
CalibrationAlgoType
Version of calibration algorithm to use.
Definition: NvInfer.h:9799
LayerType
The type values of layer classes.
Definition: NvInfer.h:58
SampleMode
Controls how ISliceLayer and IGridSampleLayer handle out-of-bounds coordinates.
Definition: NvInfer.h:3325
GatherMode
Control form of IGatherLayer.
Definition: NvInfer.h:2581
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:4305
MoEActType
Enumerates the activation type for the MoE layer.
Definition: NvInfer.h:7554
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1328
uint32_t TensorFormats
It is capable of representing one or more TensorFormat by binary OR operations, e....
Definition: NvInfer.h:135
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2973
NetworkDefinitionCreationFlag
List of immutable network properties expressed at network creation time. NetworkDefinitionCreationFla...
Definition: NvInfer.h:12114
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:2491
QuantizationFlag
List of valid flags for quantizing the network to int8.
Definition: NvInfer.h:10385
CollectiveOperation
Enumerates the collective operations that may be performed by a DistCollective layer.
Definition: NvInfer.h:2962
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2992
InterpolationMode
Enumerates various modes of interpolation.
Definition: NvInfer.h:4099
BuilderFlag
List of valid modes that the builder can enable when creating an engine from a network definition.
Definition: NvInfer.h:10463
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntime.h:1430
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:3107
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:3606
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:2934
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:5279
ScatterMode
Control form of IScatterLayer.
Definition: NvInfer.h:6062
MatrixOperation
Enumerates the operations that may be performed on a tensor by IMatrixMultiplyLayer before multiplica...
Definition: NvInfer.h:3759
ResizeCoordinateTransformation
The resize coordinate transformation function.
Definition: NvInfer.h:4127
LoopOutput
Enum that describes kinds of loop outputs.
Definition: NvInfer.h:4542
KVCacheMode
Enumerates the KVCache modes that may be performed by a KVCacheUpdate layer.
Definition: NvInfer.h:7466
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntime.h:710
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1503
v_1_0::IProgressMonitor IProgressMonitor
Definition: NvInfer.h:11138
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:204
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2635
AttentionNormalizationOp
Enumerates the operations that may be performed by the normalization in the attention subgraph.
Definition: NvInfer.h:6925
nvinfer1::IPluginV3 IPluginV3
Definition: NvInferForwardDecl.h:47

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact