18#ifndef NV_INFER_IMPL_H
19#define NV_INFER_IMPL_H
// Forward declarations of interface types that the declarations below refer to
// by pointer or reference, so their full definitions are not required here.
56class IActivationLayer;
61class IConcatenationLayer;
64class IConvolutionLayer;
66class ICumulativeLayer;
67class IDeconvolutionLayer;
68class IDequantizeLayer;
70class IDynamicQuantizeLayer;
72class IElementWiseLayer;
73class IEngineInspector;
74class IExecutionContext;
77class IGridSampleLayer;
82class IIfConditionalInputLayer;
83class IIfConditionalOutputLayer;
85class IKVCacheUpdateLayer;
88class ILoopOutputLayer;
90class IMatrixMultiplyLayer;
92class IDistCollectiveLayer;
93class INetworkDefinition;
94class INormalizationLayer;
98class IOptimizationProfile;
100class IParametricReLULayer;
105class IPluginRegistry;
107class IRotaryEmbeddingLayer;
133class IRaggedSoftMaxLayer;
134class IRecurrenceLayer;
138class IReverseSequenceLayer;
143class ISerializationConfig;
// NOTE(review): the two structs are forward-declared and then aliased from v_1_0::;
// presumably the struct declarations live inside a namespace v_1_0 whose braces are
// not visible in this excerpt - confirm against the full file.
153struct TimingCacheKey;
154struct TimingCacheValue;
156using TimingCacheKey = v_1_0::TimingCacheKey;
157using TimingCacheValue = v_1_0::TimingCacheValue;
161class ITripLimitLayer;
163class IUnsqueezeLayer;
// Opaque declaration of a scoped enum whose underlying type is int32_t.
190enum class QuantizationFlag : int32_t;
// Alias for an unsigned 32-bit integer.
// NOTE(review): presumably a bitmask built from QuantizationFlag values - confirm.
223using QuantizationFlags = uint32_t;
// Defaulted, noexcept virtual destructor of VRoot, the base class that every
// V* interface declared below derives from. The VRoot class header itself is
// not visible in this excerpt.
241 virtual ~VRoot() noexcept = default;
// VHostMemory: pure-virtual interface derived from VRoot. Declares three const,
// noexcept accessors returning a raw void pointer, a std::size_t and a DataType.
244class VHostMemory : public VRoot
247 virtual void* data() const noexcept = 0;
248 virtual std::
size_t size() const noexcept = 0;
249 virtual
DataType type() const noexcept = 0;
// VDimensionExpr: pure-virtual interface derived from VRoot with three const queries.
// Unlike most member functions in this file, these are not declared noexcept.
252class VDimensionExpr : public VRoot
255 virtual bool isConstant()
const = 0;
256 virtual int64_t getConstantValue()
const = 0;
257 virtual bool isSizeTensor()
const = 0;
// VExprBuilder: pure-virtual interface derived from VRoot whose members return
// pointers to const IDimensionExpr. None of the visible members is declared noexcept.
// NOTE(review): the parameter list of operation() and the terminator of
// declareSizeTensor() are not visible in this excerpt - confirm against the full file.
260class VExprBuilder :
public VRoot
// Takes a 64-bit integer value and returns an IDimensionExpr const*.
263 virtual IDimensionExpr
const* constant(int64_t value) = 0;
264 virtual IDimensionExpr
const* operation(
// Takes an output index plus two IDimensionExpr references named opt and upper.
267 virtual IDimensionExpr
const* declareSizeTensor(
268 int32_t outputIndex, IDimensionExpr
const& opt, IDimensionExpr
const& upper)
// VRuntime: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares engine deserialization from a memory blob, getters/setters for the DLA core,
// GPU allocator, error recorder, thread limit and temporary directory, plus
// engine-header/validity queries.
272class VRuntime :
public VRoot
// Returns the IRuntime object associated with this implementation object.
275 virtual IRuntime* getPImpl() noexcept = 0;
// Takes a pointer to a serialized blob and its size; returns an ICudaEngine pointer.
276 virtual
nvinfer1::ICudaEngine* deserializeCudaEngine(
void const* blob, std::
size_t size) noexcept = 0;
278 virtual
void setDLACore(int32_t dlaCore) noexcept = 0;
279 virtual int32_t getDLACore() const noexcept = 0;
280 virtual int32_t getNbDLACores() const noexcept = 0;
281 virtual
void setGpuAllocator(
IGpuAllocator* allocator) noexcept = 0;
282 virtual
void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
284 virtual ILogger*
getLogger() const noexcept = 0;
// Returns bool, unlike the void-returning setters above.
285 virtual
bool setMaxThreads(int32_t maxThreads) noexcept = 0;
286 virtual int32_t getMaxThreads() const noexcept = 0;
287 virtual
void setTemporaryDirectory(
char const*) noexcept = 0;
288 virtual
char const* getTemporaryDirectory() const noexcept = 0;
292 virtual
void setPluginRegistryParent(IPluginRegistry* parent) noexcept = 0;
// Takes a path string and returns an IRuntime pointer.
293 virtual IRuntime* loadRuntime(
char const* path) noexcept = 0;
294 virtual
void setEngineHostCodeAllowed(
bool allowed) noexcept = 0;
295 virtual
bool getEngineHostCodeAllowed() const noexcept = 0;
298 virtual int64_t getEngineHeaderSize() const noexcept = 0;
// Takes a blob pointer and its size; `diagnostics` is a pointer to a uint64_t.
// NOTE(review): presumably an optional out-parameter - confirm nullability with the public API docs.
299 virtual
EngineValidity getEngineValidity(
void const* blob, int64_t blobSize, uint64_t* diagnostics) const noexcept = 0;
// VRefitter: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares weight setters/getters keyed by layer name + role or by weights name,
// dynamic-range accessors keyed by tensor name, and synchronous/asynchronous refit entry points.
302class VRefitter : public VRoot
// Returns the IRefitter object associated with this implementation object.
305 virtual IRefitter* getPImpl() noexcept = 0;
// `weights` is taken by value; the top-level const on a by-value parameter
// does not change the function's type.
306 virtual
bool setWeights(
char const* layerName,
WeightsRole role, const Weights weights) noexcept = 0;
307 virtual
bool refitCudaEngine() noexcept = 0;
// The next two take a capacity `size` and two caller-provided output arrays; they return an int32_t.
// NOTE(review): presumably the return value is a count of entries - confirm.
308 virtual int32_t getMissing(int32_t size,
char const** layerNames,
WeightsRole* roles) noexcept = 0;
309 virtual int32_t getAll(int32_t size,
char const** layerNames,
WeightsRole* roles) noexcept = 0;
310 virtual
bool setDynamicRange(
char const* tensorName,
float min,
float max) noexcept = 0;
311 virtual
float getDynamicRangeMin(
char const* tensorName) const noexcept = 0;
312 virtual
float getDynamicRangeMax(
char const* tensorName) const noexcept = 0;
313 virtual int32_t getTensorsWithDynamicRange(int32_t size,
char const** tensorNames) const noexcept = 0;
314 virtual
void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
// Name-keyed variants: weights are identified by a single name instead of layer name + role.
316 virtual
bool setNamedWeights(
char const* name, Weights weights) noexcept = 0;
317 virtual int32_t getMissingWeights(int32_t size,
char const** weightsNames) noexcept = 0;
318 virtual int32_t getAllWeights(int32_t size,
char const** weightsNames) noexcept = 0;
319 virtual ILogger*
getLogger() const noexcept = 0;
320 virtual
bool setMaxThreads(int32_t maxThreads) noexcept = 0;
321 virtual int32_t getMaxThreads() const noexcept = 0;
// Same as setNamedWeights but with an additional TensorLocation argument.
322 virtual
bool setNamedWeightsWithLocation(
char const* name, Weights weights,
TensorLocation location) noexcept = 0;
323 virtual Weights getNamedWeights(
char const* weightsName) const noexcept = 0;
324 virtual
TensorLocation getWeightsLocation(
char const* weightsName) const noexcept = 0;
325 virtual
bool unsetNamedWeights(
char const* weightsName) noexcept = 0;
326 virtual
void setWeightsValidation(
bool weightsValidation) noexcept = 0;
327 virtual
bool getWeightsValidation() const noexcept = 0;
// Refit variant that takes a CUDA stream.
328 virtual
bool refitCudaEngineAsync(cudaStream_t stream) noexcept = 0;
329 virtual Weights getWeightsPrototype(
char const* weightsName) const noexcept = 0;
// VOptimizationProfile: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares per-input setters keyed by input name and an OptProfileSelector,
// shape-value accessors, an extra-memory target, and a validity query.
332class VOptimizationProfile : public VRoot
335 virtual bool setDimensions(
char const* inputName,
OptProfileSelector select,
Dims const& dims)
noexcept = 0;
// Shape values are passed as a pointer to int32_t plus an element count.
337 virtual bool setShapeValues(
338 char const* inputName,
OptProfileSelector select, int32_t
const* values, int32_t nbValues)
noexcept = 0;
339 virtual int32_t getNbShapeValues(
char const* inputName)
const noexcept = 0;
340 virtual int32_t
const* getShapeValues(
char const* inputName,
OptProfileSelector select)
const noexcept = 0;
341 virtual bool setExtraMemoryTarget(
float target)
noexcept = 0;
342 virtual float getExtraMemoryTarget() const noexcept = 0;
343 virtual
bool isValid() const noexcept = 0;
// NOTE(review): the start of the following declaration (return type and name) is not
// visible in this excerpt. Its parameter list mirrors setShapeValues but takes
// int64_t values instead of int32_t - confirm the name against the full file.
346 char const* inputName,
OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept = 0;
// VCudaEngine: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares serialization, per-tensor metadata queries keyed by tensor name,
// optimization-profile queries, weight-streaming budget controls, and factory
// functions for inspectors, refitters, runtime configs and execution contexts.
351class VCudaEngine : public VRoot
// Returns the ICudaEngine object associated with this implementation object.
354 virtual ICudaEngine* getPImpl() noexcept = 0;
355 virtual int32_t getNbLayers() const noexcept = 0;
356 virtual IHostMemory* serialize() const noexcept = 0;
358 virtual
bool isRefittable() const noexcept = 0;
359 virtual
char const* getName() const noexcept = 0;
360 virtual int32_t getNbOptimizationProfiles() const noexcept = 0;
// Returns a pointer to int32_t values; see getProfileTensorValuesV2 for the int64_t variant.
361 virtual int32_t const* getProfileTensorValues(
362 char const* tensorName, int32_t profileIndex,
OptProfileSelector select) const noexcept = 0;
364 virtual
void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
366 virtual
bool hasImplicitBatchDimension() const noexcept = 0;
369 virtual IEngineInspector* createEngineInspector() const noexcept = 0;
// Per-tensor queries keyed by tensor name only.
370 virtual
Dims getTensorShape(
char const* tensorName) const noexcept = 0;
371 virtual
DataType getTensorDataType(
char const* tensorName) const noexcept = 0;
372 virtual
TensorLocation getTensorLocation(
char const* tensorName) const noexcept = 0;
373 virtual
bool isShapeInferenceIO(
char const* tensorName) const noexcept = 0;
374 virtual
TensorIOMode getTensorIOMode(
char const* tensorName) const noexcept = 0;
375 virtual int32_t getTensorBytesPerComponent(
char const* tensorName) const noexcept = 0;
376 virtual int32_t getTensorComponentsPerElement(
char const* tensorName) const noexcept = 0;
377 virtual
TensorFormat getTensorFormat(
char const* tensorName) const noexcept = 0;
378 virtual
char const* getTensorFormatDesc(
char const* tensorName) const noexcept = 0;
379 virtual int32_t getTensorVectorizedDim(
char const* tensorName) const noexcept = 0;
380 virtual
Dims getProfileShape(
381 char const* tensorName, int32_t profileIndex,
OptProfileSelector select) const noexcept = 0;
382 virtual int32_t getNbIOTensors() const noexcept = 0;
383 virtual
char const* getIOTensorName(int32_t index) const noexcept = 0;
385 virtual int32_t getNbAuxStreams() const noexcept = 0;
// "V2" variants of the per-tensor queries above: each takes an additional profileIndex.
387 virtual int32_t getTensorBytesPerComponentV2(
char const* tensorName, int32_t profileIndex) const noexcept = 0;
388 virtual int32_t getTensorComponentsPerElementV2(
char const* tensorName, int32_t profileIndex) const noexcept = 0;
389 virtual
TensorFormat getTensorFormatV2(
char const* tensorName, int32_t profileIndex) const noexcept = 0;
390 virtual
char const* getTensorFormatDescV2(
char const* tensorName, int32_t profileIndex) const noexcept = 0;
391 virtual int32_t getTensorVectorizedDimV2(
char const* tensorName, int32_t profileIndex) const noexcept = 0;
393 virtual ISerializationConfig* createSerializationConfig() noexcept = 0;
394 virtual IHostMemory* serializeWithConfig(ISerializationConfig& config) const noexcept = 0;
// Returns size_t; getDeviceMemorySizeForProfileV2 below returns int64_t instead.
396 virtual
size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept = 0;
397 virtual IRefitter* createRefitter(ILogger& logger) noexcept = 0;
// Weight-streaming controls; budgets and sizes are expressed as int64_t.
399 virtual
bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept = 0;
400 virtual int64_t getWeightStreamingBudget() const noexcept = 0;
401 virtual int64_t getMinimumWeightStreamingBudget() const noexcept = 0;
402 virtual int64_t getStreamableWeightsSize() const noexcept = 0;
404 virtual
bool isDebugTensor(
char const* name) const noexcept = 0;
407 virtual
bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept = 0;
408 virtual int64_t getWeightStreamingBudgetV2() const noexcept = 0;
409 virtual int64_t getWeightStreamingAutomaticBudget() const noexcept = 0;
410 virtual int64_t getWeightStreamingScratchMemorySize() const noexcept = 0;
411 virtual int64_t getDeviceMemorySizeV2() const noexcept = 0;
412 virtual int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept = 0;
// The remaining members are annotated with the TRT_NODISCARD macro (defined outside this excerpt).
414 TRT_NODISCARD virtual int64_t const* getProfileTensorValuesV2(
415 char const* tensorName, int32_t profileIndex,
OptProfileSelector select) const noexcept = 0;
416 TRT_NODISCARD virtual IExecutionContext* createExecutionContextWithRuntimeConfig(
417 IRuntimeConfig* runtimeConfig) noexcept = 0;
418 TRT_NODISCARD virtual IRuntimeConfig* createRuntimeConfig() noexcept = 0;
421 TRT_NODISCARD virtual
char const* getAliasedInputTensor(
char const* tensorName) const noexcept = 0;
// VExecutionContext: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares profiler/debug configuration, input-shape and tensor-address binding
// keyed by tensor name, optimization-profile selection, output/temporary allocators,
// and the execute/enqueue entry points.
424class VExecutionContext : public VRoot
// Returns the IExecutionContext object associated with this implementation object.
427 virtual IExecutionContext* getPImpl() noexcept = 0;
428 virtual
void setDebugSync(
bool sync) noexcept = 0;
429 virtual
bool getDebugSync() const noexcept = 0;
430 virtual
void setProfiler(
IProfiler*) noexcept = 0;
431 virtual
IProfiler* getProfiler() const noexcept = 0;
432 virtual ICudaEngine const& getEngine() const noexcept = 0;
433 virtual
void setName(
char const* name) noexcept = 0;
434 virtual
char const* getName() const noexcept = 0;
// Takes only a pointer; setDeviceMemoryV2 further below also takes a size.
435 virtual
void setDeviceMemory(
void* memory) noexcept = 0;
436 virtual int32_t getOptimizationProfile() const noexcept = 0;
437 virtual
bool allInputDimensionsSpecified() const noexcept = 0;
438 virtual
bool allInputShapesSpecified() const noexcept = 0;
439 virtual
void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
// Takes an array of binding pointers; no stream argument.
441 virtual
bool executeV2(
void* const* bindings) noexcept = 0;
442 virtual
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept = 0;
443 virtual
void setEnqueueEmitsProfile(
bool enqueueEmitsProfile) noexcept = 0;
444 virtual
bool getEnqueueEmitsProfile() const noexcept = 0;
445 virtual
bool reportToProfiler() const noexcept = 0;
// Tensor-name-keyed shape and address accessors.
446 virtual
bool setInputShape(
char const* tensorName,
Dims const& dims) noexcept = 0;
447 virtual
Dims getTensorShape(
char const* tensorName) const noexcept = 0;
448 virtual
Dims getTensorStrides(
char const* tensorName) const noexcept = 0;
449 virtual
bool setTensorAddress(
char const* tensorName,
void* data) noexcept = 0;
450 virtual
void const* getTensorAddress(
char const* tensorName) const noexcept = 0;
// Input addresses are pointer-to-const; output addresses are pointer-to-mutable.
451 virtual
bool setInputTensorAddress(
char const* tensorName,
void const* data) noexcept = 0;
452 virtual
bool setOutputTensorAddress(
char const* tensorName,
void* data) noexcept = 0;
453 virtual int32_t inferShapes(int32_t nbMaxNames,
char const** tensorNames) noexcept = 0;
454 virtual
bool setInputConsumedEvent(cudaEvent_t event) noexcept = 0;
455 virtual cudaEvent_t getInputConsumedEvent() const noexcept = 0;
456 virtual
void* getOutputTensorAddress(
char const* tensorName) const noexcept = 0;
457 virtual
bool setOutputAllocator(
char const* tensorName,
IOutputAllocator* outputAllocator) noexcept = 0;
// Note: this getter is not const-qualified, unlike the other getters in this class.
458 virtual
IOutputAllocator* getOutputAllocator(
char const* name) noexcept = 0;
459 virtual int64_t getMaxOutputSize(
char const* tensorName) const noexcept = 0;
460 virtual
bool setTemporaryStorageAllocator(
IGpuAllocator* allocator) noexcept = 0;
461 virtual
IGpuAllocator* getTemporaryStorageAllocator() const noexcept = 0;
// Takes only a CUDA stream.
462 virtual
bool enqueueV3(cudaStream_t stream) noexcept = 0;
463 virtual
void setPersistentCacheLimit(
size_t size) noexcept = 0;
464 virtual
size_t getPersistentCacheLimit() const noexcept = 0;
// Takes an array of streams and its element count.
467 virtual
void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept = 0;
468 virtual
bool setDebugListener(
IDebugListener* listener) noexcept = 0;
470 virtual
bool setTensorDebugState(
char const* name,
bool flag) noexcept = 0;
471 virtual
bool getDebugState(
char const* name) const noexcept = 0;
472 virtual
bool setAllTensorsDebugState(
bool flag) noexcept = 0;
473 virtual
size_t updateDeviceMemorySizeForShapes() noexcept = 0;
474 virtual
void setDeviceMemoryV2(
void* memory, int64_t size) noexcept = 0;
475 TRT_NODISCARD virtual IRuntimeConfig* getRuntimeConfig() const noexcept = 0;
476 virtual
bool setUnfusedTensorsDebugState(
bool flag) noexcept = 0;
477 virtual
bool getUnfusedTensorsDebugState() const noexcept = 0;
// The declaration(s) below are compiled only when ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
// evaluates to non-zero. NOTE(review): the matching #endif is not visible in this excerpt,
// so it is unclear whether setCommunicator is also inside the conditional - confirm.
478#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
479 virtual bool isStreamCapturable(cudaStream_t stream)
const noexcept = 0;
// Takes an untyped pointer. NOTE(review): the expected pointee type is not visible here - confirm.
481 virtual bool setCommunicator(
void* communicator)
noexcept = 0;
// VEngineInspector: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares a get/set pair for an execution context and a per-layer information query.
484class VEngineInspector :
public VRoot
// Returns the IEngineInspector object associated with this implementation object.
487 virtual IEngineInspector* getPImpl() noexcept = 0;
// The context is passed and returned as pointer-to-const.
488 virtual
bool setExecutionContext(IExecutionContext const* context) noexcept = 0;
489 virtual IExecutionContext const* getExecutionContext() const noexcept = 0;
// Returns a C string for the layer at layerIndex, in the requested format.
490 virtual
char const* getLayerInformation(int32_t layerIndex,
LayerInformationFormat format) const noexcept = 0;
492 virtual
void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
// VTensor: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares getters/setters for a tensor's name, dimensions, data type, allowed
// formats and per-dimension names, plus boolean classification queries.
496class VTensor : public VRoot
499 virtual void setName(
char const* name)
noexcept = 0;
500 virtual char const* getName() const noexcept = 0;
501 virtual
void setDimensions(
Dims const& dimensions) noexcept = 0;
502 virtual
Dims getDimensions() const noexcept = 0;
503 virtual
void setType(
DataType type) noexcept = 0;
504 virtual
DataType getType() const noexcept = 0;
505 virtual
bool isNetworkInput() const noexcept = 0;
506 virtual
bool isNetworkOutput() const noexcept = 0;
507 virtual
void setAllowedFormats(
TensorFormats formats) noexcept = 0;
508 virtual
TensorFormats getAllowedFormats() const noexcept = 0;
509 virtual
bool isShapeTensor() const noexcept = 0;
510 virtual
bool isExecutionTensor() const noexcept = 0;
// Dimension names are addressed by an integer index.
511 virtual
void setDimensionName(int32_t index,
char const* name) noexcept = 0;
512 virtual
char const* getDimensionName(int32_t index) const noexcept = 0;
// VLayer: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares members common to a layer: type, name, indexed inputs/outputs,
// precision and per-output type controls, metadata string, and a rank count.
515class VLayer : public VRoot
518 virtual LayerType getType() const noexcept = 0;
519 virtual
void setName(
char const* name) noexcept = 0;
520 virtual
char const* getName() const noexcept = 0;
// Inputs and outputs are addressed by an integer index and returned as ITensor pointers.
521 virtual int32_t getNbInputs() const noexcept = 0;
522 virtual ITensor* getInput(int32_t index) const noexcept = 0;
523 virtual int32_t getNbOutputs() const noexcept = 0;
524 virtual ITensor* getOutput(int32_t index) const noexcept = 0;
525 virtual
void setInput(int32_t index, ITensor& tensor) noexcept = 0;
// Precision: set / get / query-if-set / reset.
526 virtual
void setPrecision(
DataType dataType) noexcept = 0;
527 virtual
DataType getPrecision() const noexcept = 0;
528 virtual
bool precisionIsSet() const noexcept = 0;
529 virtual
void resetPrecision() noexcept = 0;
// Output type: the same four operations, per output index.
530 virtual
void setOutputType(int32_t index,
DataType dataType) noexcept = 0;
531 virtual
DataType getOutputType(int32_t index) const noexcept = 0;
532 virtual
bool outputTypeIsSet(int32_t index) const noexcept = 0;
533 virtual
void resetOutputType(int32_t index) noexcept = 0;
534 virtual
void setMetadata(
char const* docString) noexcept = 0;
535 virtual
char const* getMetadata() const noexcept = 0;
// Returns bool, unlike the void-returning setters above.
536 virtual
bool setNbRanks(int32_t nbRanks) noexcept = 0;
537 virtual int32_t getNbRanks() const noexcept = 0;
// VConvolutionLayer: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares getter/setter pairs for output-map and group counts (int64_t),
// kernel and bias Weights, padding (pre/post/mode) and Dims-valued kernel size,
// stride, padding and dilation. Setters return void.
540class VConvolutionLayer : public VRoot
543 virtual void setNbOutputMaps(int64_t nbOutputMaps)
noexcept = 0;
544 virtual int64_t getNbOutputMaps() const noexcept = 0;
545 virtual
void setNbGroups(int64_t nbGroups) noexcept = 0;
546 virtual int64_t getNbGroups() const noexcept = 0;
// Weights are passed and returned by value.
547 virtual
void setKernelWeights(Weights weights) noexcept = 0;
548 virtual Weights getKernelWeights() const noexcept = 0;
549 virtual
void setBiasWeights(Weights weights) noexcept = 0;
550 virtual Weights getBiasWeights() const noexcept = 0;
551 virtual
void setPrePadding(
Dims const& padding) noexcept = 0;
552 virtual
Dims getPrePadding() const noexcept = 0;
553 virtual
void setPostPadding(
Dims const& padding) noexcept = 0;
554 virtual
Dims getPostPadding() const noexcept = 0;
555 virtual
void setPaddingMode(
PaddingMode paddingMode) noexcept = 0;
556 virtual
PaddingMode getPaddingMode() const noexcept = 0;
// "Nd" members take and return Dims.
557 virtual
void setKernelSizeNd(
Dims const& kernelSize) noexcept = 0;
558 virtual
Dims getKernelSizeNd() const noexcept = 0;
559 virtual
void setStrideNd(
Dims const& stride) noexcept = 0;
560 virtual
Dims getStrideNd() const noexcept = 0;
561 virtual
void setPaddingNd(
Dims const& padding) noexcept = 0;
562 virtual
Dims getPaddingNd() const noexcept = 0;
563 virtual
void setDilationNd(
Dims const& dilation) noexcept = 0;
564 virtual
Dims getDilationNd() const noexcept = 0;
// VActivationLayer: pure-virtual interface derived from VRoot.
// The visible members are noexcept float getters/setters for alpha and beta.
567class VActivationLayer : public VRoot
572 virtual
void setAlpha(
float alpha) noexcept = 0;
573 virtual
void setBeta(
float beta) noexcept = 0;
574 virtual
float getAlpha() const noexcept = 0;
575 virtual
float getBeta() const noexcept = 0;
// VPoolingLayer: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares getter/setter pairs for pooling type, blend factor, an
// average-count-excludes-padding flag, padding (pre/post/mode) and Dims-valued
// window size, stride and padding. Setters return void.
578class VPoolingLayer : public VRoot
581 virtual void setPoolingType(
PoolingType type)
noexcept = 0;
582 virtual PoolingType getPoolingType() const noexcept = 0;
583 virtual
void setBlendFactor(
float blendFactor) noexcept = 0;
584 virtual
float getBlendFactor() const noexcept = 0;
585 virtual
void setAverageCountExcludesPadding(
bool exclusive) noexcept = 0;
586 virtual
bool getAverageCountExcludesPadding() const noexcept = 0;
587 virtual
void setPrePadding(
Dims const& padding) noexcept = 0;
588 virtual
Dims getPrePadding() const noexcept = 0;
589 virtual
void setPostPadding(
Dims const& padding) noexcept = 0;
590 virtual
Dims getPostPadding() const noexcept = 0;
591 virtual
void setPaddingMode(
PaddingMode paddingMode) noexcept = 0;
592 virtual
PaddingMode getPaddingMode() const noexcept = 0;
// "Nd" members take and return Dims.
593 virtual
void setWindowSizeNd(
Dims const& windowSize) noexcept = 0;
594 virtual
Dims getWindowSizeNd() const noexcept = 0;
595 virtual
void setStrideNd(
Dims const& stride) noexcept = 0;
596 virtual
Dims getStrideNd() const noexcept = 0;
597 virtual
void setPaddingNd(
Dims const& padding) noexcept = 0;
598 virtual
Dims getPaddingNd() const noexcept = 0;
// VLRNLayer: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares getter/setter pairs for an int64_t window size and three float
// parameters: alpha, beta and k.
601class VLRNLayer : public VRoot
604 virtual void setWindowSize(int64_t windowSize)
noexcept = 0;
605 virtual int64_t getWindowSize() const noexcept = 0;
606 virtual
void setAlpha(
float alpha) noexcept = 0;
607 virtual
float getAlpha() const noexcept = 0;
608 virtual
void setBeta(
float beta) noexcept = 0;
609 virtual
float getBeta() const noexcept = 0;
610 virtual
void setK(
float k) noexcept = 0;
611 virtual
float getK() const noexcept = 0;
// VScaleLayer: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares getter/setter pairs for a ScaleMode, three by-value Weights
// (shift, scale, power) and an int32_t channel axis.
614class VScaleLayer : public VRoot
617 virtual void setMode(
ScaleMode mode)
noexcept = 0;
618 virtual ScaleMode getMode() const noexcept = 0;
619 virtual
void setShift(Weights shift) noexcept = 0;
620 virtual Weights getShift() const noexcept = 0;
621 virtual
void setScale(Weights scale) noexcept = 0;
622 virtual Weights getScale() const noexcept = 0;
623 virtual
void setPower(Weights power) noexcept = 0;
624 virtual Weights getPower() const noexcept = 0;
625 virtual int32_t getChannelAxis() const noexcept = 0;
626 virtual
void setChannelAxis(int32_t channelAxis) noexcept = 0;
// VSoftMaxLayer: pure-virtual interface derived from VRoot with a noexcept
// getter/setter pair for `axes`, carried as a uint32_t.
// NOTE(review): presumably a bitmask of axis indices - confirm with the public API docs.
629class VSoftMaxLayer : public VRoot
632 virtual void setAxes(uint32_t axes)
noexcept = 0;
633 virtual uint32_t getAxes() const noexcept = 0;
// VConcatenationLayer: pure-virtual interface derived from VRoot with a noexcept
// getter/setter pair for a single int32_t axis.
636class VConcatenationLayer : public VRoot
639 virtual void setAxis(int32_t axis)
noexcept = 0;
640 virtual int32_t getAxis() const noexcept = 0;
// VDeconvolutionLayer: pure-virtual interface derived from VRoot. All visible members are noexcept.
// Its visible member list has the same names and signatures as VConvolutionLayer:
// output-map and group counts, kernel and bias Weights, padding (pre/post/mode)
// and Dims-valued kernel size, stride, padding and dilation.
643class VDeconvolutionLayer : public VRoot
646 virtual void setNbOutputMaps(int64_t nbOutputMaps)
noexcept = 0;
647 virtual int64_t getNbOutputMaps() const noexcept = 0;
648 virtual
void setNbGroups(int64_t nbGroups) noexcept = 0;
649 virtual int64_t getNbGroups() const noexcept = 0;
// Weights are passed and returned by value.
650 virtual
void setKernelWeights(Weights weights) noexcept = 0;
651 virtual Weights getKernelWeights() const noexcept = 0;
652 virtual
void setBiasWeights(Weights weights) noexcept = 0;
653 virtual Weights getBiasWeights() const noexcept = 0;
654 virtual
void setPrePadding(
Dims const& padding) noexcept = 0;
655 virtual
Dims getPrePadding() const noexcept = 0;
656 virtual
void setPostPadding(
Dims const& padding) noexcept = 0;
657 virtual
Dims getPostPadding() const noexcept = 0;
658 virtual
void setPaddingMode(
PaddingMode paddingMode) noexcept = 0;
659 virtual
PaddingMode getPaddingMode() const noexcept = 0;
// "Nd" members take and return Dims.
660 virtual
void setKernelSizeNd(
Dims const& kernelSize) noexcept = 0;
661 virtual
Dims getKernelSizeNd() const noexcept = 0;
662 virtual
void setStrideNd(
Dims const& stride) noexcept = 0;
663 virtual
Dims getStrideNd() const noexcept = 0;
664 virtual
void setPaddingNd(
Dims const& padding) noexcept = 0;
665 virtual
Dims getPaddingNd() const noexcept = 0;
666 virtual
void setDilationNd(
Dims const& dilation) noexcept = 0;
667 virtual
Dims getDilationNd() const noexcept = 0;
// VElementWiseLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
670class VElementWiseLayer : public VRoot
// VGatherLayer: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares getter/setter pairs for an int32_t gather axis, an int32_t
// element-wise dimension count, and a GatherMode.
677class VGatherLayer : public VRoot
680 virtual void setGatherAxis(int32_t axis)
noexcept = 0;
681 virtual int32_t getGatherAxis() const noexcept = 0;
682 virtual
void setNbElementWiseDims(int32_t k) noexcept = 0;
683 virtual int32_t getNbElementWiseDims() const noexcept = 0;
684 virtual
void setMode(
GatherMode mode) noexcept = 0;
685 virtual
GatherMode getMode() const noexcept = 0;
// VPluginLayer: pure-virtual interface derived from VRoot with a single noexcept
// accessor that returns a mutable reference to an IPlugin.
688class VPluginLayer : public VRoot
691 virtual IPlugin& getPlugin() noexcept = 0;
// VPluginV2Layer: pure-virtual interface derived from VRoot with a single noexcept
// accessor that returns a mutable reference to an IPluginV2.
694class VPluginV2Layer : public VRoot
697 virtual IPluginV2& getPlugin() noexcept = 0;
// VPluginV3Layer: pure-virtual interface derived from VRoot with a single noexcept
// accessor that returns a mutable reference to an IPluginV3.
700class VPluginV3Layer : public VRoot
703 virtual IPluginV3& getPlugin() noexcept = 0;
// VUnaryLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
706class VUnaryLayer : public VRoot
// VReduceLayer: pure-virtual interface derived from VRoot. The visible members are
// noexcept getter/setter pairs for reduce axes (a uint32_t) and a keep-dimensions flag.
// NOTE(review): reduceAxes is presumably a bitmask of axis indices - confirm with the public API docs.
713class VReduceLayer : public VRoot
718 virtual
void setReduceAxes(uint32_t reduceAxes) noexcept = 0;
719 virtual uint32_t getReduceAxes() const noexcept = 0;
720 virtual
void setKeepDimensions(
bool keepDimensions) noexcept = 0;
721 virtual
bool getKeepDimensions() const noexcept = 0;
// VPaddingLayer: pure-virtual interface derived from VRoot with noexcept
// getter/setter pairs for Dims-valued pre-padding and post-padding.
724class VPaddingLayer : public VRoot
727 virtual void setPrePaddingNd(
Dims const& padding)
noexcept = 0;
728 virtual Dims getPrePaddingNd() const noexcept = 0;
729 virtual
void setPostPaddingNd(
Dims const& padding) noexcept = 0;
730 virtual
Dims getPostPaddingNd() const noexcept = 0;
// VShuffleLayer: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares getter/setter pairs for a first transpose, reshape dimensions,
// a second transpose, and a zero-is-placeholder flag.
733class VShuffleLayer : public VRoot
// The transpose getters return a reference to const Permutation, not a copy.
736 virtual void setFirstTranspose(Permutation
const& permutation)
noexcept = 0;
737 virtual Permutation
const& getFirstTranspose() const noexcept = 0;
738 virtual
void setReshapeDimensions(
Dims const& dimensions) noexcept = 0;
739 virtual
Dims getReshapeDimensions() const noexcept = 0;
740 virtual
void setSecondTranspose(Permutation const& permutation) noexcept = 0;
741 virtual Permutation const& getSecondTranspose() const noexcept = 0;
742 virtual
void setZeroIsPlaceholder(
bool zeroIsPlaceholder) noexcept = 0;
743 virtual
bool getZeroIsPlaceholder() const noexcept = 0;
// VSliceLayer: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares getter/setter pairs for Dims-valued start, size, stride and axes,
// and for a SampleMode.
746class VSliceLayer : public VRoot
749 virtual void setStart(
Dims const& start)
noexcept = 0;
750 virtual Dims getStart() const noexcept = 0;
751 virtual
void setSize(
Dims const& size) noexcept = 0;
752 virtual
Dims getSize() const noexcept = 0;
753 virtual
void setStride(
Dims const& stride) noexcept = 0;
754 virtual
Dims getStride() const noexcept = 0;
755 virtual
void setMode(
SampleMode mode) noexcept = 0;
756 virtual
SampleMode getMode() const noexcept = 0;
// Axes are carried as Dims here, not as a uint32_t as in the other layer interfaces.
757 virtual
void setAxes(
Dims const& axes) noexcept = 0;
758 virtual
Dims getAxes() const noexcept = 0;
// VShapeLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
761class VShapeLayer : public VRoot
// VTopKLayer: pure-virtual interface derived from VRoot. The visible members are
// noexcept getter/setter pairs for an int32_t k, uint32_t reduce axes, and the
// DataType used for indices.
766class VTopKLayer :
public VRoot
771 virtual
void setK(int32_t k) noexcept = 0;
772 virtual int32_t getK() const noexcept = 0;
773 virtual
void setReduceAxes(uint32_t reduceAxes) noexcept = 0;
774 virtual uint32_t getReduceAxes() const noexcept = 0;
// Returns bool, unlike the void-returning setters above.
775 virtual
bool setIndicesType(
DataType type) noexcept = 0;
776 virtual
DataType getIndicesType() const noexcept = 0;
// VMatrixMultiplyLayer: pure-virtual interface derived from VRoot with a noexcept
// getter/setter pair for a MatrixOperation addressed by an int32_t index.
// NOTE(review): presumably the index selects which input operand - confirm.
779class VMatrixMultiplyLayer : public VRoot
782 virtual void setOperation(int32_t index,
MatrixOperation op)
noexcept = 0;
783 virtual MatrixOperation getOperation(int32_t index)
const noexcept = 0;
// VNonZeroLayer: pure-virtual interface derived from VRoot with a noexcept
// getter/setter pair for the DataType used for indices. The setter returns bool.
786class VNonZeroLayer :
public VRoot
789 virtual bool setIndicesType(
DataType type)
noexcept = 0;
790 virtual DataType getIndicesType() const noexcept = 0;
// VRaggedSoftMaxLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
793class VRaggedSoftMaxLayer : public VRoot
// VIdentityLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
798class VIdentityLayer :
public VRoot
// VCastLayer: pure-virtual interface derived from VRoot with a noexcept
// getter/setter pair for a target DataType.
803class VCastLayer :
public VRoot
806 virtual void setToType(
DataType toType)
noexcept = 0;
807 virtual DataType getToType() const noexcept = 0;
// VConstantLayer: pure-virtual interface derived from VRoot with noexcept
// getter/setter pairs for by-value Weights and for Dims.
810class VConstantLayer : public VRoot
813 virtual void setWeights(Weights weights)
noexcept = 0;
814 virtual Weights getWeights() const noexcept = 0;
815 virtual
void setDimensions(
Dims const& dimensions) noexcept = 0;
816 virtual
Dims getDimensions() const noexcept = 0;
// VParametricReLULayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
819class VParametricReLULayer : public VRoot
// VResizeLayer: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares getter/setter pairs for output dimensions, a float scale array,
// a single-pixel ResizeSelector, a cubic coefficient, and an exclude-outside flag.
824class VResizeLayer :
public VRoot
827 virtual void setOutputDimensions(
Dims const& dimensions)
noexcept = 0;
828 virtual Dims getOutputDimensions() const noexcept = 0;
// Scales are passed in as a pointer to const float plus an element count.
829 virtual
void setScales(
float const* scales, int32_t nbScales) noexcept = 0;
// Takes a capacity `size` and a caller-provided float buffer; returns an int32_t.
// NOTE(review): presumably the return value is the number of scales - confirm.
830 virtual int32_t getScales(int32_t size,
float* scales) const noexcept = 0;
835 virtual
void setSelectorForSinglePixel(
ResizeSelector selector) noexcept = 0;
836 virtual
ResizeSelector getSelectorForSinglePixel() const noexcept = 0;
839 virtual
void setCubicCoeff(
float value) noexcept = 0;
840 virtual
float getCubicCoeff() const noexcept = 0;
841 virtual
void setExcludeOutside(
bool value) noexcept = 0;
842 virtual
bool getExcludeOutside() const noexcept = 0;
// VLoopBoundaryLayer: pure-virtual interface derived from VRoot with a single
// const, noexcept accessor that returns an ILoop pointer.
845class VLoopBoundaryLayer : public VRoot
848 virtual ILoop* getLoop() const noexcept = 0;
// VRecurrenceLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
851class VRecurrenceLayer : public VRoot
// VLoopOutputLayer: pure-virtual interface derived from VRoot. It declares a
// read-only LoopOutput accessor and a noexcept getter/setter pair for an int32_t axis.
856class VLoopOutputLayer :
public VRoot
// No matching setter for LoopOutput is visible in this excerpt.
859 virtual LoopOutput getLoopOutput() const noexcept = 0;
860 virtual
void setAxis(int32_t axis) noexcept = 0;
861 virtual int32_t getAxis() const noexcept = 0;
// VTripLimitLayer: pure-virtual interface derived from VRoot with a single
// const, noexcept accessor that returns a TripLimit.
864class VTripLimitLayer : public VRoot
867 virtual TripLimit getTripLimit() const noexcept = 0;
// VIteratorLayer: pure-virtual interface derived from VRoot with noexcept
// getter/setter pairs for an int32_t axis and a boolean reverse flag.
870class VIteratorLayer : public VRoot
873 virtual void setAxis(int32_t axis)
noexcept = 0;
874 virtual int32_t getAxis() const noexcept = 0;
875 virtual
void setReverse(
bool reverse) noexcept = 0;
876 virtual
bool getReverse() const noexcept = 0;
// VLoop: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares four add* functions, each taking an ITensor reference and returning
// a pointer to a loop-related layer interface, plus a name getter/setter pair.
878class VLoop : public VRoot
881 virtual IRecurrenceLayer* addRecurrence(ITensor& initialValue)
noexcept = 0;
882 virtual ITripLimitLayer* addTripLimit(ITensor& tensor,
TripLimit limit)
noexcept = 0;
// NOTE(review): the next two virtual functions declare default arguments. In C++ default
// arguments are chosen from the static type at the call site, so any override should
// repeat the same defaults to avoid surprises.
883 virtual IIteratorLayer* addIterator(ITensor& tensor, int32_t axis = 0,
bool reverse =
false) noexcept = 0;
884 virtual ILoopOutputLayer* addLoopOutput(ITensor& tensor,
LoopOutput outputKind, int32_t axis = 0) noexcept = 0;
885 virtual
void setName(
char const* name) noexcept = 0;
886 virtual
char const* getName() const noexcept = 0;
// VConditionalBoundaryLayer: pure-virtual interface derived from VRoot with a single
// const, noexcept accessor that returns an IIfConditional pointer.
889class VConditionalBoundaryLayer : public VRoot
892 virtual IIfConditional* getConditional() const noexcept = 0;
// VConditionLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
895class VConditionLayer : public VRoot
// VConditionalInputLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
900class VConditionalInputLayer :
public VRoot
// VConditionalOutputLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
905class VConditionalOutputLayer :
public VRoot
// VIfConditional: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares three functions that take ITensor references and return pointers
// to conditional layer interfaces, plus a name getter/setter pair.
910class VIfConditional :
public VRoot
913 virtual IConditionLayer* setCondition(ITensor& tensor)
noexcept = 0;
914 virtual IIfConditionalInputLayer* addInput(ITensor& tensor)
noexcept = 0;
// Takes two tensors, one for each branch.
915 virtual IIfConditionalOutputLayer* addOutput(ITensor& trueTensor, ITensor& falseTensor)
noexcept = 0;
916 virtual void setName(
char const* name)
noexcept = 0;
917 virtual char const* getName() const noexcept = 0;
// VAttentionBoundaryLayer: pure-virtual interface derived from VRoot with a single
// const, noexcept accessor that returns an IAttention pointer.
920class VAttentionBoundaryLayer : public VRoot
923 virtual IAttention* getAttention() const noexcept = 0;
// VAttentionInputLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
926class VAttentionInputLayer : public VRoot
// VAttentionOutputLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
931class VAttentionOutputLayer :
public VRoot
// VAttention: pure-virtual interface derived from VRoot. Every visible member is
// noexcept and annotated with the TRT_NODISCARD macro (defined outside this excerpt).
// All visible setters return bool, unlike the void-returning setters of most
// layer interfaces in this file.
936class VAttention :
public VRoot
// Indexed input/output access, with the same names as in VLayer.
939 TRT_NODISCARD virtual bool setInput(int32_t index, ITensor& input)
noexcept = 0;
940 TRT_NODISCARD virtual int32_t getNbInputs() const noexcept = 0;
941 TRT_NODISCARD virtual ITensor* getInput(int32_t index) const noexcept = 0;
942 TRT_NODISCARD virtual int32_t getNbOutputs() const noexcept = 0;
943 TRT_NODISCARD virtual ITensor* getOutput(int32_t index) const noexcept = 0;
944 TRT_NODISCARD virtual
bool setName(
char const* name) noexcept = 0;
945 TRT_NODISCARD virtual
char const* getName() const noexcept = 0;
// No matching getters for the causal flag or the mask are visible in this excerpt.
948 TRT_NODISCARD virtual
bool setCausal(
bool isCausal) noexcept = 0;
950 TRT_NODISCARD virtual
bool setMask(ITensor& mask) noexcept = 0;
952 TRT_NODISCARD virtual
bool setDecomposable(
bool decomposable) noexcept = 0;
953 TRT_NODISCARD virtual
bool getDecomposable() const noexcept = 0;
// The setter takes a tensor reference; the getter returns a tensor pointer.
954 TRT_NODISCARD virtual
bool setNormalizationQuantizeScale(ITensor& tensor) noexcept = 0;
955 TRT_NODISCARD virtual ITensor* getNormalizationQuantizeScale() const noexcept = 0;
958 TRT_NODISCARD virtual
bool setMetadata(
char const* docString) noexcept = 0;
959 TRT_NODISCARD virtual
char const* getMetadata() const noexcept = 0;
960 TRT_NODISCARD virtual
bool setNbRanks(int32_t nbRanks) noexcept = 0;
961 TRT_NODISCARD virtual int32_t getNbRanks() const noexcept = 0;
// VSelectLayer: interface class derived from VRoot.
// No member declarations are visible for it in this excerpt.
964class VSelectLayer : public VRoot
// VAssertionLayer: pure-virtual interface derived from VRoot with a noexcept
// getter/setter pair for a C-string message.
968class VAssertionLayer :
public VRoot
971 virtual void setMessage(
char const* message)
noexcept = 0;
972 virtual char const* getMessage() const noexcept = 0;
// VFillLayer: pure-virtual interface derived from VRoot. All visible members are noexcept.
// It declares getter/setter pairs for Dims, for alpha and beta as double and as
// int64_t, and for a target DataType.
975class VFillLayer : public VRoot
978 virtual void setDimensions(
Dims const& dimensions)
noexcept = 0;
979 virtual Dims getDimensions() const noexcept = 0;
// Floating-point alpha/beta accessors.
982 virtual
void setAlpha(
double alpha) noexcept = 0;
983 virtual
double getAlpha() const noexcept = 0;
984 virtual
void setBeta(
double beta) noexcept = 0;
985 virtual
double getBeta() const noexcept = 0;
// 64-bit integer alpha/beta accessors, plus a query named isAlphaBetaInt64.
986 virtual
void setAlphaInt64(int64_t alpha) noexcept = 0;
987 virtual int64_t getAlphaInt64() const noexcept = 0;
988 virtual
void setBetaInt64(int64_t beta) noexcept = 0;
989 virtual int64_t getBetaInt64() const noexcept = 0;
990 virtual
bool isAlphaBetaInt64() const noexcept = 0;
991 virtual
DataType getToType() const noexcept = 0;
992 virtual
void setToType(
DataType toType) noexcept = 0;
// VQuantizeLayer: pure-virtual interface derived from VRoot with noexcept
// getter/setter pairs for an int32_t axis, a target DataType, and a Dims block shape.
995class VQuantizeLayer : public VRoot
998 virtual int32_t getAxis() const noexcept = 0;
999 virtual
void setAxis(int32_t axis) noexcept = 0;
1000 virtual
DataType getToType() const noexcept = 0;
1001 virtual
void setToType(
DataType toType) noexcept = 0;
1002 virtual
Dims getBlockShape() const noexcept = 0;
// Returns bool, unlike the void-returning setters above.
1003 virtual
bool setBlockShape(
Dims const& blockShape) noexcept = 0;
// VDequantizeLayer: pure-virtual interface derived from VRoot. Its visible member
// list has the same names and signatures as VQuantizeLayer: an int32_t axis,
// a target DataType, and a Dims block shape.
1006class VDequantizeLayer : public VRoot
1009 virtual int32_t getAxis() const noexcept = 0;
1010 virtual
void setAxis(int32_t axis) noexcept = 0;
1011 virtual
DataType getToType() const noexcept = 0;
1012 virtual
void setToType(
DataType toType) noexcept = 0;
1013 virtual
Dims getBlockShape() const noexcept = 0;
// Returns bool, unlike the void-returning setters above.
1014 virtual
bool setBlockShape(
Dims const& blockShape) noexcept = 0;
// VDynamicQuantizeLayer: pure-virtual interface derived from VRoot. The visible members
// are noexcept getter/setter pairs for an int32_t block size (marked TRT_DEPRECATED),
// a scale DataType, a target DataType, and a Dims block shape.
1017class VDynamicQuantizeLayer : public VRoot
// The block-size accessors carry the TRT_DEPRECATED macro (defined outside this excerpt).
1022 TRT_DEPRECATED virtual int32_t getBlockSize() const noexcept = 0;
// NOTE(review): the parameter is named `axis` although the function sets a block size;
// this looks like a copy-paste name. Parameter names in a declaration do not affect callers.
1023 TRT_DEPRECATED virtual
void setBlockSize(int32_t axis) noexcept = 0;
1024 virtual
DataType getScaleType() const noexcept = 0;
// NOTE(review): same naming oddity - the DataType parameter is called `axis`.
1025 virtual
void setScaleType(
DataType axis) noexcept = 0;
1026 virtual
DataType getToType() const noexcept = 0;
1027 virtual
void setToType(
DataType toType) noexcept = 0;
1028 virtual
Dims getBlockShape() const noexcept = 0;
// Returns void here, whereas setBlockShape in VQuantizeLayer and VDequantizeLayer returns bool.
1029 virtual
void setBlockShape(
Dims const& blockShape) noexcept = 0;
// VScatterLayer: pure-virtual interface derived from VRoot. The visible members are
// a noexcept ScatterMode setter and a getter/setter pair for an int32_t axis.
1032class VScatterLayer : public VRoot
1035 virtual void setMode(
ScatterMode mode)
noexcept = 0;
1037 virtual
void setAxis(int32_t axis) noexcept = 0;
1038 virtual int32_t getAxis() const noexcept = 0;
// VEinsumLayer: interface derived from VRoot exposing the equation as a C
// string. Both members are pure virtual and noexcept.
1041class VEinsumLayer : public VRoot
// Returns bool; presumably false when the equation is rejected -- TODO confirm.
1044 virtual bool setEquation(
char const* equation)
noexcept = 0;
1045 virtual char const* getEquation() const noexcept = 0;
// VOneHotLayer: interface derived from VRoot with a pure-virtual, noexcept
// getter/setter pair for the int32_t axis.
1048class VOneHotLayer : public VRoot
1051 virtual int32_t getAxis() const noexcept = 0;
1052 virtual
void setAxis(int32_t axis) noexcept = 0;
// VGridSampleLayer: interface derived from VRoot. Visible members are
// pure-virtual, noexcept accessors for the align-corners flag and the
// SampleMode.
1055class VGridSampleLayer : public VRoot
1060 virtual
void setAlignCorners(
bool alignCorners) noexcept = 0;
1061 virtual
bool getAlignCorners() const noexcept = 0;
// Unlike setAlignCorners, this setter returns bool; presumably it reports
// whether the mode was accepted -- TODO confirm.
1062 virtual
bool setSampleMode(
SampleMode mode) noexcept = 0;
1063 virtual
SampleMode getSampleMode() const noexcept = 0;
// VNMSLayer: interface derived from VRoot. Visible members are pure-virtual,
// noexcept accessors for the top-K box limit (int32_t) and the indices
// DataType.
1066class VNMSLayer : public VRoot
1071 virtual
void setTopKBoxLimit(int32_t limit) noexcept = 0;
1072 virtual int32_t getTopKBoxLimit() const noexcept = 0;
// Returns bool, unlike setTopKBoxLimit; presumably it reports whether the type
// was accepted -- TODO confirm.
1073 virtual
bool setIndicesType(
DataType type) noexcept = 0;
1074 virtual
DataType getIndicesType() const noexcept = 0;
// VReverseSequenceLayer: interface derived from VRoot with pure-virtual,
// noexcept getter/setter pairs for two int32_t properties: the batch axis and
// the sequence axis.
1077class VReverseSequenceLayer : public VRoot
1080 virtual void setBatchAxis(int32_t batchAxis)
noexcept = 0;
1081 virtual int32_t getBatchAxis() const noexcept = 0;
1083 virtual
void setSequenceAxis(int32_t sequenceAxis) noexcept = 0;
1084 virtual int32_t getSequenceAxis() const noexcept = 0;
// VNormalizationLayer: interface derived from VRoot. Visible members are
// pure-virtual, noexcept accessors for epsilon (float), the axes (passed as a
// uint32_t named axesMask), the number of groups (int64_t) and the compute
// precision (DataType), plus an isV2() query.
1087class VNormalizationLayer : public VRoot
1090 virtual void setEpsilon(
float eps)
noexcept = 0;
1091 virtual float getEpsilon() const noexcept = 0;
1092 virtual
void setAxes(uint32_t axesMask) noexcept = 0;
1093 virtual uint32_t getAxes() const noexcept = 0;
1094 virtual
void setNbGroups(int64_t nbGroups) noexcept = 0;
1095 virtual int64_t getNbGroups() const noexcept = 0;
1096 virtual
void setComputePrecision(
DataType type) noexcept = 0;
1097 virtual
DataType getComputePrecision() const noexcept = 0;
// Read-only query with no matching setter in this listing.
1098 virtual
bool isV2() const noexcept = 0;
// VSqueezeLayer: class derived from VRoot; none of its members appear in this
// listing.
1101class VSqueezeLayer : public VRoot
// VUnsqueezeLayer: class derived from VRoot; none of its members appear in
// this listing.
1105class VUnsqueezeLayer :
public VRoot
// VCumulativeLayer: interface derived from VRoot. Visible members are
// pure-virtual, noexcept getter/setter pairs for two boolean flags: exclusive
// and reverse.
1109class VCumulativeLayer :
public VRoot
1114 virtual
void setExclusive(
bool exclusive) noexcept = 0;
1115 virtual
bool getExclusive() const noexcept = 0;
1116 virtual
void setReverse(
bool reverse) noexcept = 0;
1117 virtual
bool getReverse() const noexcept = 0;
// VRotaryEmbeddingLayer: interface derived from VRoot. Visible members are
// pure-virtual, noexcept accessors for the interleaved flag and the rotary
// embedding dimension, plus setInput to bind a tensor to an input index.
1120class VRotaryEmbeddingLayer : public VRoot
1123 virtual void setInterleaved(
bool interleaved)
noexcept = 0;
1124 virtual bool getInterleaved() const noexcept = 0;
// Returns bool, unlike setInterleaved; presumably it reports whether the value
// was accepted -- TODO confirm.
1125 virtual
bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept = 0;
1126 virtual int32_t getRotaryEmbeddingDim() const noexcept = 0;
1127 virtual
void setInput(int32_t index, ITensor& input) noexcept = 0;
// VKVCacheUpdateLayer: class derived from VRoot; none of its members appear in
// this listing.
1130class VKVCacheUpdateLayer : public VRoot
// VMoELayer: interface derived from VRoot. All visible members are pure
// virtual and noexcept. They cover:
//   - gated weights and biases (three tensors each: gate, up, down),
//   - the activation type (MoEActType),
//   - quantization settings (static scale, dynamic double-quantization scale,
//     target type, block shape, dynamic-quantization output scale type),
//   - the three SwiGLU parameters (limit, alpha, beta),
//   - setInput to bind a tensor to an input index.
1137class VMoELayer : public VRoot
// Takes the activation type together with the weights; a separate
// setActivationType is declared just below.
1140 virtual void setGatedWeights(ITensor& fcGateWeights, ITensor& fcUpWeights, ITensor& fcDownWeights,
MoEActType activationType)
noexcept = 0;
1141 virtual void setGatedBiases(ITensor& fcGateBiases, ITensor& fcUpBiases, ITensor& fcDownBiases)
noexcept = 0;
1142 virtual void setActivationType(
MoEActType activationType)
noexcept = 0;
1143 virtual MoEActType getActivationType() const noexcept = 0;
1144 virtual
void setQuantizationStatic(ITensor& fcDownActivationScale,
DataType dataType) noexcept = 0;
// Takes a scale tensor, a data type, a block shape and an output scale type in
// one call; setters for the last three are also declared individually below.
1145 virtual
void setQuantizationDynamicDblQ(ITensor& fcDownActivationDblQScale,
DataType dataType,
Dims const& blockShape,
DataType dynQOutputScaleType) noexcept = 0;
1146 virtual
void setQuantizationToType(
DataType type) noexcept = 0;
1147 virtual
DataType getQuantizationToType() const noexcept = 0;
1148 virtual
void setQuantizationBlockShape(
Dims const& blockShape) noexcept = 0;
1149 virtual
Dims getQuantizationBlockShape() const noexcept = 0;
1150 virtual
void setDynQOutputScaleType(
DataType type) noexcept = 0;
1151 virtual
DataType getDynQOutputScaleType() const noexcept = 0;
// Sets all three SwiGLU parameters at once; per-parameter getter/setter pairs
// follow.
1152 virtual
void setSwigluParams(
float limit,
float alpha,
float beta) noexcept = 0;
1153 virtual
void setSwigluParamLimit(
float limit) noexcept = 0;
1154 virtual
float getSwigluParamLimit() const noexcept = 0;
1155 virtual
void setSwigluParamAlpha(
float alpha) noexcept = 0;
1156 virtual
float getSwigluParamAlpha() const noexcept = 0;
1157 virtual
void setSwigluParamBeta(
float beta) noexcept = 0;
1158 virtual
float getSwigluParamBeta() const noexcept = 0;
1159 virtual
void setInput(int32_t index, ITensor& tensor) noexcept = 0;
1163class VNetworkDefinition : public VRoot
1166 virtual ITensor* addInput(
char const* name,
DataType type,
Dims const& dimensions)
noexcept = 0;
1167 virtual void markOutput(ITensor& tensor)
noexcept = 0;
1168 virtual IActivationLayer* addActivation(ITensor& input,
ActivationType type)
noexcept = 0;
1169 virtual ILRNLayer* addLRN(ITensor& input, int64_t window,
float alpha,
float beta,
float k)
noexcept = 0;
1170 virtual IScaleLayer* addScale(
1171 ITensor& input,
ScaleMode mode, Weights shift, Weights scale, Weights power)
noexcept = 0;
1172 virtual ISoftMaxLayer* addSoftMax(ITensor& input)
noexcept = 0;
1173 virtual IConcatenationLayer* addConcatenation(ITensor*
const* inputs, int32_t nbInputs)
noexcept = 0;
1174 virtual IElementWiseLayer* addElementWise(ITensor& input1, ITensor& input2,
ElementWiseOperation op)
noexcept = 0;
1175 virtual IUnaryLayer* addUnary(ITensor& input,
UnaryOperation operation)
noexcept = 0;
1176 virtual IShuffleLayer* addShuffle(ITensor& input)
noexcept = 0;
1177 virtual int32_t getNbLayers() const noexcept = 0;
1178 virtual ILayer* getLayer(int32_t index) const noexcept = 0;
1179 virtual int32_t getNbInputs() const noexcept = 0;
1180 virtual ITensor* getInput(int32_t index) const noexcept = 0;
1181 virtual int32_t getNbOutputs() const noexcept = 0;
1182 virtual ITensor* getOutput(int32_t index) const noexcept = 0;
1183 virtual IReduceLayer* addReduce(
1184 ITensor& input,
ReduceOperation operation, uint32_t reduceAxes,
bool keepDimensions) noexcept
1186 virtual ITopKLayer* addTopK(ITensor& input,
TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept = 0;
1187 virtual IGatherLayer* addGather(ITensor& data, ITensor& indices, int32_t axis) noexcept = 0;
1188 virtual IRaggedSoftMaxLayer* addRaggedSoftMax(ITensor& input, ITensor& bounds) noexcept = 0;
1189 virtual IMatrixMultiplyLayer* addMatrixMultiply(
1191 virtual IConstantLayer* addConstant(
Dims const& dimensions, Weights weights) noexcept = 0;
1192 virtual IIdentityLayer* addIdentity(ITensor& input) noexcept = 0;
1193 virtual
void removeTensor(ITensor& tensor) noexcept = 0;
1194 virtual
void unmarkOutput(ITensor& tensor) noexcept = 0;
1195 virtual ISliceLayer* addSlice(ITensor& input,
Dims const& start,
Dims const& size,
Dims const& stride) noexcept = 0;
1196 virtual
void setName(
char const* name) noexcept = 0;
1197 virtual
char const* getName() const noexcept = 0;
1198 virtual IShapeLayer* addShape(ITensor& input) noexcept = 0;
1199 virtual
bool hasImplicitBatchDimension() const noexcept = 0;
1200 virtual
bool markOutputForShapes(ITensor& tensor) noexcept = 0;
1201 virtual
bool unmarkOutputForShapes(ITensor& tensor) noexcept = 0;
1202 virtual IParametricReLULayer* addParametricReLU(ITensor& input, ITensor& slope) noexcept = 0;
1203 virtual IConvolutionLayer* addConvolutionNd(
1204 ITensor& input, int64_t nbOutputMaps,
Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
1206 virtual IPoolingLayer* addPoolingNd(ITensor& input,
PoolingType type,
Dims const& windowSize) noexcept = 0;
1207 virtual IDeconvolutionLayer* addDeconvolutionNd(
1208 ITensor& input, int64_t nbOutputMaps,
Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
1210 virtual IScaleLayer* addScaleNd(
1211 ITensor& input,
ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept = 0;
1212 virtual IResizeLayer* addResize(ITensor& input) noexcept = 0;
1213 virtual ILoop* addLoop() noexcept = 0;
1214 virtual ISelectLayer* addSelect(ITensor& condition, ITensor& thenInput, ITensor& elseInput) noexcept = 0;
1215 virtual IFillLayer* addFill(
Dims const& dimensions,
FillOperation op) noexcept = 0;
1216 virtual IPaddingLayer* addPaddingNd(ITensor& input,
Dims const& prePadding,
Dims const& postPadding) noexcept = 0;
1217 virtual
bool setWeightsName(Weights weights,
char const* name) noexcept = 0;
1218 virtual
void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
1220 virtual IGatherLayer* addGatherV2(ITensor& data, ITensor& indices,
GatherMode mode) noexcept = 0;
1221 virtual IIfConditional* addIfConditional() noexcept = 0;
1222 virtual IScatterLayer* addScatter(ITensor& data, ITensor& indices, ITensor& updates,
ScatterMode mode) noexcept = 0;
1223 virtual IEinsumLayer* addEinsum(ITensor* const* inputs, int32_t nbInputs,
char const* equation) noexcept = 0;
1224 virtual IAssertionLayer* addAssertion(ITensor& condition,
char const* message) noexcept = 0;
1225 virtual IOneHotLayer* addOneHot(ITensor& indices, ITensor& values, ITensor& depth, int32_t axis) noexcept = 0;
1226 virtual INonZeroLayer* addNonZero(ITensor& input) noexcept = 0;
1227 virtual IGridSampleLayer* addGridSample(ITensor& input, ITensor& grid) noexcept = 0;
1228 virtual INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass) noexcept = 0;
1229 virtual IReverseSequenceLayer* addReverseSequence(ITensor& input, ITensor& sequenceLens) noexcept = 0;
1230 virtual INormalizationLayer* addNormalization(
1231 ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept = 0;
1232 virtual ICastLayer* addCast(ITensor& input,
DataType toType) noexcept = 0;
1233 virtual IBuilder& getBuilder() const noexcept = 0;
1236 virtual IQuantizeLayer* addQuantizeV2(ITensor& input, ITensor& scale,
DataType outputType) noexcept = 0;
1237 virtual IDequantizeLayer* addDequantizeV2(ITensor& input, ITensor& scale,
DataType outputType) noexcept = 0;
1239 virtual
bool markDebug(ITensor& tensor) noexcept = 0;
1240 virtual
bool unmarkDebug(ITensor& tensor) noexcept = 0;
1241 virtual
bool isDebugTensor(ITensor const& tensor) const noexcept = 0;
1242 virtual
bool markWeightsRefittable(
char const* name) noexcept = 0;
1243 virtual
bool unmarkWeightsRefittable(
char const* name) noexcept = 0;
1244 virtual
bool areWeightsMarkedRefittable(
char const* name) const noexcept = 0;
1245 virtual ISqueezeLayer* addSqueeze(ITensor& input, ITensor& axes) noexcept = 0;
1246 virtual IUnsqueezeLayer* addUnsqueeze(ITensor& input, ITensor& axes) noexcept = 0;
1247 virtual IDynamicQuantizeLayer* addDynamicQuantize(
1248 ITensor& input, int32_t axis, int32_t blockSize,
DataType toType,
DataType scaleType) noexcept = 0;
1249 virtual ICumulativeLayer* addCumulative(
1250 ITensor& input, ITensor& axis,
CumulativeOperation operation,
bool exclusive,
bool reverse) noexcept = 0;
1251 virtual
bool markUnfusedTensorsAsDebugTensors() noexcept = 0;
1252 virtual
bool unmarkUnfusedTensorsAsDebugTensors() noexcept = 0;
1253 virtual ITopKLayer* addTopKV2(
1254 ITensor& input,
TopKOperation op, int32_t k, uint32_t reduceAxes,
DataType indicesType) noexcept = 0;
1255 virtual INonZeroLayer* addNonZeroV2(ITensor& input,
DataType indicesType) noexcept = 0;
1256 virtual INMSLayer* addNMSV2(
1257 ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass,
DataType indicesType) noexcept = 0;
1258 virtual IAttention* addAttention(
1260 virtual IRotaryEmbeddingLayer* addRotaryEmbedding(ITensor& input, ITensor& cosCache, ITensor& sinCache,
1261 bool interleaved, int32_t rotaryEmbeddingDim) noexcept = 0;
1262 virtual IDynamicQuantizeLayer* addDynamicQuantizeV2(
1264 virtual IKVCacheUpdateLayer* addKVCacheUpdate(
1265 ITensor& cache, ITensor& update, ITensor& writeIndices,
KVCacheMode cacheMode) noexcept = 0;
1266 virtual INormalizationLayer* addNormalizationV2(
1267 ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept = 0;
1268 virtual IMoELayer* addMoE(
1269 ITensor& hiddenStates, ITensor& selectedExpertsForTokens, ITensor& scoresForSelectedExperts) noexcept = 0;
1270 virtual IDistCollectiveLayer* addDistCollective(ITensor& input,
CollectiveOperation distCollectiveOp,
1271 ReduceOperation reduceOp, int64_t root, int64_t* groups, int64_t groupSize) noexcept = 0;
// VTimingCache: interface derived from VRoot. Visible members are pure
// virtual and noexcept: combine with another ITimingCache, reset, and
// key/value access through TimingCacheKey and TimingCacheValue.
1275class VTimingCache : public VRoot
1279 virtual
bool combine(ITimingCache const& inputCache,
bool ignoreMismatch) noexcept = 0;
1280 virtual
bool reset() noexcept = 0;
// Takes a caller-supplied key buffer and its capacity and returns an int64_t;
// presumably the number of keys -- TODO confirm.
1281 virtual int64_t queryKeys(TimingCacheKey* keyBuffer, int64_t capacity) const noexcept = 0;
1282 virtual TimingCacheValue query(TimingCacheKey const& key) const noexcept = 0;
1283 virtual
bool update(TimingCacheKey const& key, TimingCacheValue const& value) noexcept = 0;
// VBuilderConfig: interface derived from VRoot. All visible members are pure
// virtual and noexcept. They cover:
//   - timing iterations, engine capability and builder flags,
//   - per-layer and default device type, DLA core selection,
//   - profile stream and optimization profiles,
//   - tactic sources and timing cache,
//   - memory pool limits, preview features, optimization level,
//   - plugins to serialize, auxiliary streams, runtime platform,
//   - tactic count limit, L2 tiling limit and compute capabilities.
1286class VBuilderConfig : public VRoot
1289 virtual void setAvgTimingIterations(int32_t avgTiming)
noexcept = 0;
1290 virtual int32_t getAvgTimingIterations() const noexcept = 0;
1291 virtual
void setEngineCapability(
EngineCapability capability) noexcept = 0;
// setFlags takes the combined BuilderFlags value; clearFlag/setFlag/getFlag
// take a single BuilderFlag.
1293 virtual
void setFlags(
BuilderFlags builderFlags) noexcept = 0;
1295 virtual
void clearFlag(
BuilderFlag builderFlag) noexcept = 0;
1296 virtual
void setFlag(
BuilderFlag builderFlag) noexcept = 0;
1297 virtual
bool getFlag(
BuilderFlag builderFlag) const noexcept = 0;
// The device-type functions below are keyed by an ILayer const*.
1298 virtual
void setDeviceType(ILayer const* layer,
DeviceType deviceType) noexcept = 0;
1299 virtual
DeviceType getDeviceType(ILayer const* layer) const noexcept = 0;
1300 virtual
bool isDeviceTypeSet(ILayer const* layer) const noexcept = 0;
1301 virtual
void resetDeviceType(ILayer const* layer) noexcept = 0;
1302 virtual
bool canRunOnDLA(ILayer const* layer) const noexcept = 0;
1303 virtual
void setDLACore(int32_t dlaCore) noexcept = 0;
1304 virtual int32_t getDLACore() const noexcept = 0;
1305 virtual
void setDefaultDeviceType(
DeviceType deviceType) noexcept = 0;
1306 virtual
DeviceType getDefaultDeviceType() const noexcept = 0;
1307 virtual
void reset() noexcept = 0;
// NOTE(review): `const cudaStream_t` places const before the type, unlike the
// `T const` spelling used by the other declarations in this class.
1308 virtual
void setProfileStream(const cudaStream_t stream) noexcept = 0;
1309 virtual cudaStream_t getProfileStream() const noexcept = 0;
// Returns an int32_t; presumably the index of the added profile -- TODO
// confirm.
1310 virtual int32_t addOptimizationProfile(IOptimizationProfile const* profile) noexcept = 0;
1311 virtual int32_t getNbOptimizationProfiles() const noexcept = 0;
1314 virtual
bool setTacticSources(
TacticSources tacticSources) noexcept = 0;
1315 virtual
TacticSources getTacticSources() const noexcept = 0;
// Takes a blob pointer and its size in bytes-typed std::size_t; returns a
// pointer to an ITimingCache.
1316 virtual
nvinfer1::ITimingCache* createTimingCache(
void const* blob, std::
size_t size) const noexcept = 0;
1317 virtual
bool setTimingCache(ITimingCache const& cache,
bool ignoreMismatch) noexcept = 0;
1318 virtual
nvinfer1::ITimingCache const* getTimingCache() const noexcept = 0;
1319 virtual
void setMemoryPoolLimit(
MemoryPoolType pool, std::
size_t poolSize) noexcept = 0;
1320 virtual std::
size_t getMemoryPoolLimit(
MemoryPoolType pool) const noexcept = 0;
1321 virtual
void setPreviewFeature(
PreviewFeature feature,
bool enable) noexcept = 0;
1322 virtual
bool getPreviewFeature(
PreviewFeature feature) const noexcept = 0;
1323 virtual
void setBuilderOptimizationLevel(int32_t level) noexcept = 0;
1324 virtual int32_t getBuilderOptimizationLevel() const noexcept = 0;
// Plugin paths are passed as an array of C strings with its length, and read
// back one at a time by index.
1327 virtual
void setPluginsToSerialize(
char const* const* paths, int32_t nbPaths) noexcept = 0;
1328 virtual
char const* getPluginToSerialize(int32_t index) const noexcept = 0;
1329 virtual int32_t getNbPluginsToSerialize() const noexcept = 0;
1330 virtual
void setMaxAuxStreams(int32_t nbStreams) noexcept = 0;
1331 virtual int32_t getMaxAuxStreams() const noexcept = 0;
1334 virtual
void setRuntimePlatform(
RuntimePlatform runtimePlatform) noexcept = 0;
1336 virtual
void setMaxNbTactics(int32_t maxTactics) noexcept = 0;
1337 virtual int32_t getMaxNbTactics() const noexcept = 0;
1340 virtual
bool setL2LimitForTiling(int64_t size) noexcept = 0;
1341 virtual int64_t getL2LimitForTiling() const noexcept = 0;
// These two are the only members in this class marked TRT_NODISCARD.
1342 TRT_NODISCARD virtual
bool setNbComputeCapabilities(int32_t maxNbComputeCapabilities) noexcept = 0;
1343 TRT_NODISCARD virtual int32_t getNbComputeCapabilities() const noexcept = 0;
// VSerializationConfig: class derived from VRoot; none of its members appear
// in this listing.
1348class VSerializationConfig : public VRoot
// VBuilder: interface derived from VRoot. All visible members are pure
// virtual and noexcept. They cover platform capability queries (fast FP16,
// fast INT8, TF32, DLA), creation of IBuilderConfig and IOptimizationProfile
// objects, building a serialized network (to host memory or to a stream
// writer), and allocator / error recorder / logger / thread-count settings.
1358class VBuilder : public VRoot
1361 virtual bool platformHasFastFp16() const noexcept = 0;
1362 virtual
bool platformHasFastInt8() const noexcept = 0;
1363 virtual int32_t getMaxDLABatchSize() const noexcept = 0;
1364 virtual int32_t getNbDLACores() const noexcept = 0;
1365 virtual
void setGpuAllocator(
IGpuAllocator* allocator) noexcept = 0;
1366 virtual
nvinfer1::IBuilderConfig* createBuilderConfig() noexcept = 0;
1368 virtual
nvinfer1::IOptimizationProfile* createOptimizationProfile() noexcept = 0;
1369 virtual
void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
1371 virtual
void reset() noexcept = 0;
1372 virtual
bool platformHasTf32() const noexcept = 0;
// Returns the result as an IHostMemory pointer; ownership of the returned
// object is not visible here -- TODO confirm.
1373 virtual
nvinfer1::IHostMemory* buildSerializedNetwork(
1374 INetworkDefinition& network, IBuilderConfig& config) noexcept = 0;
1375 virtual
bool isNetworkSupported(INetworkDefinition const& network, IBuilderConfig const& config) const noexcept = 0;
1376 virtual ILogger*
getLogger() const noexcept = 0;
1377 virtual
bool setMaxThreads(int32_t maxThreads) noexcept = 0;
1378 virtual int32_t getMaxThreads() const noexcept = 0;
// Stream-based counterpart of buildSerializedNetwork: takes an IStreamWriter
// and returns bool instead of an IHostMemory pointer.
1380 virtual
bool buildSerializedNetworkToStream(
1381 INetworkDefinition& network, IBuilderConfig& config,
IStreamWriter& writer) noexcept = 0;
// VRuntimeConfig: interface derived from VRoot. Visible members are virtual
// and noexcept: getPImpl returns an IRuntimeConfig*, and setRuntimeCache takes
// an IRuntimeCache by const reference.
// NOTE(review): the last declaration is cut off in this listing.
1384class VRuntimeConfig : public VRoot
1387 virtual IRuntimeConfig* getPImpl() noexcept = 0;
1391 virtual
bool setRuntimeCache(
nvinfer1::IRuntimeCache const& cache) noexcept = 0;
1393 virtual
void setDynamicShapesKernelSpecializationStrategy(
// VRuntimeCache: interface derived from VRoot. Visible members are pure
// virtual and noexcept: deserialize from a blob pointer plus size, and reset.
// Both return bool.
1400class VRuntimeCache : public VRoot
// NOTE(review): `size_t` is unqualified here, while VBuilderConfig in this
// file spells it `std::size_t`.
1404 virtual
bool deserialize(
void const* blob,
size_t size) noexcept = 0;
1405 virtual
bool reset() noexcept = 0;
// VDistCollectiveLayer: class derived from VRoot; none of its members appear
// in this listing.
1408class VDistCollectiveLayer : public VRoot
TENSORRTAPI nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
TENSORRTAPI nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
#define TRT_NODISCARD
A stand-in for [[nodiscard]] and [[nodiscard(REASON)]] that works with older compilers.
Definition: NvInferRuntimeBase.h:57
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
Structure to define the dimensions of a tensor.
User-implemented callback for notification when value of a debug tensor is updated.
Reference counted application-implemented error reporting interface for TensorRT objects.
Application-implemented class for controlling allocation on the GPU.
Callback from ExecutionContext::enqueueV3()
Plugin class for the V3 generation of user-implemented layers.
Application-implemented interface for profiling.
Application-implemented progress reporting interface for TensorRT.
Application-implemented class for reading data in a stream-based manner.
Application-implemented class for reading data in a stream-based manner asynchronously....
Application-implemented class for writing data in a stream-based manner.
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:142
The TensorRT API version 1 namespace.
Definition: NvInferPluginBase.h:29
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2958
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:4293
ResizeSelector
The coordinate selector when resize to single pixel output.
Definition: NvInfer.h:4062
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:179
MemoryPoolType
The type for memory pools used by TensorRT.
Definition: NvInfer.h:9996
ScaleMode
Controls how shift, scale and power are applied in a Scale layer.
Definition: NvInfer.h:1770
RuntimePlatform
Describes the intended runtime platform (operating system and CPU architecture) for the execution of ...
Definition: NvInfer.h:9569
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:659
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:10124
CumulativeOperation
Enumerates the cumulative operations that may be performed by a Cumulative layer.
Definition: NvInfer.h:6671
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2999
BoundingBoxFormat
Representation of bounding box data used for the Boxes input tensor in INMSLayer.
Definition: NvInfer.h:6209
ComputeCapability
Describes compute capability that an engine will be built for.
Definition: NvInfer.h:10173
v_1_0::IPluginV3 IPluginV3
Definition: NvInferPluginBase.h:273
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:2715
v_1_0::IStreamWriter IStreamWriter
Definition: NvInferRuntime.h:720
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1318
DynamicShapesKernelSpecializationStrategy
Different kernel specialization strategies for dynamic shapes.
Definition: NvInferRuntime.h:3178
v_1_0::IStreamReaderV2 IStreamReaderV2
Definition: NvInferRuntime.h:797
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:143
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1396
FillOperation
Enumerates the tensor fill operations that may be performed by a fill layer.
Definition: NvInfer.h:5043
EngineStat
The kind of engine statistics that can be queried from the ICudaEngine.
Definition: NvInferRuntime.h:3374
CudaGraphStrategy
Strategies available for CUDA graphs optimizations for JIT (Just-In-Time) inference.
Definition: NvInferRuntime.h:3217
ResizeRoundMode
The rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4092
EngineValidity
Whether a TensorRT-RTX engine is likely to be valid on the current system.
Definition: NvInferRuntime.h:1863
PaddingMode
Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer,...
Definition: NvInfer.h:948
TripLimit
Enum that describes kinds of trip limits.
Definition: NvInfer.h:4451
uint32_t NetworkDefinitionCreationFlags
Represents one or more NetworkDefinitionCreationFlag flags using binary OR operations....
Definition: NvInfer.h:11176
PreviewFeature
Define preview features.
Definition: NvInfer.h:10071
TilingOptimizationLevel
Define the optimization levels for Tiling.
Definition: NvInfer.h:10199
DataType
The type of weights and tensors. The datatypes other than kBOOL, kINT32, and kINT64 are "activation d...
Definition: NvInferRuntimeBase.h:146
uint32_t BuilderFlags
Represents one or more BuilderFlag values using binary OR operations, e.g., 1U << BuilderFlag::kFP16 ...
Definition: NvInfer.h:9601
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1350
LayerType
The type values of layer classes.
Definition: NvInfer.h:58
SampleMode
Controls how ISliceLayer and IGridSample handle out-of-bounds coordinates.
Definition: NvInfer.h:3206
GatherMode
Control form of IGatherLayer.
Definition: NvInfer.h:2462
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:4334
MoEActType
Enumerates the activation type for the MoE layer.
Definition: NvInfer.h:7435
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1328
uint32_t TensorFormats
It is capable of representing one or more TensorFormat by binary OR operations, e....
Definition: NvInfer.h:135
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2970
NetworkDefinitionCreationFlag
List of immutable network properties expressed at network creation time. NetworkDefinitionCreationFla...
Definition: NvInfer.h:11187
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:2372
CollectiveOperation
Enumerates the collective operations that may be performed by a DistCollective layer.
Definition: NvInfer.h:2843
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2989
InterpolationMode
Enumerates various modes of interpolation.
Definition: NvInfer.h:3980
BuilderFlag
List of valid modes that the builder can enable when creating an engine from a network definition.
Definition: NvInfer.h:9611
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntime.h:1430
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:3104
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:3487
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:2815
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:5291
ScatterMode
Control form of IScatterLayer.
Definition: NvInfer.h:5943
MatrixOperation
Enumerates the operations that may be performed on a tensor by IMatrixMultiplyLayer before multiplica...
Definition: NvInfer.h:3640
ResizeCoordinateTransformation
The resize coordinate transformation function.
Definition: NvInfer.h:4008
LoopOutput
Enum that describes kinds of loop outputs.
Definition: NvInfer.h:4423
KVCacheMode
Enumerates the KVCache modes that may be performed by a KVCacheUpdate layer.
Definition: NvInfer.h:7347
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntime.h:710
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1384
v_1_0::IProgressMonitor IProgressMonitor
Definition: NvInfer.h:10315
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:204
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2632
AttentionNormalizationOp
Enumerates the operations that may be performed by the normalization in the attention subgraph.
Definition: NvInfer.h:6806