Go to the documentation of this file.
50 #ifndef NV_INFER_IMPL_H
51 #define NV_INFER_IMPL_H
53 #include "NvInferLegacyDims.h"
59 class IActivationLayer;
61 class IAlgorithmContext;
62 class IAlgorithmIOInfo;
63 class IAlgorithmVariant;
64 class IAlgorithmSelector;
66 class IConcatenationLayer;
69 class IConvolutionLayer;
70 class IConvolutionLayer;
72 class IDeconvolutionLayer;
73 class IDeconvolutionLayer;
74 class IDequantizeLayer;
76 class IElementWiseLayer;
77 class IExecutionContext;
79 class IFullyConnectedLayer;
83 class IInt8Calibrator;
87 class ILoopOutputLayer;
89 class IMatrixMultiplyLayer;
90 class INetworkDefinition;
91 class IOptimizationProfile;
94 class IParametricReLULayer;
100 class IPluginV2Layer;
104 class IQuantizeLayer;
105 class IRaggedSoftMaxLayer;
106 class IRecurrenceLayer;
119 class ITripLimitLayer;
178 virtual ~
VRoot() noexcept =
default;
184 virtual void* data()
const noexcept = 0;
185 virtual std::size_t size()
const noexcept = 0;
186 virtual DataType type()
const noexcept = 0;
192 virtual bool isConstant()
const = 0;
193 virtual int32_t getConstantValue()
const = 0;
209 const void* blob, std::size_t size, IPluginFactory* pluginFactory) noexcept
211 virtual void setDLACore(int32_t dlaCore) noexcept = 0;
212 virtual int32_t getDLACore()
const noexcept = 0;
213 virtual int32_t getNbDLACores()
const noexcept = 0;
214 virtual void setGpuAllocator(
IGpuAllocator* allocator) noexcept = 0;
215 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
222 virtual bool setWeights(
const char* layerName,
WeightsRole role,
const Weights weights) noexcept = 0;
223 virtual bool refitCudaEngine() noexcept = 0;
224 virtual int32_t getMissing(int32_t size,
const char** layerNames,
WeightsRole* roles) noexcept = 0;
225 virtual int32_t getAll(int32_t size,
const char** layerNames,
WeightsRole* roles) noexcept = 0;
226 virtual bool setDynamicRange(
const char* tensorName,
float min,
float max) noexcept = 0;
227 virtual float getDynamicRangeMin(
const char* tensorName)
const noexcept = 0;
228 virtual float getDynamicRangeMax(
const char* tensorName)
const noexcept = 0;
229 virtual int32_t getTensorsWithDynamicRange(int32_t size,
const char** tensorNames)
const noexcept = 0;
230 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
232 virtual bool setNamedWeights(
const char* name,
Weights weights) noexcept = 0;
233 virtual int32_t getMissingWeights(int32_t size,
const char** weightsNames) noexcept = 0;
234 virtual int32_t getAllWeights(int32_t size,
const char** weightsNames) noexcept = 0;
242 virtual bool setShapeValues(
243 const char* inputName,
OptProfileSelector select,
const int32_t* values, int32_t nbValues) noexcept
245 virtual int32_t getNbShapeValues(
const char* inputName)
const noexcept = 0;
246 virtual int32_t
const* getShapeValues(
const char* inputName,
OptProfileSelector select)
const noexcept = 0;
247 virtual bool setExtraMemoryTarget(
float target) noexcept = 0;
248 virtual float getExtraMemoryTarget()
const noexcept = 0;
249 virtual bool isValid()
const noexcept = 0;
255 virtual int32_t getNbBindings()
const noexcept = 0;
256 virtual int32_t getBindingIndex(
const char* name)
const noexcept = 0;
257 virtual const char* getBindingName(int32_t bindingIndex)
const noexcept = 0;
258 virtual bool bindingIsInput(int32_t bindingIndex)
const noexcept = 0;
259 virtual Dims getBindingDimensions(int32_t bindingIndex)
const noexcept = 0;
260 virtual DataType getBindingDataType(int32_t bindingIndex)
const noexcept = 0;
261 virtual int32_t getMaxBatchSize()
const noexcept = 0;
262 virtual int32_t getNbLayers()
const noexcept = 0;
263 virtual IHostMemory* serialize()
const noexcept = 0;
265 virtual TensorLocation getLocation(int32_t bindingIndex)
const noexcept = 0;
266 virtual IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept = 0;
267 virtual size_t getDeviceMemorySize()
const noexcept = 0;
268 virtual bool isRefittable()
const noexcept = 0;
269 virtual int32_t getBindingBytesPerComponent(int32_t bindingIndex)
const noexcept = 0;
270 virtual int32_t getBindingComponentsPerElement(int32_t bindingIndex)
const noexcept = 0;
271 virtual TensorFormat getBindingFormat(int32_t bindingIndex)
const noexcept = 0;
272 virtual const char* getBindingFormatDesc(int32_t bindingIndex)
const noexcept = 0;
273 virtual int32_t getBindingVectorizedDim(int32_t bindingIndex)
const noexcept = 0;
274 virtual const char* getName()
const noexcept = 0;
275 virtual int32_t getNbOptimizationProfiles()
const noexcept = 0;
276 virtual Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex,
OptProfileSelector select)
const
279 virtual const int32_t* getProfileShapeValues(
282 virtual bool isShapeBinding(int32_t bindingIndex)
const noexcept = 0;
283 virtual bool isExecutionBinding(int32_t bindingIndex)
const noexcept = 0;
285 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
287 virtual bool hasImplicitBatchDimension()
const noexcept = 0;
294 virtual bool execute(int32_t batchSize,
void*
const* bindings) noexcept = 0;
295 virtual bool enqueue(
298 virtual void setDebugSync(
bool sync) noexcept = 0;
299 virtual bool getDebugSync()
const noexcept = 0;
300 virtual void setProfiler(
IProfiler*) noexcept = 0;
301 virtual IProfiler* getProfiler()
const noexcept = 0;
302 virtual const ICudaEngine& getEngine()
const noexcept = 0;
303 virtual void setName(
const char* name) noexcept = 0;
304 virtual const char* getName()
const noexcept = 0;
305 virtual void setDeviceMemory(
void* memory) noexcept = 0;
306 virtual Dims getStrides(int32_t bindingIndex)
const noexcept = 0;
307 virtual bool setOptimizationProfile(int32_t profileIndex) noexcept = 0;
308 virtual int32_t getOptimizationProfile()
const noexcept = 0;
309 virtual bool setBindingDimensions(int32_t bindingIndex,
Dims dimensions) noexcept = 0;
310 virtual Dims getBindingDimensions(int32_t bindingIndex)
const noexcept = 0;
311 virtual bool setInputShapeBinding(int32_t bindingIndex, int32_t
const* data) noexcept = 0;
312 virtual bool getShapeBinding(int32_t bindingIndex, int32_t* data)
const noexcept = 0;
313 virtual bool allInputDimensionsSpecified()
const noexcept = 0;
314 virtual bool allInputShapesSpecified()
const noexcept = 0;
315 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
317 virtual bool executeV2(
void*
const* bindings) noexcept = 0;
319 virtual bool setOptimizationProfileAsync(int32_t profileIndex,
cudaStream_t stream) noexcept = 0;
325 virtual void setName(
const char* name) noexcept = 0;
326 virtual const char* getName()
const noexcept = 0;
327 virtual void setDimensions(
Dims dimensions) noexcept = 0;
328 virtual Dims getDimensions()
const noexcept = 0;
329 virtual void setType(
DataType type) noexcept = 0;
330 virtual DataType getType()
const noexcept = 0;
331 virtual bool setDynamicRange(
float min,
float max) noexcept = 0;
332 virtual bool isNetworkInput()
const noexcept = 0;
333 virtual bool isNetworkOutput()
const noexcept = 0;
334 virtual void setBroadcastAcrossBatch(
bool broadcastAcrossBatch) noexcept = 0;
335 virtual bool getBroadcastAcrossBatch()
const noexcept = 0;
338 virtual bool dynamicRangeIsSet()
const noexcept = 0;
339 virtual void resetDynamicRange() noexcept = 0;
340 virtual float getDynamicRangeMin()
const noexcept = 0;
341 virtual float getDynamicRangeMax()
const noexcept = 0;
342 virtual void setAllowedFormats(
TensorFormats formats) noexcept = 0;
343 virtual TensorFormats getAllowedFormats()
const noexcept = 0;
344 virtual bool isShapeTensor()
const noexcept = 0;
345 virtual bool isExecutionTensor()
const noexcept = 0;
350 virtual LayerType getType()
const noexcept = 0;
351 virtual void setName(
const char* name) noexcept = 0;
352 virtual const char* getName()
const noexcept = 0;
353 virtual int32_t getNbInputs()
const noexcept = 0;
354 virtual ITensor* getInput(int32_t index)
const noexcept = 0;
355 virtual int32_t getNbOutputs()
const noexcept = 0;
356 virtual ITensor* getOutput(int32_t index)
const noexcept = 0;
357 virtual void setInput(int32_t index,
ITensor& tensor) noexcept = 0;
358 virtual void setPrecision(
DataType dataType) noexcept = 0;
359 virtual DataType getPrecision()
const noexcept = 0;
360 virtual bool precisionIsSet()
const noexcept = 0;
361 virtual void resetPrecision() noexcept = 0;
362 virtual void setOutputType(int32_t index,
DataType dataType) noexcept = 0;
363 virtual DataType getOutputType(int32_t index)
const noexcept = 0;
364 virtual bool outputTypeIsSet(int32_t index)
const noexcept = 0;
365 virtual void resetOutputType(int32_t index) noexcept = 0;
371 virtual void setKernelSize(
DimsHW kernelSize) noexcept = 0;
372 virtual DimsHW getKernelSize()
const noexcept = 0;
373 virtual void setNbOutputMaps(int32_t nbOutputMaps) noexcept = 0;
374 virtual int32_t getNbOutputMaps()
const noexcept = 0;
375 virtual void setStride(
DimsHW stride) noexcept = 0;
376 virtual DimsHW getStride()
const noexcept = 0;
377 virtual void setPadding(
DimsHW padding) noexcept = 0;
378 virtual DimsHW getPadding()
const noexcept = 0;
379 virtual void setNbGroups(int32_t nbGroups) noexcept = 0;
380 virtual int32_t getNbGroups()
const noexcept = 0;
381 virtual void setKernelWeights(
Weights weights) noexcept = 0;
382 virtual Weights getKernelWeights()
const noexcept = 0;
383 virtual void setBiasWeights(
Weights weights) noexcept = 0;
384 virtual Weights getBiasWeights()
const noexcept = 0;
385 virtual void setDilation(
DimsHW dilation) noexcept = 0;
386 virtual DimsHW getDilation()
const noexcept = 0;
387 virtual void setPrePadding(
Dims padding) noexcept = 0;
388 virtual Dims getPrePadding()
const noexcept = 0;
389 virtual void setPostPadding(
Dims padding) noexcept = 0;
390 virtual Dims getPostPadding()
const noexcept = 0;
391 virtual void setPaddingMode(
PaddingMode paddingMode) noexcept = 0;
392 virtual PaddingMode getPaddingMode()
const noexcept = 0;
393 virtual void setKernelSizeNd(
Dims kernelSize) noexcept = 0;
394 virtual Dims getKernelSizeNd()
const noexcept = 0;
395 virtual void setStrideNd(
Dims stride) noexcept = 0;
396 virtual Dims getStrideNd()
const noexcept = 0;
397 virtual void setPaddingNd(
Dims padding) noexcept = 0;
398 virtual Dims getPaddingNd()
const noexcept = 0;
399 virtual void setDilationNd(
Dims dilation) noexcept = 0;
400 virtual Dims getDilationNd()
const noexcept = 0;
406 virtual void setNbOutputChannels(int32_t nbOutputs) noexcept = 0;
407 virtual int32_t getNbOutputChannels()
const noexcept = 0;
408 virtual void setKernelWeights(
Weights weights) noexcept = 0;
409 virtual Weights getKernelWeights()
const noexcept = 0;
410 virtual void setBiasWeights(
Weights weights) noexcept = 0;
411 virtual Weights getBiasWeights()
const noexcept = 0;
419 virtual void setAlpha(
float alpha) noexcept = 0;
420 virtual void setBeta(
float beta) noexcept = 0;
421 virtual float getAlpha()
const noexcept = 0;
422 virtual float getBeta()
const noexcept = 0;
428 virtual void setPoolingType(
PoolingType type) noexcept = 0;
429 virtual PoolingType getPoolingType()
const noexcept = 0;
430 virtual void setWindowSize(
DimsHW windowSize) noexcept = 0;
431 virtual DimsHW getWindowSize()
const noexcept = 0;
432 virtual void setStride(
DimsHW stride) noexcept = 0;
433 virtual DimsHW getStride()
const noexcept = 0;
434 virtual void setPadding(
DimsHW padding) noexcept = 0;
435 virtual DimsHW getPadding()
const noexcept = 0;
436 virtual void setBlendFactor(
float blendFactor) noexcept = 0;
437 virtual float getBlendFactor()
const noexcept = 0;
438 virtual void setAverageCountExcludesPadding(
bool exclusive) noexcept = 0;
439 virtual bool getAverageCountExcludesPadding()
const noexcept = 0;
440 virtual void setPrePadding(
Dims padding) noexcept = 0;
441 virtual Dims getPrePadding()
const noexcept = 0;
442 virtual void setPostPadding(
Dims padding) noexcept = 0;
443 virtual Dims getPostPadding()
const noexcept = 0;
444 virtual void setPaddingMode(
PaddingMode paddingMode) noexcept = 0;
445 virtual PaddingMode getPaddingMode()
const noexcept = 0;
446 virtual void setWindowSizeNd(
Dims windowSize) noexcept = 0;
447 virtual Dims getWindowSizeNd()
const noexcept = 0;
448 virtual void setStrideNd(
Dims stride) noexcept = 0;
449 virtual Dims getStrideNd()
const noexcept = 0;
450 virtual void setPaddingNd(
Dims padding) noexcept = 0;
451 virtual Dims getPaddingNd()
const noexcept = 0;
457 virtual void setWindowSize(int32_t windowSize) noexcept = 0;
458 virtual int32_t getWindowSize()
const noexcept = 0;
459 virtual void setAlpha(
float alpha) noexcept = 0;
460 virtual float getAlpha()
const noexcept = 0;
461 virtual void setBeta(
float beta) noexcept = 0;
462 virtual float getBeta()
const noexcept = 0;
463 virtual void setK(
float k) noexcept = 0;
464 virtual float getK()
const noexcept = 0;
470 virtual void setMode(
ScaleMode mode) noexcept = 0;
471 virtual ScaleMode getMode()
const noexcept = 0;
472 virtual void setShift(
Weights shift) noexcept = 0;
473 virtual Weights getShift()
const noexcept = 0;
474 virtual void setScale(
Weights scale) noexcept = 0;
475 virtual Weights getScale()
const noexcept = 0;
476 virtual void setPower(
Weights power) noexcept = 0;
477 virtual Weights getPower()
const noexcept = 0;
478 virtual int32_t getChannelAxis()
const noexcept = 0;
479 virtual void setChannelAxis(int32_t channelAxis) noexcept = 0;
485 virtual void setAxes(uint32_t axes) noexcept = 0;
486 virtual uint32_t getAxes()
const noexcept = 0;
492 virtual void setAxis(int32_t axis) noexcept = 0;
493 virtual int32_t getAxis()
const noexcept = 0;
499 virtual void setKernelSize(
DimsHW kernelSize) noexcept = 0;
500 virtual DimsHW getKernelSize()
const noexcept = 0;
501 virtual void setNbOutputMaps(int32_t nbOutputMaps) noexcept = 0;
502 virtual int32_t getNbOutputMaps()
const noexcept = 0;
503 virtual void setStride(
DimsHW stride) noexcept = 0;
504 virtual DimsHW getStride()
const noexcept = 0;
505 virtual void setPadding(
DimsHW padding) noexcept = 0;
506 virtual DimsHW getPadding()
const noexcept = 0;
507 virtual void setNbGroups(int32_t nbGroups) noexcept = 0;
508 virtual int32_t getNbGroups()
const noexcept = 0;
509 virtual void setKernelWeights(
Weights weights) noexcept = 0;
510 virtual Weights getKernelWeights()
const noexcept = 0;
511 virtual void setBiasWeights(
Weights weights) noexcept = 0;
512 virtual Weights getBiasWeights()
const noexcept = 0;
513 virtual void setPrePadding(
Dims padding) noexcept = 0;
514 virtual Dims getPrePadding()
const noexcept = 0;
515 virtual void setPostPadding(
Dims padding) noexcept = 0;
516 virtual Dims getPostPadding()
const noexcept = 0;
517 virtual void setPaddingMode(
PaddingMode paddingMode) noexcept = 0;
518 virtual PaddingMode getPaddingMode()
const noexcept = 0;
519 virtual void setKernelSizeNd(
Dims kernelSize) noexcept = 0;
520 virtual Dims getKernelSizeNd()
const noexcept = 0;
521 virtual void setStrideNd(
Dims stride) noexcept = 0;
522 virtual Dims getStrideNd()
const noexcept = 0;
523 virtual void setPaddingNd(
Dims padding) noexcept = 0;
524 virtual Dims getPaddingNd()
const noexcept = 0;
525 virtual void setDilationNd(
Dims dilation) noexcept = 0;
526 virtual Dims getDilationNd()
const noexcept = 0;
539 virtual void setGatherAxis(int32_t axis) noexcept = 0;
540 virtual int32_t getGatherAxis()
const noexcept = 0;
541 virtual void setNbElementWiseDims(int32_t k) noexcept = 0;
542 virtual int32_t getNbElementWiseDims()
const noexcept = 0;
548 virtual int32_t getLayerCount()
const noexcept = 0;
549 virtual int32_t getHiddenSize()
const noexcept = 0;
550 virtual int32_t getMaxSeqLength()
const noexcept = 0;
551 virtual int32_t getDataLength()
const noexcept = 0;
552 virtual void setSequenceLengths(
ITensor& seqLengths) noexcept = 0;
553 virtual ITensor* getSequenceLengths()
const noexcept = 0;
554 virtual void setOperation(
RNNOperation op) noexcept = 0;
556 virtual void setInputMode(
RNNInputMode op) noexcept = 0;
558 virtual void setDirection(
RNNDirection op) noexcept = 0;
560 virtual void setWeightsForGate(int32_t layerIndex,
RNNGateType gate,
bool isW,
Weights weights) noexcept = 0;
561 virtual Weights getWeightsForGate(int32_t layerIndex,
RNNGateType gate,
bool isW)
const noexcept = 0;
562 virtual void setBiasForGate(int32_t layerIndex,
RNNGateType gate,
bool isW,
Weights bias) noexcept = 0;
563 virtual Weights getBiasForGate(int32_t layerIndex,
RNNGateType gate,
bool isW)
const noexcept = 0;
564 virtual void setHiddenState(
ITensor& hidden) noexcept = 0;
565 virtual ITensor* getHiddenState()
const noexcept = 0;
566 virtual void setCellState(
ITensor& cell) noexcept = 0;
567 virtual ITensor* getCellState()
const noexcept = 0;
573 virtual IPlugin& getPlugin() noexcept = 0;
579 virtual IPluginV2& getPlugin() noexcept = 0;
594 virtual void setReduceAxes(uint32_t reduceAxes) noexcept = 0;
595 virtual uint32_t getReduceAxes()
const noexcept = 0;
596 virtual void setKeepDimensions(
bool keepDimensions) noexcept = 0;
597 virtual bool getKeepDimensions()
const noexcept = 0;
603 virtual void setPrePadding(
DimsHW padding) noexcept = 0;
604 virtual DimsHW getPrePadding()
const noexcept = 0;
605 virtual void setPostPadding(
DimsHW padding) noexcept = 0;
606 virtual DimsHW getPostPadding()
const noexcept = 0;
607 virtual void setPrePaddingNd(
Dims padding) noexcept = 0;
608 virtual Dims getPrePaddingNd()
const noexcept = 0;
609 virtual void setPostPaddingNd(
Dims padding) noexcept = 0;
610 virtual Dims getPostPaddingNd()
const noexcept = 0;
616 virtual void setFirstTranspose(
const Permutation& permutation) noexcept = 0;
617 virtual const Permutation& getFirstTranspose()
const noexcept = 0;
618 virtual void setReshapeDimensions(
Dims dimensions) noexcept = 0;
619 virtual Dims getReshapeDimensions()
const noexcept = 0;
620 virtual void setSecondTranspose(
const Permutation& permutation) noexcept = 0;
621 virtual const Permutation& getSecondTranspose()
const noexcept = 0;
622 virtual void setZeroIsPlaceholder(
bool zeroIsPlaceholder) = 0;
623 virtual bool getZeroIsPlaceholder()
const = 0;
629 virtual void setStart(
Dims start) noexcept = 0;
630 virtual Dims getStart()
const noexcept = 0;
631 virtual void setSize(
Dims size) noexcept = 0;
632 virtual Dims getSize()
const noexcept = 0;
633 virtual void setStride(
Dims stride) noexcept = 0;
634 virtual Dims getStride()
const noexcept = 0;
635 virtual void setMode(
SliceMode mode) noexcept = 0;
636 virtual SliceMode getMode()
const noexcept = 0;
649 virtual void setK(int32_t k) noexcept = 0;
650 virtual int32_t getK()
const noexcept = 0;
651 virtual void setReduceAxes(uint32_t reduceAxes) noexcept = 0;
652 virtual uint32_t getReduceAxes()
const noexcept = 0;
658 virtual void setOperation(int32_t index,
MatrixOperation op) noexcept = 0;
659 virtual MatrixOperation getOperation(int32_t index)
const noexcept = 0;
675 virtual void setWeights(
Weights weights) noexcept = 0;
676 virtual Weights getWeights()
const noexcept = 0;
677 virtual void setDimensions(
Dims dimensions) noexcept = 0;
678 virtual Dims getDimensions()
const noexcept = 0;
689 virtual void setOutputDimensions(
Dims dimensions) noexcept = 0;
690 virtual Dims getOutputDimensions()
const noexcept = 0;
691 virtual void setScales(
const float* scales, int32_t nbScales) noexcept = 0;
692 virtual int32_t getScales(int32_t size,
float* scales)
const noexcept = 0;
693 virtual void setResizeMode(
ResizeMode resizeMode) noexcept = 0;
694 virtual ResizeMode getResizeMode()
const noexcept = 0;
695 virtual void setAlignCorners(
bool alignCorners) noexcept = 0;
696 virtual bool getAlignCorners()
const noexcept = 0;
699 virtual void setSelectorForSinglePixel(
ResizeSelector selector) noexcept = 0;
700 virtual ResizeSelector getSelectorForSinglePixel()
const noexcept = 0;
708 virtual ILoop* getLoop()
const noexcept = 0;
719 virtual LoopOutput getLoopOutput()
const noexcept = 0;
720 virtual void setAxis(int32_t axis) noexcept = 0;
721 virtual int32_t getAxis()
const noexcept = 0;
727 virtual TripLimit getTripLimit()
const noexcept = 0;
733 virtual void setAxis(int32_t axis) noexcept = 0;
734 virtual int32_t getAxis()
const noexcept = 0;
735 virtual void setReverse(
bool reverse) noexcept = 0;
736 virtual bool getReverse()
const noexcept = 0;
743 virtual IIteratorLayer* addIterator(
ITensor& tensor, int32_t axis = 0,
bool reverse =
false) noexcept = 0;
745 virtual void setName(
const char* name) noexcept = 0;
746 virtual const char* getName()
const noexcept = 0;
755 virtual void setDimensions(
Dims dimensions) noexcept = 0;
756 virtual Dims getDimensions()
const noexcept = 0;
759 virtual void setAlpha(
double alpha) noexcept = 0;
760 virtual double getAlpha()
const noexcept = 0;
761 virtual void setBeta(
double beta) noexcept = 0;
762 virtual double getBeta()
const noexcept = 0;
768 virtual int32_t getAxis()
const noexcept = 0;
769 virtual void setAxis(int32_t axis) noexcept = 0;
775 virtual int32_t getAxis()
const noexcept = 0;
776 virtual void setAxis(int32_t axis) noexcept = 0;
783 virtual void markOutput(
ITensor& tensor) noexcept = 0;
792 virtual ILRNLayer* addLRN(
ITensor& input, int32_t window,
float alpha,
float beta,
float k) noexcept = 0;
804 virtual int32_t getNbLayers()
const noexcept = 0;
805 virtual ILayer* getLayer(int32_t index)
const noexcept = 0;
806 virtual int32_t getNbInputs()
const noexcept = 0;
807 virtual ITensor* getInput(int32_t index)
const noexcept = 0;
808 virtual int32_t getNbOutputs()
const noexcept = 0;
809 virtual ITensor* getOutput(int32_t index)
const noexcept = 0;
821 ITensor& input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen,
RNNOperation op) noexcept
824 virtual void removeTensor(
ITensor& tensor) noexcept = 0;
825 virtual void unmarkOutput(
ITensor& tensor) noexcept = 0;
828 virtual void setName(
const char* name) noexcept = 0;
829 virtual const char* getName()
const noexcept = 0;
831 virtual bool hasImplicitBatchDimension()
const noexcept = 0;
832 virtual bool markOutputForShapes(
ITensor& tensor) noexcept = 0;
833 virtual bool unmarkOutputForShapes(
ITensor& tensor) noexcept = 0;
846 virtual bool hasExplicitPrecision()
const noexcept = 0;
847 virtual ILoop* addLoop() noexcept = 0;
851 virtual bool setWeightsName(
Weights weights,
const char* name) noexcept = 0;
852 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
861 virtual TensorFormat getTensorFormat()
const noexcept = 0;
862 virtual DataType getDataType()
const noexcept = 0;
863 virtual Dims getStrides()
const noexcept = 0;
869 virtual int64_t getImplementation()
const noexcept = 0;
870 virtual int64_t getTactic()
const noexcept = 0;
876 virtual const char* getName()
const noexcept = 0;
878 virtual int32_t getNbInputs()
const noexcept = 0;
879 virtual int32_t getNbOutputs()
const noexcept = 0;
885 virtual const IAlgorithmIOInfo& getAlgorithmIOInfo(int32_t index)
const noexcept = 0;
887 virtual float getTimingMSec()
const noexcept = 0;
888 virtual std::size_t getWorkspaceSize()
const noexcept = 0;
889 virtual const IAlgorithmIOInfo* getAlgorithmIOInfoByIndex(int32_t index)
const noexcept = 0;
896 virtual bool combine(
const ITimingCache& inputCache,
bool ignoreMismatch) noexcept = 0;
897 virtual bool reset() noexcept = 0;
903 virtual void setMinTimingIterations(int32_t minTiming) noexcept = 0;
904 virtual int32_t getMinTimingIterations()
const noexcept = 0;
905 virtual void setAvgTimingIterations(int32_t avgTiming) noexcept = 0;
906 virtual int32_t getAvgTimingIterations()
const noexcept = 0;
909 virtual void setInt8Calibrator(
IInt8Calibrator* calibrator) noexcept = 0;
911 virtual void setMaxWorkspaceSize(std::size_t workspaceSize) noexcept = 0;
912 virtual std::size_t getMaxWorkspaceSize()
const noexcept = 0;
913 virtual void setFlags(
BuilderFlags builderFlags) noexcept = 0;
915 virtual void clearFlag(
BuilderFlag builderFlag) noexcept = 0;
916 virtual void setFlag(
BuilderFlag builderFlag) noexcept = 0;
917 virtual bool getFlag(
BuilderFlag builderFlag)
const noexcept = 0;
918 virtual void setDeviceType(
const ILayer* layer,
DeviceType deviceType) noexcept = 0;
920 virtual bool isDeviceTypeSet(
const ILayer* layer)
const noexcept = 0;
921 virtual void resetDeviceType(
const ILayer* layer) noexcept = 0;
922 virtual bool canRunOnDLA(
const ILayer* layer)
const noexcept = 0;
923 virtual void setDLACore(int32_t dlaCore) noexcept = 0;
924 virtual int32_t getDLACore()
const noexcept = 0;
925 virtual void setDefaultDeviceType(
DeviceType deviceType) noexcept = 0;
926 virtual DeviceType getDefaultDeviceType()
const noexcept = 0;
927 virtual void reset() noexcept = 0;
928 virtual void setProfileStream(
const cudaStream_t stream) noexcept = 0;
929 virtual cudaStream_t getProfileStream()
const noexcept = 0;
931 virtual int32_t getNbOptimizationProfiles()
const noexcept = 0;
943 virtual bool setTacticSources(
TacticSources tacticSources) noexcept = 0;
946 virtual bool setTimingCache(
const ITimingCache& cache,
bool ignoreMismatch) noexcept = 0;
953 virtual void setMaxBatchSize(int32_t batchSize) noexcept = 0;
954 virtual int32_t getMaxBatchSize()
const noexcept = 0;
955 virtual bool platformHasFastFp16()
const noexcept = 0;
956 virtual bool platformHasFastInt8()
const noexcept = 0;
957 virtual int32_t getMaxDLABatchSize()
const noexcept = 0;
958 virtual int32_t getNbDLACores()
const noexcept = 0;
959 virtual void setGpuAllocator(
IGpuAllocator* allocator) noexcept = 0;
965 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
967 virtual void reset() noexcept = 0;
968 virtual bool platformHasTf32()
const noexcept = 0;
976 #endif // NV_INFER_IMPL_H
LoopOutput
Enum that describes kinds of loop outputs.
Definition: NvInfer.h:4739
Definition: NvInferImpl.h:858
Definition: NvInferImpl.h:613
SliceMode
Controls how ISliceLayer handles out of bounds coordinates.
Definition: NvInfer.h:3863
A fully connected layer in a network definition. This layer expects an input tensor of three or more ...
Definition: NvInfer.h:1439
Definition: NvInfer.h:3684
Layer that represents a constant value.
Definition: NvInfer.h:4290
Layer that represents a parametric ReLU operation.
Definition: NvInfer.h:4353
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:211
Definition: NvInferImpl.h:705
A network definition for input to the builder.
Definition: NvInfer.h:5435
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:145
Provides a unique 128-bit identifier, which along with the input and output information denotes the v...
Definition: NvInfer.h:6830
Layer that represents a padding operation.
Definition: NvInfer.h:3572
Definition: NvInferImpl.h:866
A convolution layer in a network definition.
Definition: NvInfer.h:987
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:163
A layer that represents the identity function.
Definition: NvInfer.h:4276
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:621
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations.
Definition: NvInfer.h:7098
Definition: NvInferImpl.h:403
Interface implemented by application for selecting and reporting algorithms of a layer provided by th...
Definition: NvInfer.h:6983
RNNInputMode
Enumerates the RNN input modes that may occur with an RNN layer.
Definition: NvInfer.h:3032
Definition: NvInfer.h:4892
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:409
Carries information about input or output of the algorithm. IAlgorithmIOInfo for all the input and ou...
Definition: NvInfer.h:6787
Definition: NvInferRuntimeCommon.h:189
Definition: NvInferImpl.h:467
uint32_t TensorFormats
It is capable of representing one or more TensorFormat by binary OR operations, e....
Definition: NvInfer.h:141
Definition: NvInferImpl.h:752
Definition: NvInferImpl.h:536
An RNN layer in a network definition, version 2.
Definition: NvInfer.h:3080
Descriptor for two-dimensional spatial data.
Definition: NvInferLegacyDims.h:98
Definition: NvInferImpl.h:724
ResizeMode
Enumerates various modes of resize in the resize layer. The resize mode is set using setResizeMode().
Definition: NvInfer.h:4365
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:220
Definition: NvInfer.h:4834
Definition: NvInferImpl.h:489
Definition: NvInferImpl.h:252
Definition: NvInferImpl.h:738
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1231
uint32_t QuantizationFlags
Represents one or more QuantizationFlag values using binary OR operations.
Definition: NvInfer.h:7025
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:148
A RaggedSoftmax layer in a network definition.
Definition: NvInfer.h:4258
Definition: NvInferImpl.h:196
Definition: NvInferImpl.h:667
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:2773
Definition: NvInferImpl.h:589
Slices an input tensor into an output tensor based on the offset and strides.
Definition: NvInfer.h:3906
int32_t uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferImpl.h:158
Definition: NvInferImpl.h:882
Definition: NvInferImpl.h:482
ScaleMode
Controls how shift, scale and power are applied in a Scale layer.
Definition: NvInfer.h:2088
Definition: NvInferImpl.h:626
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:3471
Definition: NvInfer.h:4905
Definition: NvInferImpl.h:765
BuilderFlag
List of valid modes that the builder can enable when creating an engine from a network definition.
Definition: NvInfer.h:7064
The TensorRT API version 1 namespace.
Base class for all layer classes in a network definition.
Definition: NvInfer.h:511
Definition: NvInferImpl.h:347
Definition: NvInfer.h:4946
Layer type for pluginV2.
Definition: NvInfer.h:3361
Definition: NvInferImpl.h:672
An elementwise layer in a network definition.
Definition: NvInfer.h:2815
Definition: NvInferImpl.h:662
RNNGateType
Identifies an individual gate within an RNN cell.
Definition: NvInfer.h:3052
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: NvInferRuntimeCommon.h:107
Definition: NvInferImpl.h:322
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:3386
Class to handle tactic timing info collected from builder.
Definition: NvInfer.h:7142
Definition: NvInferImpl.h:772
Layer type for getting shape of a tensor.
Definition: NvInfer.h:4053
Definition: NvInferImpl.h:205
struct CUevent_st * cudaEvent_t
Forward declaration of cudaEvent_t.
Definition: NvInferRuntimeCommon.h:110
Definition: NvInferImpl.h:644
NetworkDefinitionCreationFlag
List of immutable network properties expressed at network creation time. NetworkDefinitionCreationFla...
Definition: NvInfer.h:7816
PaddingMode
Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer,...
Definition: NvInfer.h:955
MatrixOperation
Enumerates the operations that may be performed on a tensor by IMatrixMultiplyLayer before multiplica...
Definition: NvInfer.h:4161
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1055
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1622
ResizeSelector
The coordinate selector used when resizing to a single-pixel output.
Definition: NvInfer.h:4438
Definition: NvInferImpl.h:414
Definition: NvInferImpl.h:237
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:150
A concatenation layer in a network definition.
Definition: NvInfer.h:2327
Layer type for shuffling data.
Definition: NvInfer.h:3707
Definition: NvInferImpl.h:368
RNNOperation
Enumerates the RNN operations that may be performed by an RNN layer.
Definition: NvInfer.h:2982
A tensor in a network definition.
Definition: NvInfer.h:187
Definition: NvInfer.h:4789
Definition: NvInferImpl.h:600
Definition: NvInferImpl.h:582
LayerType
The type values of layer classes.
Definition: NvInfer.h:89
Definition: NvInferImpl.h:181
Definition: NvInferImpl.h:900
An LRN layer in a network definition.
Definition: NvInfer.h:1988
RNNDirection
Enumerates the RNN direction that may be performed by an RNN layer.
Definition: NvInfer.h:3004
TripLimit
Enum that describes kinds of trip limits.
Definition: NvInfer.h:4759
Layer that represents a reduction operator across Shape, Int32, Float, and Half tensors.
Definition: NvInfer.h:3494
Layer that represents a Matrix Multiplication.
Definition: NvInfer.h:4215
Definition: NvInferImpl.h:425
FillOperation
Enumerates the tensor fill operations that may be performed by a fill layer.
Definition: NvInfer.h:5051
Definition: NvInferImpl.h:655
Generate an output tensor with specified mode.
Definition: NvInfer.h:5089
QuantizationFlag
List of valid flags for quantizing the network to int8.
Definition: NvInfer.h:7034
Application-implemented interface for calibration.
Definition: NvInfer.h:6605
Definition: NvInferImpl.h:570
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1353
Definition: NvInferImpl.h:496
Layer that represents a TopK reduction.
Definition: NvInfer.h:4085
ResizeRoundMode
The rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4465
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1019
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:599
Definition: NvInfer.h:5037
Definition: NvInferImpl.h:748
uint32_t NetworkDefinitionCreationFlags
Represents one or more NetworkDefinitionCreationFlag flags using binary OR operations....
Definition: NvInfer.h:7805
Definition: NvInfer.h:2852
Definition: NvInferImpl.h:529
Definition: NvInferRuntime.h:263
Definition: NvInferImpl.h:545
uint32_t BuilderFlags
Represents one or more BuilderFlag values using binary OR operations, e.g., 1U << BuilderFlag::k...
Definition: NvInfer.h:7055
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:103
Definition: NvInferImpl.h:711
ResizeCoordinateTransformation
The resize coordinate transformation function.
Definition: NvInfer.h:4388
Definition: NvInferImpl.h:454
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:579
Definition: NvInferImpl.h:892
Definition: NvInferImpl.h:576
A Scale layer in a network definition.
Definition: NvInfer.h:2128
A Softmax layer in a network definition.
Definition: NvInfer.h:2263
Holds properties for configuring a builder to produce an engine.
Definition: NvInfer.h:7204
Definition: NvInferImpl.h:189
Definition: NvInferImpl.h:681
Definition: NvInferImpl.h:175
Definition: NvInferImpl.h:291
A resize layer in a network definition.
Definition: NvInfer.h:4511
Definition: NvInferImpl.h:639
Definition: NvInferImpl.h:686
A Pooling layer in a network definition.
Definition: NvInfer.h:1650
Definition: NvInferImpl.h:779
Definition: NvInferImpl.h:873
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:1093
Layer that represents a unary operation.
Definition: NvInfer.h:3425
Definition: NvInferImpl.h:950
Definition: NvInferImpl.h:716
A Quantize layer in a network definition.
Definition: NvInfer.h:5297
Definition: NvInferImpl.h:219
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:4065
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:235
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:1745
An Activation layer in a network definition.
Definition: NvInfer.h:1540
CalibrationAlgoType
Version of calibration algorithm to use.
Definition: NvInfer.h:6579
Definition: NvInferImpl.h:730
A Dequantize layer in a network definition.
Definition: NvInfer.h:5384
A deconvolution layer in a network definition.
Definition: NvInfer.h:2367