Migrating from enqueueV2 to enqueueV3 (C++)#

The two examples below perform the same inference task: the first uses TensorRT 8.x (enqueueV2) and the second uses TensorRT 10.x (enqueueV3). In TensorRT 10.x, enqueueV3 replaces enqueueV2: call setTensorAddress for each I/O tensor (using the names returned by getIOTensorName) before calling enqueueV3, as shown in the second example.

 1// TensorRT 8.x (before): enqueueV2 receives the device bindings array directly.
 2// Create RAII buffer manager object.
 3samplesCommon::BufferManager buffers(mEngine);
 4
 5auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
 6if (!context)
 7{
 8    return false;
 9}
10
11// Pick a random digit to try to infer.
12srand(time(NULL));
13int32_t const digit = rand() % 10;
14
15// Read the input data into the managed buffers.
16// There should be just 1 input tensor.
17ASSERT(mParams.inputTensorNames.size() == 1);
18
19if (!processInput(buffers, mParams.inputTensorNames[0], digit))
20{
21    return false;
22}
23// Create a CUDA stream to execute this inference.
24cudaStream_t stream;
25CHECK(cudaStreamCreate(&stream));
26
27// Asynchronously copy data from host input buffers to device input buffers.
28buffers.copyInputToDeviceAsync(stream);
29
30// Asynchronously enqueue the inference work.
31if (!context->enqueueV2(buffers.getDeviceBindings().data(), stream, nullptr))
32{
33    // Release the stream so a failed enqueue does not leak it; the result is ignored so the failure is still reported via return false.
34    static_cast<void>(cudaStreamDestroy(stream));
35    return false;
36}
37// Asynchronously copy data from device output buffers to host output buffers.
38buffers.copyOutputToHostAsync(stream);
39
40// Wait for the work in the stream to complete.
41CHECK(cudaStreamSynchronize(stream));
42
43// Release stream.
44CHECK(cudaStreamDestroy(stream));
 1// TensorRT 10.x (after): tensor addresses are bound by name, then enqueueV3 takes only the stream.
 2// Create RAII buffer manager object.
 3samplesCommon::BufferManager buffers(mEngine);
 4
 5auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
 6if (!context)
 7{
 8    return false;
 9}
10
11for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
12{
13    auto const name = mEngine->getIOTensorName(i);
14    // setTensorAddress reports failure through its bool return; stop here rather than enqueue with a missing binding.
15    if (!context->setTensorAddress(name, buffers.getDeviceBuffer(name)))
16    {
17        return false;
18    }
19}
20
21// Pick a random digit to try to infer.
22srand(time(NULL));
23int32_t const digit = rand() % 10;
24
25// Read the input data into the managed buffers.
26// There should be just 1 input tensor.
27ASSERT(mParams.inputTensorNames.size() == 1);
28
29if (!processInput(buffers, mParams.inputTensorNames[0], digit))
30{
31    return false;
32}
33// Create a CUDA stream to execute this inference.
34cudaStream_t stream;
35CHECK(cudaStreamCreate(&stream));
36
37// Asynchronously copy data from host input buffers to device input buffers.
38buffers.copyInputToDeviceAsync(stream);
39
40// Asynchronously enqueue the inference work.
41if (!context->enqueueV3(stream))
42{
43    // Release the stream so a failed enqueue does not leak it; the result is ignored so the failure is still reported via return false.
44    static_cast<void>(cudaStreamDestroy(stream));
45    return false;
46}
47
48// Asynchronously copy data from device output buffers to host output buffers.
49buffers.copyOutputToHostAsync(stream);
50
51// Wait for the work in the stream to complete.
52CHECK(cudaStreamSynchronize(stream));
53
54// Release stream.
55CHECK(cudaStreamDestroy(stream));

Summary of Changes#

  • Added explicit tensor address setup using setTensorAddress() with tensor names from getIOTensorName()

  • Changed from enqueueV2() to enqueueV3()

  • The bindings parameter is no longer passed to enqueueV3(); tensor addresses must be set beforehand using setTensorAddress()

Migrating Dims and IShapeLayer to 64-Bit Types#

Warning

This is a breaking ABI change. Code that bitwise copies to or from Dims must be updated for the wider type.

TensorRT 10.x changes the type of the dimensions held by Dims from int32_t to int64_t. However, TensorRT 10.x will generally reject networks that use dimensions exceeding the range of int32_t. The tensor type returned by IShapeLayer is now DataType::kINT64. Use ICastLayer to cast the result to a tensor of type DataType::kINT32 if 32-bit dimensions are required.

Inspect code that bitwise copies to and from Dims to ensure it is correct for int64_t dimensions.