1 # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
3 @page dnn_usecase2 DNN Tensors
5 This code snippet demonstrates how the DNN module with DNN Tensors is typically used. Note that error handling is left out for clarity.
7 Initialize network from file.
9 If the model has been generated on DLA using `--useDLA` option with tensorrt_optimization tool,
10 the processor type should be either `::DW_PROCESSOR_TYPE_DLA_0` or `::DW_PROCESSOR_TYPE_DLA_1` depending on which DLA engine the inference should take place on.
11 Otherwise, the processor type should always be `::DW_PROCESSOR_TYPE_GPU`.
13 `contextHandle` is assumed to be a previously initialized `::dwContextHandle_t`.
16 // Load the DNN from a file. Note that the DNN model has to be generated with the tensorrt_optimization tool.
17 dwDNNHandle_t dnn = nullptr;
18 dwDNN_initializeTensorRTFromFile(&dnn, "network.fp32", nullptr, DW_PROCESSOR_TYPE_GPU, contextHandle);
21 Check that the loaded network has the expected number of inputs and outputs.
24 // Find out the number of input and output blobs in the network
25 uint32_t numInputs = 0;
26 uint32_t numOutputs = 0;
27 dwDNN_getInputBlobCount(&numInputs, dnn);
28 dwDNN_getOutputBlobCount(&numOutputs, dnn);
30 if (numInputs != 1) {
31 std::cerr << "Expected a DNN with one input blob." << std::endl;
32 }
34 if (numOutputs != 2) {
35 std::cerr << "Expected a DNN with two output blobs." << std::endl;
36 }
40 Ask the DNN about the order of the input and output blobs. The network is assumed to contain the input blob "data_in" and output blobs "data_out1" and "data_out2".
43 uint32_t inputIndex = 0;
44 uint32_t output1Index = 0;
45 uint32_t output2Index = 0;
47 // Find indices of blobs by their name.
48 dwDNN_getInputIndex(&inputIndex, "data_in", dnn);
49 dwDNN_getOutputIndex(&output1Index, "data_out1", dnn);
50 dwDNN_getOutputIndex(&output2Index, "data_out2", dnn);
56 // Get tensor properties and allocate tensors.
57 dwDNNTensorHandle_t inputTensor;
58 dwDNNTensorProperties inputProps;
59 dwDNN_getInputTensorProperties(&inputProps, inputIndex, dnn);
60 dwDNNTensor_create(&inputTensor, &inputProps, contextHandle);
62 dwDNNTensorHandle_t outputTensor1;
63 dwDNNTensorProperties outputProps1;
64 dwDNNTensorHandle_t outputTensor2;
65 dwDNNTensorProperties outputProps2;
66 dwDNN_getOutputTensorProperties(&outputProps1, output1Index, dnn);
67 dwDNN_getOutputTensorProperties(&outputProps2, output2Index, dnn);
69 dwDNNTensor_create(&outputTensor1, &outputProps1, contextHandle);
70 dwDNNTensor_create(&outputTensor2, &outputProps2, contextHandle);
72 // Create data conditioner to convert an input image to input tensor.
73 dwDNNMetaData dnnMetaData;
74 dwDNN_getMetaData(&dnnMetaData, dnn);
75 dwDataConditionerHandle_t dataConditioner;
76 dwDataConditioner_initializeFromTensorProperties(&dataConditioner, &inputProps, 1U,
77 &dnnMetaData.dataConditionerParams, cudaStream,
80 // Create CPU tensors for outputs.
81 dwDNNTensorHandle_t outputTensorHost1;
82 dwDNNTensorProperties outputPropsHost1 = outputProps1;
83 outputPropsHost1.tensorType = DW_DNN_TENSOR_TYPE_CPU;
84 dwDNNTensorHandle_t outputTensorHost2;
85 dwDNNTensorProperties outputPropsHost2 = outputProps2;
86 outputPropsHost2.tensorType = DW_DNN_TENSOR_TYPE_CPU;
88 dwDNNTensor_create(&outputTensorHost1, &outputPropsHost1, contextHandle);
89 dwDNNTensor_create(&outputTensorHost2, &outputPropsHost2, contextHandle);
91 // Create tensor streamers to stream outputs from GPU to CPU if needed
92 dwDNNTensorStreamerHandle_t streamer1;
93 dwDNNTensorStreamerHandle_t streamer2;
94 dwDNNTensorStreamer_initialize(&streamer1, &outputProps1, outputPropsHost1.tensorType, contextHandle);
95 dwDNNTensorStreamer_initialize(&streamer2, &outputProps2, outputPropsHost2.tensorType, contextHandle);
98 Convert DNN input from image to tensor, then perform DNN inference and stream results back. All operations are performed asynchronously with the host code.
101 // Run data conditioner to get input tensor
102 dwRect roi{0U, 0U, imageWidth, imageHeight};
103 dwDataConditioner_prepareData(inputTensor, &inputImage, 1, &roi,
104 cudaAddressModeClamp, dataConditioner);
106 // Begin DNN inference in the currently selected CUDA stream.
107 dwConstDNNTensorHandle_t inputs[1U] = {inputTensor};
108 dwDNNTensorHandle_t outputs[2U] = {outputTensor1, outputTensor2};
109 dwDNN_infer(outputs, inputs, dnn);
111 // Stream results from GPU to CPU
112 dwDNNTensorStreamer_producerSend(outputTensor1, streamer1);
113 dwDNNTensorStreamer_consumerReceive(&outputTensorHost1, streamer1);
115 dwDNNTensorStreamer_producerSend(outputTensor2, streamer2);
116 dwDNNTensorStreamer_consumerReceive(&outputTensorHost2, streamer2);
118 // Work on received output tensors.
119 void* data1 = nullptr;
120 void* data2 = nullptr;
121 dwDNNTensor_lock(&data1, outputTensorHost1);
122 dwDNNTensor_lock(&data2, outputTensorHost2);
126 dwDNNTensor_unlock(outputTensorHost1);
127 dwDNNTensor_unlock(outputTensorHost2);
129 // Return streamed tensors.
130 dwDNNTensorStreamer_consumerReturn(&outputTensorHost1, streamer1);
131 dwDNNTensorStreamer_producerReturn(nullptr, 1000, streamer1);
132 dwDNNTensorStreamer_consumerReturn(&outputTensorHost2, streamer2);
133 dwDNNTensorStreamer_producerReturn(nullptr, 1000, streamer2);
136 Finally, free previously allocated memory.
139 dwDNNTensor_destroy(outputTensor1);
140 dwDNNTensor_destroy(outputTensor2);
141 dwDNNTensor_destroy(outputTensorHost1);
142 dwDNNTensor_destroy(outputTensorHost2);
143 dwDNNTensorStreamer_release(streamer1);
144 dwDNNTensorStreamer_release(streamer2);
145 dwDataConditioner_release(dataConditioner);
149 For more information see:
150 - @ref dwx_sample_dnn_tensor