1 # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
3 @page dnn_usecase2 DNN Tensors
5 This code snippet demonstrates how the DNN module with DNN Tensors is typically used. Note that error handling is left out for clarity.
7 Initialize network from file.
9 If the model has been generated on DLA using `--useDLA` option with tensorrt_optimization tool,
10 the processor type should be either `::DW_PROCESSOR_TYPE_DLA_0` or `::DW_PROCESSOR_TYPE_DLA_1` depending on which DLA engine the inference should take place on.
11 Otherwise, the processor type should always be `::DW_PROCESSOR_TYPE_GPU`.
13 `contextHandle` is assumed to be a previously initialized `::dwContextHandle_t`.
16 // Load the DNN from a file. Note that the DNN model has to be generated with the tensorrt_optimization tool.
17 dwDNNHandle_t dnn = nullptr;
18 dwDNN_initializeTensorRTFromFile(&dnn, "network.fp32", nullptr, DW_PROCESSOR_TYPE_GPU, contextHandle);
21 Check that the loaded network has the expected number of inputs and outputs.
24 // Find out the number of input and output blobs in the network
25 uint32_t numInputs = 0;
26 uint32_t numOutputs = 0;
27 dwDNN_getInputBlobCount(&numInputs, dnn);
28 dwDNN_getOutputBlobCount(&numOutputs, dnn);
30 if (numInputs != 1) {
31 std::cerr << "Expected a DNN with one input blob." << std::endl;
32 }
34 if (numOutputs != 2) {
35 std::cerr << "Expected a DNN with two output blobs." << std::endl;
36 }
40 Ask the DNN about the order of the input and output blobs. The network is assumed to contain the input blob "data_in" and output blobs "data_out1" and "data_out2".
43 uint32_t inputIndex = 0;
44 uint32_t output1Index = 0;
45 uint32_t output2Index = 0;
47 // Find indices of blobs by their name.
48 dwDNN_getInputIndex(&inputIndex, "data_in", dnn);
49 dwDNN_getOutputIndex(&output1Index, "data_out1", dnn);
50 dwDNN_getOutputIndex(&output2Index, "data_out2", dnn);
56 // Get tensor properties and allocate tensors.
57 dwDNNTensorHandle_t inputTensor;
58 dwDNNTensorProperties inputProps;
59 dwDNN_getInputTensorProperties(&inputProps, inputIndex, dnn);
60 dwDNNTensor_create(&inputTensor, &inputProps, contextHandle);
62 dwDNNTensorHandle_t outputTensor1;
63 dwDNNTensorProperties outputProps1;
64 dwDNNTensorHandle_t outputTensor2;
65 dwDNNTensorProperties outputProps2;
66 dwDNN_getOutputTensorProperties(&outputProps1, output1Index, dnn);
67 dwDNN_getOutputTensorProperties(&outputProps2, output2Index, dnn);
69 dwDNNTensor_create(&outputTensor1, &outputProps1, contextHandle);
70 dwDNNTensor_create(&outputTensor2, &outputProps2, contextHandle);
72 // Create data conditioner to convert an input image to input tensor.
73 dwDNNMetaData dnnMetaData;
74 dwDNN_getMetaData(&dnnMetaData, dnn);
75 dwDataConditionerHandle_t dataConditioner;
76 dwDataConditioner_initializeFromTensorProperties(&dataConditioner, &inputProps, 1U,
77 &dnnMetaData.dataConditionerParams, cudaStream,
80 // Create CPU tensors for outputs.
81 dwDNNTensorHandle_t outputTensorHost1;
82 dwDNNTensorProperties outputPropsHost1 = outputProps1;
83 outputPropsHost1.tensorType = DW_DNN_TENSOR_TYPE_CPU;
84 dwDNNTensorHandle_t outputTensorHost2;
85 dwDNNTensorProperties outputPropsHost2 = outputProps2;
86 outputPropsHost2.tensorType = DW_DNN_TENSOR_TYPE_CPU;
88 dwDNNTensor_create(&outputTensorHost1, &outputPropsHost1, contextHandle);
89 dwDNNTensor_create(&outputTensorHost2, &outputPropsHost2, contextHandle);
91 // Create tensor streamers to stream outputs from GPU to CPU if needed
92 dwDNNTensorStreamerHandle_t streamer1;
93 dwDNNTensorStreamerHandle_t streamer2;
94 dwDNNTensorStreamer_initialize(&streamer1, &outputProps1, outputPropsHost1.tensorType, contextHandle);
95 dwDNNTensorStreamer_initialize(&streamer2, &outputProps2, outputPropsHost2.tensorType, contextHandle);
98 Convert DNN input from image to tensor, then perform DNN inference and stream results back. All operations are performed asynchronously with the host code.
101 // Run data conditioner to get input tensor
102 dwRect roi{0U, 0U, imageWidth, imageHeight};
103 dwDataConditioner_prepareData(inputTensor, &inputImage, 1, &roi,
104 cudaAddressModeClamp, dataConditioner);
106 // Begin DNN inference in the currently selected CUDA stream.
107 dwConstDNNTensorHandle_t inputs[1U] = {inputTensor};
108 dwDNNTensorHandle_t outputs[2U] = {outputTensor1, outputTensor2};
109 dwDNN_infer(outputs, inputs, dnn);
111 // Stream results from GPU to CPU
112 dwDNNTensorStreamer_producerSend(outputTensor1, streamer1);
113 dwDNNTensorStreamer_consumerReceive(&outputTensorHost1, streamer1);
115 dwDNNTensorStreamer_producerSend(outputTensor2, streamer2);
116 dwDNNTensorStreamer_consumerReceive(&outputTensorHost2, streamer2);
118 // Work on received output tensors.
119 void* data1 = nullptr;
120 void* data2 = nullptr;
121 dwDNNTensor_lock(&data1, outputTensorHost1);
122 dwDNNTensor_lock(&data2, outputTensorHost2);
126 dwDNNTensor_unlock(outputTensorHost1);
127 dwDNNTensor_unlock(outputTensorHost2);
129 // Return streamed tensors.
130 dwDNNTensorStreamer_consumerReturn(&outputTensorHost1, streamer1);
131 dwDNNTensorStreamer_producerReturn(nullptr, 1000, streamer1);
132 dwDNNTensorStreamer_consumerReturn(&outputTensorHost2, streamer2);
133 dwDNNTensorStreamer_producerReturn(nullptr, 1000, streamer2);
136 Finally, free previously allocated memory.
139 dwDNNTensor_destroy(outputTensor1);
140 dwDNNTensor_destroy(outputTensor2);
141 dwDNNTensor_destroy(outputTensorHost1);
142 dwDNNTensor_destroy(outputTensorHost2);
143 dwDNNTensorStreamer_release(streamer1);
144 dwDNNTensorStreamer_release(streamer2);
145 dwDataConditioner_release(dataConditioner);
149 For more information see:
150 - @ref dwx_sample_dnn_tensor