Overview

The Stereo Disparity application receives left and right stereo pair images and returns the disparity between them, which is a function of image depth. The result is saved as an image file to disk. If available, it'll also output the corresponding confidence map.

Instructions

The command line parameters are:

<backend> <left image> <right image>

where

backend: either cpu, cuda, pva or pva-nvenc-vic; it defines the backend that will perform the processing. The pva-nvenc-vic and cuda backends allow output of the confidence map in addition to the disparity.
left image: left input image of a rectified stereo pair; it accepts png, jpeg and possibly other formats.
right image: right input image of a stereo pair.

Here's one example:

C++
./vpi_sample_02_stereo_disparity cuda ../assets/chair_stereo_left.png ../assets/chair_stereo_right.png
Python
python main.py cuda ../assets/chair_stereo_left.png ../assets/chair_stereo_right.png

This is using the CUDA backend and the provided sample images. You can try with other stereo pair images, respecting the constraints imposed by the algorithm.

Results

Left input image	Right input image

Stereo disparity	Confidence map

Source Code

For convenience, here's the code that is also installed in the samples directory.

Language: C++ Python

 import cv2
 import sys
 import vpi
 import numpy as np
 from PIL import Image
 from argparse import ArgumentParser
  
 # ----------------------------
 # Parse command line arguments
  
 parser = ArgumentParser()
 parser.add_argument('backend', choices=['cpu','cuda','pva','pva-nvenc-vic'],
                     help='Backend to be used for processing')
  
 parser.add_argument('left',
                     help='Rectified left input image from a stereo pair')
  
 parser.add_argument('right',
                     help='Rectified right input image from a stereo pair')
  
 args = parser.parse_args();
  
 # pixel value scaling factor when loading input
 scale=1
  
 if args.backend == 'cpu':
     backend = vpi.Backend.CPU
 elif args.backend == 'cuda':
     backend = vpi.Backend.CUDA
 elif args.backend == 'pva':
     backend = vpi.Backend.PVA
 else:
     assert args.backend == 'pva-nvenc-vic'
     backend = vpi.Backend.PVA|vpi.Backend.NVENC|vpi.Backend.VIC
  
     # For PVA+NVENC+VIC mode, 16bpp input must be MSB-aligned, which
     # is equivalent to say that it is Q8.8 (fixed-point, 8 decimals).
     scale=256
  
 # Streams for left and right independent pre-processing
 streamLeft = vpi.Stream()
 streamRight = vpi.Stream()
  
 # --------------------------------------------------------------
 # Load input into a vpi.Image and convert it to grayscale, 16bpp
 with vpi.Backend.CUDA:
     with streamLeft:
         left = vpi.asimage(np.asarray(Image.open(args.left))).convert(vpi.Format.Y16_ER, scale=scale)
     with streamRight:
         right = vpi.asimage(np.asarray(Image.open(args.right))).convert(vpi.Format.Y16_ER, scale=scale)
  
 # --------------------------------------------------------------
 # Preprocess input
  
 # Block linear format is needed for pva-nvenc-vic pipeline
 # Currently we can only convert to block-linear using VIC backend.
 # The input also must be 1080p
 if args.backend == 'pva-nvenc-vic':
     with vpi.Backend.VIC:
         with streamLeft:
             left = left.convert(vpi.Format.Y16_ER_BL).rescale((1920,1080))
         with streamRight:
             right = right.convert(vpi.Format.Y16_ER_BL).rescale((1920,1080))
     maxDisparity = 256
 else:
     maxDisparity = 64
  
 if args.backend == 'pva-nvenc-vic' or args.backend == 'cuda':
     # only PVA-NVENC-VIC and CUDA have confidence map
     confidenceMap = vpi.Image(left.size, vpi.Format.U16)
 else:
     confidenceMap = None
  
 # Use stream left to consolidate actual stereo processing
 streamStereo = streamLeft
  
 # ---------------------------------------------
 # Estimate stereo disparity
 with streamStereo, backend:
     disparity = vpi.stereodisp(left, right, out_confmap=confidenceMap, window=5, maxdisp=maxDisparity)
  
 # ---------------------------------------------
 # Postprocess results and save them to disk
 with streamStereo, vpi.Backend.CUDA:
     # Scale disparity and confidence map so that values like between 0 and 255.
  
     # Disparities are in Q10.5 format, so to map it to float, it gets
     # divided by 32. Then the resulting disparity range, from 0 to
     # stereo.maxDisparity gets mapped to 0-255 for proper output.
     disparity = disparity.convert(vpi.Format.U8, scale=255.0/(32*maxDisparity))
  
     # Apply JET colormap to turn the disparities into color, reddish hues
     # represent objects closer to the camera, blueish are farther away.
     disparityColor = cv2.applyColorMap(disparity.cpu(), cv2.COLORMAP_JET)
  
     # Converts to RGB for output with PIL
     disparityColor = cv2.cvtColor(disparityColor, cv2.COLOR_BGR2RGB)
  
     if confidenceMap:
         confidenceMap = confidenceMap.convert(vpi.Format.U8, scale=255.0/65535)
  
         # When pixel confidence is 0, its color in the disparity
         # output is black.
         mask = cv2.threshold(confidenceMap.cpu(), 1, 255, cv2.THRESH_BINARY)[1]
         mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
         disparityColor = cv2.bitwise_and(disparityColor, mask)
  
 # -------------------
 # Save result to disk
  
 Image.fromarray(disparityColor).save('disparity_python'+str(sys.version_info[0])+'_'+args.backend+'.png')
  
 if confidenceMap:
     Image.fromarray(confidenceMap.cpu()).save('confidence_python'+str(sys.version_info[0])+'_'+args.backend+'.png')
  
 # vim: ts=8:sw=4:sts=4:et:ai

 #include <opencv2/core/version.hpp>
 #if CV_MAJOR_VERSION >= 3
 #    include <opencv2/imgcodecs.hpp>
 #else
 #    include <opencv2/contrib/contrib.hpp> // for colormap
 #    include <opencv2/highgui/highgui.hpp>
 #endif
  
 #include <opencv2/imgproc/imgproc.hpp>
 #include <vpi/OpenCVInterop.hpp>
  
 #include <vpi/Image.h>
 #include <vpi/Status.h>
 #include <vpi/Stream.h>
 #include <vpi/algo/ConvertImageFormat.h>
 #include <vpi/algo/Rescale.h>
 #include <vpi/algo/StereoDisparity.h>
  
 #include <cstring> // for memset
 #include <iostream>
 #include <sstream>
  
 #define CHECK_STATUS(STMT)                                    \
     do                                                        \
     {                                                         \
         VPIStatus status = (STMT);                            \
         if (status != VPI_SUCCESS)                            \
         {                                                     \
             char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH];       \
             vpiGetLastStatusMessage(buffer, sizeof(buffer));  \
             std::ostringstream ss;                            \
             ss << vpiStatusGetName(status) << ": " << buffer; \
             throw std::runtime_error(ss.str());               \
         }                                                     \
     } while (0);
  
 int main(int argc, char *argv[])
 {
     // OpenCV image that will be wrapped by a VPIImage.
     // Define it here so that it's destroyed *after* wrapper is destroyed
     cv::Mat cvImageLeft, cvImageRight;
  
     // VPI objects that will be used
     VPIImage inLeft        = NULL;
     VPIImage inRight       = NULL;
     VPIImage tmpLeft       = NULL;
     VPIImage tmpRight      = NULL;
     VPIImage stereoLeft    = NULL;
     VPIImage stereoRight   = NULL;
     VPIImage disparity     = NULL;
     VPIImage confidenceMap = NULL;
     VPIStream stream       = NULL;
     VPIPayload stereo      = NULL;
  
     int retval = 0;
  
     try
     {
         // =============================
         // Parse command line parameters
  
         if (argc != 4)
         {
             throw std::runtime_error(std::string("Usage: ") + argv[0] +
                                      " <cpu|pva|cuda|pva-nvenc-vic> <left image> <right image>");
         }
  
         std::string strBackend       = argv[1];
         std::string strLeftFileName  = argv[2];
         std::string strRightFileName = argv[3];
  
         uint32_t backends;
  
         if (strBackend == "cpu")
         {
             backends = VPI_BACKEND_CPU;
         }
         else if (strBackend == "cuda")
         {
             backends = VPI_BACKEND_CUDA;
         }
         else if (strBackend == "pva")
         {
             backends = VPI_BACKEND_PVA;
         }
         else if (strBackend == "pva-nvenc-vic")
         {
             backends = VPI_BACKEND_PVA | VPI_BACKEND_NVENC | VPI_BACKEND_VIC;
         }
         else
         {
             throw std::runtime_error("Backend '" + strBackend +
                                      "' not recognized, it must be either cpu, cuda, pva or pva-nvenc-vic.");
         }
  
         // =====================
         // Load the input images
         cvImageLeft = cv::imread(strLeftFileName);
         if (cvImageLeft.empty())
         {
             throw std::runtime_error("Can't open '" + strLeftFileName + "'");
         }
  
         cvImageRight = cv::imread(strRightFileName);
         if (cvImageRight.empty())
         {
             throw std::runtime_error("Can't open '" + strRightFileName + "'");
         }
  
         // =================================
         // Allocate all VPI resources needed
  
         int32_t inputWidth  = cvImageLeft.cols;
         int32_t inputHeight = cvImageLeft.rows;
  
         // Create the stream that will be used for processing.
         CHECK_STATUS(vpiStreamCreate(0, &stream));
  
         // We now wrap the loaded images into a VPIImage object to be used by VPI.
         // VPI won't make a copy of it, so the original image must be in scope at all times.
         CHECK_STATUS(vpiImageCreateOpenCVMatWrapper(cvImageLeft, 0, &inLeft));
         CHECK_STATUS(vpiImageCreateOpenCVMatWrapper(cvImageRight, 0, &inRight));
  
         // Format conversion parameters needed for input pre-processing
         VPIConvertImageFormatParams convParams;
         CHECK_STATUS(vpiInitConvertImageFormatParams(&convParams));
  
         // Set algorithm parameters to be used. Only values what differs from defaults will be overwritten.
         VPIStereoDisparityEstimatorCreationParams stereoParams;
         CHECK_STATUS(vpiInitStereoDisparityEstimatorCreationParams(&stereoParams));
  
         // Define some backend-dependent parameters
  
         VPIImageFormat stereoFormat;
         int stereoWidth, stereoHeight;
         if (strBackend == "pva-nvenc-vic")
         {
             stereoFormat = VPI_IMAGE_FORMAT_Y16_ER_BL;
  
             // Input width and height has to be 1920x1080 in block-linear format for pva-nvenc-vic pipeline
             stereoWidth  = 1920;
             stereoHeight = 1080;
  
             // For PVA+NVENC+VIC mode, 16bpp input must be MSB-aligned, which
             // is equivalent to say that it is Q8.8 (fixed-point, 8 decimals).
             convParams.scale = 256;
  
             // Maximum disparity is fixed to 256.
             stereoParams.maxDisparity = 256;
         }
         else
         {
             stereoFormat = VPI_IMAGE_FORMAT_Y16_ER;
  
             if (strBackend == "pva")
             {
                 stereoWidth  = 480;
                 stereoHeight = 270;
             }
             else
             {
                 stereoWidth  = inputWidth;
                 stereoHeight = inputHeight;
             }
  
             stereoParams.maxDisparity = 64;
         }
  
         // Create the payload for Stereo Disparity algorithm.
         // Payload is created before the image objects so that non-supported backends can be trapped with an error.
         CHECK_STATUS(vpiCreateStereoDisparityEstimator(backends, stereoWidth, stereoHeight, stereoFormat, &stereoParams,
                                                        &stereo));
  
         // Create the image where the disparity map will be stored.
         CHECK_STATUS(vpiImageCreate(stereoWidth, stereoHeight, VPI_IMAGE_FORMAT_U16, 0, &disparity));
  
         if (strBackend == "pva-nvenc-vic")
         {
             // Need an temporary image to convert BGR8 input from OpenCV into pixel-linear 16bpp grayscale.
             // We can't convert it directly to block-linear since CUDA backend doesn't support it, and
             // VIC backend doesn't support BGR8 inputs.
             CHECK_STATUS(vpiImageCreate(inputWidth, inputHeight, VPI_IMAGE_FORMAT_Y16_ER, 0, &tmpLeft));
             CHECK_STATUS(vpiImageCreate(inputWidth, inputHeight, VPI_IMAGE_FORMAT_Y16_ER, 0, &tmpRight));
  
             // Input to pva-nvenc-vic stereo disparity must be block linear
             CHECK_STATUS(vpiImageCreate(stereoWidth, stereoHeight, stereoFormat, 0, &stereoLeft));
             CHECK_STATUS(vpiImageCreate(stereoWidth, stereoHeight, stereoFormat, 0, &stereoRight));
  
             // confidence map is needed for pva-nvenc-vic pipeline
             CHECK_STATUS(vpiImageCreate(stereoWidth, stereoHeight, VPI_IMAGE_FORMAT_U16, 0, &confidenceMap));
         }
         else
         {
             // PVA requires that input resolution is 480x270
             if (strBackend == "pva")
             {
                 CHECK_STATUS(vpiImageCreate(inputWidth, inputHeight, stereoFormat, 0, &tmpLeft));
                 CHECK_STATUS(vpiImageCreate(inputWidth, inputHeight, stereoFormat, 0, &tmpRight));
             }
             else if (strBackend == "cuda")
             {
                 CHECK_STATUS(vpiImageCreate(inputWidth, inputHeight, VPI_IMAGE_FORMAT_U16, 0, &confidenceMap));
             }
  
             // Allocate input to stereo disparity algorithm, pitch-linear 16bpp grayscale
             CHECK_STATUS(vpiImageCreate(stereoWidth, stereoHeight, stereoFormat, 0, &stereoLeft));
             CHECK_STATUS(vpiImageCreate(stereoWidth, stereoHeight, stereoFormat, 0, &stereoRight));
         }
  
         // ================
         // Processing stage
  
         // -----------------
         // Pre-process input
         if (strBackend == "pva-nvenc-vic" || strBackend == "pva")
         {
             // Convert opencv input to temporary grayscale format using CUDA
             CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, inLeft, tmpLeft, &convParams));
             CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, inRight, tmpRight, &convParams));
  
             // Do both scale and final image format conversion on VIC.
             CHECK_STATUS(
                 vpiSubmitRescale(stream, VPI_BACKEND_VIC, tmpLeft, stereoLeft, VPI_INTERP_LINEAR, VPI_BORDER_CLAMP, 0));
             CHECK_STATUS(vpiSubmitRescale(stream, VPI_BACKEND_VIC, tmpRight, stereoRight, VPI_INTERP_LINEAR,
                                           VPI_BORDER_CLAMP, 0));
         }
         else
         {
             // Convert opencv input to grayscale format using CUDA
             CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, inLeft, stereoLeft, &convParams));
             CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, inRight, stereoRight, &convParams));
         }
  
         // ------------------------------
         // Do stereo disparity estimation
  
         // Submit it with the input and output images
         CHECK_STATUS(vpiSubmitStereoDisparityEstimator(stream, backends, stereo, stereoLeft, stereoRight, disparity,
                                                        confidenceMap, NULL));
  
         // Wait until the algorithm finishes processing
         CHECK_STATUS(vpiStreamSync(stream));
  
         // ========================================
         // Output pre-processing and saving to disk
         // Lock output to retrieve its data on cpu memory
         VPIImageData data;
         CHECK_STATUS(vpiImageLock(disparity, VPI_LOCK_READ, &data));
  
         // Make an OpenCV matrix out of this image
         cv::Mat cvDisparity;
         CHECK_STATUS(vpiImageDataExportOpenCVMat(data, &cvDisparity));
  
         // Scale result and write it to disk. Disparities are in Q10.5 format,
         // so to map it to float, it gets divided by 32. Then the resulting disparity range,
         // from 0 to stereo.maxDisparity gets mapped to 0-255 for proper output.
         cvDisparity.convertTo(cvDisparity, CV_8UC1, 255.0 / (32 * stereoParams.maxDisparity), 0);
  
         // Apply JET colormap to turn the disparities into color, reddish hues
         // represent objects closer to the camera, blueish are farther away.
         cv::Mat cvDisparityColor;
         applyColorMap(cvDisparity, cvDisparityColor, cv::COLORMAP_JET);
  
         // Done handling output, don't forget to unlock it.
         CHECK_STATUS(vpiImageUnlock(disparity));
  
         // If we have a confidence map,
         if (confidenceMap)
         {
             // Write it to disk too.
             //
             VPIImageData data;
             CHECK_STATUS(vpiImageLock(confidenceMap, VPI_LOCK_READ, &data));
  
             cv::Mat cvConfidence;
             CHECK_STATUS(vpiImageDataExportOpenCVMat(data, &cvConfidence));
  
             // Confidence map varies from 0 to 65535, we scale it to
             // [0-255].
             cvConfidence.convertTo(cvConfidence, CV_8UC1, 255.0 / 65535, 0);
             imwrite("confidence_" + strBackend + ".png", cvConfidence);
  
             CHECK_STATUS(vpiImageUnlock(confidenceMap));
  
             // When pixel confidence is 0, its color in the disparity
             // output is black.
             cv::Mat cvMask;
             threshold(cvConfidence, cvMask, 1, 255, cv::THRESH_BINARY);
             cvtColor(cvMask, cvMask, cv::COLOR_GRAY2BGR);
             bitwise_and(cvDisparityColor, cvMask, cvDisparityColor);
         }
  
         imwrite("disparity_" + strBackend + ".png", cvDisparityColor);
     }
     catch (std::exception &e)
     {
         std::cerr << e.what() << std::endl;
         retval = 1;
     }
  
     // ========
     // Clean up
  
     // Destroying stream first makes sure that all work submitted to
     // it is finished.
     vpiStreamDestroy(stream);
  
     // Only then we can destroy the other objects, as we're sure they
     // aren't being used anymore.
  
     vpiImageDestroy(inLeft);
     vpiImageDestroy(inRight);
     vpiImageDestroy(tmpLeft);
     vpiImageDestroy(tmpRight);
     vpiImageDestroy(stereoLeft);
     vpiImageDestroy(stereoRight);
     vpiImageDestroy(confidenceMap);
     vpiImageDestroy(disparity);
     vpiPayloadDestroy(stereo);
  
     return retval;
 }
  
 // vim: ts=8:sw=4:sts=4:et:ai

VPI - Vision Programming Interface

1.2 Release

Overview

Instructions

Results

Source Code