Overview

This application fetches frames from the input video source, runs the algorithm on the previous and current images, and then calculates one motion vector for every cell of a regular grid laid over the image (a gridsize x gridsize pixel block). The output motion vectors are mapped to the HSV colorspace, where hue relates to motion angle and value relates to motion speed, and the result is saved to a video file.

Instructions

The command line parameters are:

<backend> <input video> <quality> <gridsize> <numlevels>

where

backend: Defines the backend that will perform the processing. Only the ofa backend is supported, and it is only available on Jetson AGX Orin.
input video: Input video file name, it accepts .mp4, .avi and possibly others, depending on OpenCV's support.
quality: Specify the quality that the algorithm will use. Available options are: low (fastest), medium (balanced perf and quality) and high (slowest).
gridsize: size of the regular grid over the image, each cell will result in one motion vector. Use 1 for dense grid.
numlevels: number of pyramid levels used.

Here's one example for Jetson AGX Orin.

C++
./vpi_sample_13_optflow_dense ofa ../assets/pedestrians.mp4 high 1 5
Python
python3 main.py ofa ../assets/pedestrians.mp4 high 1 5

The application will process pedestrians.mp4 and create denseoptflow_mv_ofa.mp4 (C++) or denseoptflow_mv_python3_ofa.mp4 (Python).

Results

Input video	Motion vector video

Source Code

For convenience, here's the code that is also installed in the samples directory.

Language: C++ Python

 import sys
 import vpi
 import numpy as np
 from os import path
 from argparse import ArgumentParser
 from contextlib import contextmanager
 import cv2
  
  
 # ----------------------------
 # Some utility functions
  
 def process_motion_vectors(mv):
     with mv.rlock_cpu() as data:
         # convert S10.5 format to float
         flow = np.float32(data)/(1<<5)
  
     # Create an image where the motion vector angle is
     # mapped to a color hue, and intensity is proportional
     # to vector's magnitude
     magnitude, angle = cv2.cartToPolar(flow[:,:,0], flow[:,:,1], angleInDegrees=True)
  
     clip = 5.0
     cv2.threshold(magnitude, clip, clip, cv2.THRESH_TRUNC, magnitude)
  
     # build the hsv image
     hsv = np.ndarray([flow.shape[0], flow.shape[1], 3], np.float32)
     hsv[:,:,0] = angle
     hsv[:,:,1] = np.ones((angle.shape[0], angle.shape[1]), np.float32)
     hsv[:,:,2] = magnitude / clip
  
     # Convert HSV to BGR8
     bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
     return np.uint8(bgr*255)
  
 # ----------------------------
 # Parse command line arguments
  
 parser = ArgumentParser()
 parser.add_argument('backend', choices=['ofa'],
                     help='Backend to be used for processing')
  
 parser.add_argument('input',
                     help='Input video to be processed')
  
 parser.add_argument('quality', choices=['low', 'medium', 'high'],
                     help='Quality setting')
  
 parser.add_argument('gridSize', type=int, choices=[1,2,4,8],
                     help='Grid size')
  
 parser.add_argument('numLevels', type=int, choices=[1,2,3,4,5],
                     help='Number of pyramid levels')
  
 args = parser.parse_args();
  
 assert args.backend == 'ofa'
 if args.backend == 'ofa':
     backend = vpi.Backend.OFA
  
 if args.quality == "low":
     quality = vpi.OptFlowQuality.LOW
 elif args.quality == "medium":
     quality = vpi.OptFlowQuality.MEDIUM
 else:
     assert args.quality == "high"
     quality = vpi.OptFlowQuality.HIGH
  
 # -----------------------------
 # Open input and output videos
  
 inVideo = cv2.VideoCapture(args.input)
  
 fourcc = cv2.VideoWriter_fourcc(*'MPEG')
 inSize = (int(inVideo.get(cv2.CAP_PROP_FRAME_WIDTH)), int(inVideo.get(cv2.CAP_PROP_FRAME_HEIGHT)))
 fps = inVideo.get(cv2.CAP_PROP_FPS)
  
 # Calculate the output dimensions based on the input's and the chosen grid size
 outSize = ((inSize[0] + args.gridSize-1)//args.gridSize, (inSize[1]+args.gridSize-1)//args.gridSize)
  
 outVideo = cv2.VideoWriter('denseoptflow_mv_python'+str(sys.version_info[0])+'_'+args.backend+'.mp4',
                             fourcc, fps, outSize)
  
 #---------------------------------
 # Main processing loop
  
 prevFrame = None
  
 idFrame = 0
 while True:
     # Read one input frame
     ret, cvFrame = inVideo.read()
     if not ret:
         break
  
     # Convert it to Y8_ER_BL pyramid format to be used by VPI
     # No single backend can convert from OpenCV's BGR8 to Y8_ER_BL
     # required by the algorithm. We must do in two steps using CUDA and VIC.
     curFrame = vpi.asimage(cvFrame, vpi.Format.BGR8) \
                 .convert(vpi.Format.Y8_ER, backend=vpi.Backend.CUDA) \
                 .gaussian_pyramid(args.numLevels, backend=vpi.Backend.CUDA) \
                 .convert(vpi.Format.Y8_ER_BL, backend=vpi.Backend.VIC)
  
     # Need at least 2 frames to start processing
     if prevFrame is not None:
         print("Processing frame {}".format(idFrame))
  
         # Calculate the motion vectors from previous to current frame
         with backend:
             motion_vectors = vpi.optflow_dense(prevFrame, curFrame, quality = quality, gridsize = args.gridSize)
  
         # Turn motion vectors into an image
         motion_image = process_motion_vectors(motion_vectors)
  
         # Save it to output video
         outVideo.write(motion_image)
  
     # Prepare next iteration
     prevFrame = curFrame
     idFrame += 1

 #include <opencv2/core/version.hpp>
 #include <opencv2/imgcodecs.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
 #include <opencv2/videoio.hpp>
 #include <vpi/OpenCVInterop.hpp>

 #include <vpi/Array.h>
 #include <vpi/Image.h>
 #include <vpi/ImageFormat.h>
 #include <vpi/Pyramid.h>
 #include <vpi/Status.h>
 #include <vpi/Stream.h>
 #include <vpi/algo/ConvertImageFormat.h>
 #include <vpi/algo/GaussianPyramid.h>
 #include <vpi/algo/OpticalFlowDense.h>

 #include <cstdio>
 #include <cstdlib>
 #include <iostream>
 #include <sstream>
 #include <stdexcept>
 #include <string>
 #include <utility>
 #include <vector>
  
 #define CHECK_STATUS(STMT)                                    \
     do                                                        \
     {                                                         \
         VPIStatus status = (STMT);                            \
         if (status != VPI_SUCCESS)                            \
         {                                                     \
             char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH];       \
             vpiGetLastStatusMessage(buffer, sizeof(buffer));  \
             std::ostringstream ss;                            \
             ss << "line " << __LINE__ << ": ";                \
             ss << vpiStatusGetName(status) << ": " << buffer; \
             throw std::runtime_error(ss.str());               \
         }                                                     \
     } while (0);
  
 static void ProcessMotionVector(VPIImage mvImg, cv::Mat &outputImage)
 {
     // Lock the input image to access it from CPU
     VPIImageData mvData;
     CHECK_STATUS(vpiImageLockData(mvImg, VPI_LOCK_READ, VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR, &mvData));
  
     // Create a cv::Mat that points to the input image data
     cv::Mat mvImage;
     CHECK_STATUS(vpiImageDataExportOpenCVMat(mvData, &mvImage));
  
     // Convert S10.5 format to float
     cv::Mat flow(mvImage.size(), CV_32FC2);
     mvImage.convertTo(flow, CV_32F, 1.0f / (1 << 5));
  
     // Image not needed anymore, we can unlock it.
     CHECK_STATUS(vpiImageUnlock(mvImg));
  
     // Create an image where the motion vector angle is
     // mapped to a color hue, and intensity is proportional
     // to vector's magnitude.
     cv::Mat magnitude, angle;
     {
         cv::Mat flowChannels[2];
         split(flow, flowChannels);
         cv::cartToPolar(flowChannels[0], flowChannels[1], magnitude, angle, true);
     }
  
     float clip = 5;
     cv::threshold(magnitude, magnitude, clip, clip, cv::THRESH_TRUNC);
  
     // build hsv image
     cv::Mat _hsv[3], hsv, bgr;
     _hsv[0] = angle;
     _hsv[1] = cv::Mat::ones(angle.size(), CV_32F);
     _hsv[2] = magnitude / clip; // intensity must vary from 0 to 1
     merge(_hsv, 3, hsv);
  
     cv::cvtColor(hsv, bgr, cv::COLOR_HSV2BGR);
     bgr.convertTo(outputImage, CV_8U, 255.0);
 }
  
 int main(int argc, char *argv[])
 {
     // OpenCV image that will be wrapped by a VPIImage.
     // Define it here so that it's destroyed *after* wrapper is destroyed
     cv::Mat cvPrevFrame, cvCurFrame;
  
     // VPI objects that will be used
     VPIStream stream         = NULL;
     VPIImage imgPrevFramePL  = NULL;
     VPIImage imgPrevFrameTmp = NULL;
     VPIImage imgCurFramePL   = NULL;
     VPIImage imgCurFrameTmp  = NULL;
     VPIImage imgMotionVecBL  = NULL;
     VPIImage imgMotionVecPL  = NULL;
  
     VPIPyramid prevPyrTmp = NULL;
     VPIPyramid prevPyrBL  = NULL;
     VPIPyramid curPyrTmp  = NULL;
     VPIPyramid curPyrBL   = NULL;
  
     VPIPayload payload = NULL;
  
     int retval = 0;
  
     try
     {
         if (argc != 6)
         {
             throw std::runtime_error(std::string("Usage: ") + argv[0] +
                                      " <ofa> <input_video> <low|medium|high> <gridsize> <numlevels>");
         }
  
         // Parse input parameters
         std::string strBackend    = argv[1];
         std::string strInputVideo = argv[2];
         std::string strQuality    = argv[3];
         std::string strGridSize   = argv[4];
         std::string strNumLevels  = argv[5];
  
         VPIOpticalFlowQuality quality;
         if (strQuality == "low")
         {
             quality = VPI_OPTICAL_FLOW_QUALITY_LOW;
         }
         else if (strQuality == "medium")
         {
             quality = VPI_OPTICAL_FLOW_QUALITY_MEDIUM;
         }
         else if (strQuality == "high")
         {
             quality = VPI_OPTICAL_FLOW_QUALITY_HIGH;
         }
         else
         {
             throw std::runtime_error("Unknown quality provided");
         }
  
         VPIBackend backend;
         if (strBackend == "ofa")
         {
             backend = VPI_BACKEND_OFA;
         }
         else
         {
             throw std::runtime_error("Backend '" + strBackend + "' not recognized, it must be ofa.");
         }
  
         char *endptr;
         int gridSize = strtol(strGridSize.c_str(), &endptr, 10);
         if (*endptr != '\0')
         {
             throw std::runtime_error("Syntax error parsing gridsize " + strGridSize);
         }
  
         int numLevels = strtol(strNumLevels.c_str(), &endptr, 10);
         if (*endptr != '\0')
         {
             throw std::runtime_error("Syntax error parsing numlevels " + strNumLevels);
         }
  
         // Load the input video
         cv::VideoCapture invid;
         if (!invid.open(strInputVideo))
         {
             throw std::runtime_error("Can't open '" + strInputVideo + "'");
         }
  
         // Create the stream where processing will happen. We'll use user-provided backend
         // for Optical Flow, and CUDA/VIC for image format conversions.
         CHECK_STATUS(vpiStreamCreate(backend | VPI_BACKEND_CUDA | VPI_BACKEND_VIC, &stream));
  
         // Fetch the first frame
         if (!invid.read(cvPrevFrame))
         {
             throw std::runtime_error("Cannot read frame from input video");
         }
  
         // Create the previous and current frame wrapper using the first frame. This wrapper will
         // be set to point to every new frame in the main loop.
         CHECK_STATUS(vpiImageCreateWrapperOpenCVMat(cvPrevFrame, 0, &imgPrevFramePL));
         CHECK_STATUS(vpiImageCreateWrapperOpenCVMat(cvPrevFrame, 0, &imgCurFramePL));
  
         // Define the image formats we'll use throughout this sample.
         VPIImageFormat imgFmt   = VPI_IMAGE_FORMAT_Y8_ER;
         VPIImageFormat imgFmtBL = VPI_IMAGE_FORMAT_Y8_ER_BL;
  
         int32_t width  = cvPrevFrame.cols;
         int32_t height = cvPrevFrame.rows;
  
         // Create Dense Optical Flow payload to be executed on the given backend
         std::vector<int32_t> pyrGridSize(numLevels, gridSize); // all levels will have the same grid size
         CHECK_STATUS(vpiCreateOpticalFlowDense(backend, width, height, imgFmtBL, &pyrGridSize[0], pyrGridSize.size(),
                                                quality, &payload));
  
         // The Dense Optical Flow on NVENC or OFA backends expects input to be in block-linear format.
         // Since Convert Image Format algorithm doesn't currently support direct BGR
         // pitch-linear (from OpenCV) to Y8 block-linear conversion, it must be done in two
         // passes, first from BGR/PL to Y8/PL using CUDA, then from Y8/PL to Y8/BL using VIC.
         // The temporary image buffer below will store the intermediate Y8/PL representation.
         CHECK_STATUS(vpiImageCreate(width, height, imgFmt, 0, &imgPrevFrameTmp));
         CHECK_STATUS(vpiImageCreate(width, height, imgFmt, 0, &imgCurFrameTmp));
  
         // Now create the final block-linear buffer that'll be used as input to the
         // algorithm.
  
         CHECK_STATUS(vpiPyramidCreate(width, height, imgFmt, pyrGridSize.size(), 0.5, 0, &prevPyrTmp));
         CHECK_STATUS(vpiPyramidCreate(width, height, imgFmt, pyrGridSize.size(), 0.5, 0, &curPyrTmp));
  
         CHECK_STATUS(vpiPyramidCreate(width, height, imgFmtBL, pyrGridSize.size(), 0.5, 0, &prevPyrBL));
         CHECK_STATUS(vpiPyramidCreate(width, height, imgFmtBL, pyrGridSize.size(), 0.5, 0, &curPyrBL));
  
         // Motion vector image width and height, align to be multiple of gridSize
         int32_t mvWidth  = (width + gridSize - 1) / gridSize;
         int32_t mvHeight = (height + gridSize - 1) / gridSize;
  
         // The output video will be heatmap of motion vector image
         int fourcc = cv::VideoWriter::fourcc('M', 'P', 'E', 'G');
         double fps = invid.get(cv::CAP_PROP_FPS);
  
         cv::VideoWriter outVideo("denseoptflow_mv_" + strBackend + ".mp4", fourcc, fps, cv::Size(mvWidth, mvHeight));
         if (!outVideo.isOpened())
         {
             throw std::runtime_error("Can't create output video");
         }
  
         // Create the output motion vector buffers
         CHECK_STATUS(vpiImageCreate(mvWidth, mvHeight, VPI_IMAGE_FORMAT_2S16_BL, 0, &imgMotionVecBL));
         CHECK_STATUS(vpiImageCreate(mvWidth, mvHeight, VPI_IMAGE_FORMAT_2S16, 0, &imgMotionVecPL));
  
         // First convert the first frame to Y8_BL pyramid. It'll be used as previous frame when the algorithm is called.
         CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, imgPrevFramePL, imgPrevFrameTmp, nullptr));
         CHECK_STATUS(
             vpiSubmitGaussianPyramidGenerator(stream, VPI_BACKEND_CUDA, imgPrevFrameTmp, prevPyrTmp, VPI_BORDER_CLAMP));
         CHECK_STATUS(vpiSubmitConvertImageFormatPyramid(stream, VPI_BACKEND_VIC, prevPyrTmp, prevPyrBL, NULL));
  
         // Create a output image which holds the rendered motion vector image.
         cv::Mat mvOutputImage;
  
         // Fetch a new frame until video ends
         int idxFrame = 1;
         while (invid.read(cvCurFrame))
         {
             printf("Processing frame %d\n", idxFrame++);
             // Wrap frame into a VPIImage, reusing the existing imgCurFramePL.
             CHECK_STATUS(vpiImageSetWrappedOpenCVMat(imgCurFramePL, cvCurFrame));
  
             // Convert current frame to Y8_BL pyramid format
             CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, imgCurFramePL, imgCurFrameTmp, nullptr));
             CHECK_STATUS(vpiSubmitGaussianPyramidGenerator(stream, VPI_BACKEND_CUDA, imgCurFrameTmp, curPyrTmp,
                                                            VPI_BORDER_CLAMP));
             CHECK_STATUS(vpiSubmitConvertImageFormatPyramid(stream, VPI_BACKEND_VIC, curPyrTmp, curPyrBL, NULL));
  
             CHECK_STATUS(
                 vpiSubmitOpticalFlowDensePyramid(stream, backend, payload, prevPyrBL, curPyrBL, imgMotionVecBL));
  
             // Convert output in BL to PL format.
             CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_VIC, imgMotionVecBL, imgMotionVecPL, NULL));
  
             // Wait for processing to finish.
             CHECK_STATUS(vpiStreamSync(stream));
  
             // Render the resulting motion vector in the output image
             ProcessMotionVector(imgMotionVecPL, mvOutputImage);
  
             // Save to output video
             outVideo << mvOutputImage;
  
             // Swap previous frame and next frame
             std::swap(cvPrevFrame, cvCurFrame);
             std::swap(imgPrevFramePL, imgCurFramePL);
             std::swap(prevPyrBL, curPyrBL);
         }
     }
     catch (std::exception &e)
     {
         std::cerr << e.what() << std::endl;
         retval = 1;
     }
  
     // Destroy all resources used
     vpiStreamDestroy(stream);
     vpiPayloadDestroy(payload);
  
     vpiImageDestroy(imgPrevFramePL);
     vpiImageDestroy(imgPrevFrameTmp);
     vpiImageDestroy(imgCurFramePL);
     vpiImageDestroy(imgCurFrameTmp);
     vpiImageDestroy(imgMotionVecBL);
     vpiImageDestroy(imgMotionVecPL);
  
     vpiPyramidDestroy(prevPyrTmp);
     vpiPyramidDestroy(prevPyrBL);
     vpiPyramidDestroy(curPyrTmp);
     vpiPyramidDestroy(curPyrBL);
  
     return retval;
 }

VPI - Vision Programming Interface

3.2 Release

Overview

Instructions

Results

Source Code