Overview

This application tracks feature points on an input video, draws the features on each frame and saves them to disk. The user can define what backend will be used for processing.

Note: The output will be in grayscale as the algorithm currently doesn't support color inputs.

Instructions

The command line parameters are:

<backend> <input video> <pyramid levels> <output frames>

where

backend: either cpu or cuda; it defines the backend that will perform the processing.
input video: input video file name, it accepts all video types that OpenCV's cv::VideoCapture accepts.
pyramid levels: specifies the number of pyramid levels used in the algorithm.
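output frames: the file name that will be used for the output frames. The frame number is inserted before the extension. Example: output.png will generate frames output_0000.png, output_0001.png, output_0002.png, and so on. Note that the samples also append the backend name (and, in the Python sample, the Python major version) to the base name, so the files actually written look like output_cuda_0000.png (C++) or output_python3_cuda_0000.png (Python).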

Here's one example:

C++
./vpi_sample_12_optflow_lk cuda ../assets/dashcam.mp4 5 frame.png
Python
python3 main.py cuda ../assets/dashcam.mp4 5 frame.png

This uses the CUDA backend and one of the provided sample videos, with the number of pyramid levels set to 5.

Results

Frame 0009

Source Code

For convenience, here's the code that is also installed in the samples directory.

Language: C++ Python

 import sys
 import vpi
 import numpy as np
 from os import path
 from argparse import ArgumentParser
 from contextlib import contextmanager
 import cv2
  
  
 # --------------------------------------
 # Some definitions and utility functions
  
 # Maximum number of keypoints that will be tracked
 MAX_KEYPOINTS = 100
  
 def update_mask(mask, trackColors, prevFeatures, curFeatures, status = None):
     '''Draw keypoint path from previous frame to current one'''
  
     numTrackedKeypoints = 0
  
     def none_context(a=None): return contextmanager(lambda: (x for x in [a]))()
  
     with curFeatures.rlock_cpu(), \
          (status.rlock_cpu() if status else none_context()), \
          (prevFeatures.rlock_cpu() if prevFeatures else none_context()):
  
         for i in range(curFeatures.size):
             # keypoint is being tracked?
             if not status or status.cpu()[i] == 0:
                 color = tuple(trackColors[i,0].tolist())
  
                 # OpenCV 4.5+ wants integers in the tuple arguments below
                 cf = tuple(np.round(curFeatures.cpu()[i]).astype(int))
  
                 # draw the tracks
                 if prevFeatures:
                     pf = tuple(np.round(prevFeatures.cpu()[i]).astype(int))
                     cv2.line(mask, pf, cf, color, 2)
  
                 cv2.circle(mask, cf, 5, color, -1)
  
                 numTrackedKeypoints += 1
  
     return numTrackedKeypoints
  
 def save_file_to_disk(frame, mask, baseFileName, frameCounter):
     '''Apply mask on frame and save it to disk'''
  
     frame = frame.convert(vpi.Format.BGR8, backend=vpi.Backend.CUDA)
     with frame.rlock_cpu() as frameData:
         frame = cv2.add(frameData, mask)
  
     name, ext = path.splitext(baseFileName)
     fname = "{}_{:04d}{}".format(name, frameCounter, ext)
  
     cv2.imwrite(fname, frame, [cv2.IMWRITE_JPEG_QUALITY, 70])
  
 # ----------------------------
 # Parse command line arguments
  
 parser = ArgumentParser()
 parser.add_argument('backend', choices=['cpu', 'cuda'],
                     help='Backend to be used for processing')
  
 parser.add_argument('input',
                     help='Input video to be processed')
  
 parser.add_argument('pyramid_levels', type=int,
                     help='Number of levels in the pyramid used with the algorithm')
  
 parser.add_argument('output',
                     help='Output file name')
  
 args = parser.parse_args();
  
 if args.backend == 'cuda':
     backend = vpi.Backend.CUDA
 else:
     assert args.backend == 'cpu'
     backend = vpi.Backend.CPU
  
 # adjust output file name to take into account backend used and python version
 name, ext = path.splitext(args.output)
 args.output = "{}_python{}_{}{}".format(name, sys.version_info[0], args.backend, ext)
  
 # ----------------
 # Open input video
  
 inVideo = cv2.VideoCapture(args.input)
  
 # Read first input frame
 ok, cvFrame = inVideo.read()
 if not ok:
     exit('Cannot read first input frame')
  
 # ---------------------------
 # Perform some pre-processing
  
 # Retrieve features to be tracked from first frame using
 # Harris Corners Detector
 with vpi.Backend.CPU:
     frame = vpi.asimage(cvFrame, vpi.Format.BGR8).convert(vpi.Format.U8)
     curFeatures, scores = frame.harriscorners(strength=0.1, sensitivity=0.01)
  
 # Limit the number of features we'll track and calculate their colors on the
 # output image
 with curFeatures.lock_cpu() as featData, scores.rlock_cpu() as scoresData:
     # Sort features in descending scores order and keep the first MAX_KEYPOINTS
     ind = np.argsort(scoresData, kind='mergesort')[::-1]
     featData[:] = np.take(featData, ind, axis=0)
     curFeatures.size = min(curFeatures.size, MAX_KEYPOINTS)
  
     # Keypoints' have different hues, calculated from their position in the first frame
     trackColors = np.array([[(int(p[0]) ^ int(p[1])) % 180,255,255] for p in featData], np.uint8).reshape(-1,1,3)
     # Convert colors from HSV to RGB
     trackColors = cv2.cvtColor(trackColors, cv2.COLOR_HSV2BGR).astype(int)
  
 with backend:
     optflow = vpi.OpticalFlowPyrLK(frame, curFeatures, args.pyramid_levels)
  
 # Counter for the frames
 idFrame = 0
  
 # Create mask with features' tracks over time
 mask = np.zeros((frame.height, frame.width, 3), np.uint8)
 numTrackedKeypoints = update_mask(mask, trackColors, None, curFeatures)
  
 while True:
     # Apply mask to frame and save it to disk
     save_file_to_disk(frame, mask, args.output, idFrame)
  
     print("Frame id={}: {} points tracked.".format(idFrame, numTrackedKeypoints))
  
     prevFeatures = curFeatures
  
     # Read one input frame
     ret, cvFrame = inVideo.read()
     if not ret:
         print("Video ended.")
         break
     idFrame += 1
  
     # Convert frame to grayscale
     with vpi.Backend.CUDA:
         frame = vpi.asimage(cvFrame, vpi.Format.BGR8).convert(vpi.Format.U8);
  
     # Calculate where keypoints are in current frame
     curFeatures, status = optflow(frame)
  
     # Update the mask with the current keypoints' position
     numTrackedKeypoints = update_mask(mask, trackColors, prevFeatures, curFeatures, status)
  
     # No more keypoints to track?
     if numTrackedKeypoints == 0:
         print("No keypoints to track.")
         break # nothing else to do

 #include <opencv2/core/version.hpp>
 #if CV_MAJOR_VERSION >= 3
 #    include <opencv2/imgcodecs.hpp>
 #    include <opencv2/videoio.hpp>
 #else
 #    include <opencv2/highgui/highgui.hpp>
 #endif
  
 #include <opencv2/imgproc/imgproc.hpp>
 #include <vpi/OpenCVInterop.hpp>
  
 #include <vpi/Array.h>
 #include <vpi/Image.h>
 #include <vpi/Pyramid.h>
 #include <vpi/Status.h>
 #include <vpi/Stream.h>
 #include <vpi/algo/ConvertImageFormat.h>
 #include <vpi/algo/GaussianPyramid.h>
 #include <vpi/algo/HarrisCorners.h>
 #include <vpi/algo/OpticalFlowPyrLK.h>
  
 #include <algorithm>
 #include <cstring> // for memset
 #include <fstream>
 #include <iostream>
 #include <map>
 #include <numeric>
 #include <sstream>
 #include <vector>
  
 // Max number of corners detected by harris corner algo
 constexpr int MAX_HARRIS_CORNERS = 8192;
  
 // Max number of keypoints to be tracked
 constexpr int MAX_KEYPOINTS = 100;
  
 // Evaluates STMT once and, if the resulting VPIStatus isn't VPI_SUCCESS, throws
 // a std::runtime_error whose message is "<status name>: <last status message>".
 //
 // NOTE: the expansion already ends with a ';' (see the last line of the macro),
 // so 'CHECK_STATUS(x);' expands to the statement followed by an empty statement.
 // Be careful when using it as the unbraced body of an 'if' that has an 'else'.
 #define CHECK_STATUS(STMT)                                      \
     do                                                          \
     {                                                           \
         VPIStatus status__ = (STMT);                            \
         if (status__ != VPI_SUCCESS)                            \
         {                                                       \
             char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH];         \
             vpiGetLastStatusMessage(buffer, sizeof(buffer));    \
             std::ostringstream ss;                              \
             ss << vpiStatusGetName(status__) << ": " << buffer; \
             throw std::runtime_error(ss.str());                 \
         }                                                       \
     } while (0);
  
 static void SaveFileToDisk(VPIImage img, cv::Mat cvMask, std::string baseFileName, int32_t frameCounter)
 {
     VPIImageData imgData;
     CHECK_STATUS(vpiImageLockData(img, VPI_LOCK_READ, VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR, &imgData));
  
     cv::Mat cvImage;
     try
     {
         cv::Mat tmp;
         CHECK_STATUS(vpiImageDataExportOpenCVMat(imgData, &tmp));
         cvtColor(tmp, cvImage, cv::COLOR_GRAY2BGR);
  
         CHECK_STATUS(vpiImageUnlock(img));
     }
     catch (...)
     {
         CHECK_STATUS(vpiImageUnlock(img));
         throw;
     }
  
     add(cvImage, cvMask, cvImage);
  
     // Create the output file name
     std::string fname = baseFileName;
     int ext           = fname.rfind('.');
  
     char buffer[512] = {};
     snprintf(buffer, sizeof(buffer) - 1, "%s_%04d%s", fname.substr(0, ext).c_str(), frameCounter,
              fname.substr(ext).c_str());
  
     // Finally, write frame to disk
     if (!imwrite(buffer, cvImage, {cv::IMWRITE_JPEG_QUALITY, 70}))
     {
         throw std::runtime_error("Can't write to " + std::string(buffer));
     }
 }
  
 // Sort keypoints by decreasing score, and retain only the first 'max'
 static void SortKeypoints(VPIArray keypoints, VPIArray scores, int max)
 {
     VPIArrayData ptsData, scoresData;
     CHECK_STATUS(vpiArrayLockData(keypoints, VPI_LOCK_READ_WRITE, VPI_ARRAY_BUFFER_HOST_AOS, &ptsData));
     CHECK_STATUS(vpiArrayLockData(scores, VPI_LOCK_READ_WRITE, VPI_ARRAY_BUFFER_HOST_AOS, &scoresData));
  
     VPIArrayBufferAOS &aosKeypoints = ptsData.buffer.aos;
     VPIArrayBufferAOS &aosScores    = scoresData.buffer.aos;
  
     std::vector<int> indices(*aosKeypoints.sizePointer);
     std::iota(indices.begin(), indices.end(), 0);
  
     stable_sort(indices.begin(), indices.end(), [&aosScores](int a, int b) {
         uint32_t *score = reinterpret_cast<uint32_t *>(aosScores.data);
         return score[a] >= score[b]; // decreasing score order
     });
  
     // keep the only 'max' indexes.
     indices.resize(std::min<size_t>(indices.size(), max));
  
     VPIKeypointF32 *kptData = reinterpret_cast<VPIKeypointF32 *>(aosKeypoints.data);
  
     // reorder the keypoints to keep the first 'max' with highest scores.
     std::vector<VPIKeypointF32> kpt;
     std::transform(indices.begin(), indices.end(), std::back_inserter(kpt),
                    [kptData](int idx) { return kptData[idx]; });
     std::copy(kpt.begin(), kpt.end(), kptData);
  
     // update keypoint array size.
     *aosKeypoints.sizePointer = kpt.size();
  
     vpiArrayUnlock(scores);
     vpiArrayUnlock(keypoints);
 }
  
 static int UpdateMask(cv::Mat &cvMask, const std::vector<cv::Scalar> &trackColors, VPIArray prevFeatures,
                       VPIArray curFeatures, VPIArray status)
 {
     // Now that optical flow is completed, there are usually two approaches to take:
     // 1. Add new feature points from current frame using a feature detector such as
     //    \ref algo_harris_corners "Harris Corner Detector"
     // 2. Keep using the points that are being tracked.
     //
     // The sample app uses the valid feature point and continue to do the tracking.
  
     // Lock the input and output arrays to draw the tracks to the output mask.
     VPIArrayData curFeaturesData, statusData;
     CHECK_STATUS(vpiArrayLockData(curFeatures, VPI_LOCK_READ, VPI_ARRAY_BUFFER_HOST_AOS, &curFeaturesData));
     CHECK_STATUS(vpiArrayLockData(status, VPI_LOCK_READ, VPI_ARRAY_BUFFER_HOST_AOS, &statusData));
  
     const VPIArrayBufferAOS &aosCurFeatures = curFeaturesData.buffer.aos;
     const VPIArrayBufferAOS &aosStatus      = statusData.buffer.aos;
  
     const VPIKeypointF32 *pCurFeatures = (VPIKeypointF32 *)aosCurFeatures.data;
     const uint8_t *pStatus             = (uint8_t *)aosStatus.data;
  
     const VPIKeypointF32 *pPrevFeatures;
     if (prevFeatures)
     {
         VPIArrayData prevFeaturesData;
         CHECK_STATUS(vpiArrayLockData(prevFeatures, VPI_LOCK_READ, VPI_ARRAY_BUFFER_HOST_AOS, &prevFeaturesData));
         pPrevFeatures = (VPIKeypointF32 *)prevFeaturesData.buffer.aos.data;
     }
     else
     {
         pPrevFeatures = NULL;
     }
  
     int numTrackedKeypoints = 0;
     int totKeypoints        = *curFeaturesData.buffer.aos.sizePointer;
  
     for (int i = 0; i < totKeypoints; i++)
     {
         // keypoint is being tracked?
         if (pStatus[i] == 0)
         {
             // draw the tracks
             cv::Point curPoint{(int)round(pCurFeatures[i].x), (int)round(pCurFeatures[i].y)};
             if (pPrevFeatures != NULL)
             {
                 cv::Point2f prevPoint{pPrevFeatures[i].x, pPrevFeatures[i].y};
                 line(cvMask, prevPoint, curPoint, trackColors[i], 2);
             }
  
             circle(cvMask, curPoint, 5, trackColors[i], -1);
  
             numTrackedKeypoints++;
         }
     }
  
     // We're finished working with the arrays.
     if (prevFeatures)
     {
         CHECK_STATUS(vpiArrayUnlock(prevFeatures));
     }
     CHECK_STATUS(vpiArrayUnlock(curFeatures));
     CHECK_STATUS(vpiArrayUnlock(status));
  
     return numTrackedKeypoints;
 }
  
 int main(int argc, char *argv[])
 {
     // OpenCV image that will be wrapped by a VPIImage.
     // Define it here so that it's destroyed *after* wrapper is destroyed
     cv::Mat cvFrame;
  
     // VPI objects that will be used
     VPIStream stream        = NULL;
     VPIImage imgTempFrame   = NULL;
     VPIImage imgFrame       = NULL;
     VPIPyramid pyrPrevFrame = NULL, pyrCurFrame = NULL;
     VPIArray prevFeatures = NULL, curFeatures = NULL, status = NULL;
     VPIPayload optflow = NULL;
     VPIArray scores    = NULL;
     VPIPayload harris  = NULL;
  
     int retval = 0;
  
     try
     {
         // ============================
         // Parse command line arguments
  
         if (argc != 5)
         {
             throw std::runtime_error(std::string("Usage: ") + argv[0] +
                                      " <cpu|cuda> <input_video> <pyramid_levels> <output>");
         }
  
         std::string strBackend     = argv[1];
         std::string strInputVideo  = argv[2];
         int32_t pyrLevel           = std::stoi(argv[3]);
         std::string strOutputFiles = argv[4];
  
         // Now parse the backend
         VPIBackend backend;
  
         if (strBackend == "cpu")
         {
             backend = VPI_BACKEND_CPU;
         }
         else if (strBackend == "cuda")
         {
             backend = VPI_BACKEND_CUDA;
         }
         else
         {
             throw std::runtime_error("Backend '" + strBackend + "' not recognized, it must be either cpu or cuda.");
         }
  
         {
             int ext        = strOutputFiles.rfind('.');
             strOutputFiles = strOutputFiles.substr(0, ext) + "_" + strBackend + strOutputFiles.substr(ext);
         }
  
         // ====================
         // Load the input video
         cv::VideoCapture invid;
         if (!invid.open(strInputVideo))
         {
             throw std::runtime_error("Can't open '" + strInputVideo + "'");
         }
  
         // Fetch the first frame and wrap it into a VPIImage.
         // The points to be tracked will be gathered from this frame later on.
         if (!invid.read(cvFrame))
         {
             throw std::runtime_error("Can't retrieve first frame from '" + strInputVideo + "'");
         }
  
         // =================================================
         // Allocate VPI resources and do some pre-processing
  
         // Create the stream where processing will happen.
         CHECK_STATUS(vpiStreamCreate(0, &stream));
  
         CHECK_STATUS(vpiImageCreateWrapperOpenCVMat(cvFrame, 0, &imgTempFrame));
  
         // Create grayscale image representation of input.
         CHECK_STATUS(vpiImageCreate(cvFrame.cols, cvFrame.rows, VPI_IMAGE_FORMAT_U8, 0, &imgFrame));
  
         // Create the image pyramids used by the algorithm
         CHECK_STATUS(
             vpiPyramidCreate(cvFrame.cols, cvFrame.rows, VPI_IMAGE_FORMAT_U8, pyrLevel, 0.5, 0, &pyrPrevFrame));
         CHECK_STATUS(vpiPyramidCreate(cvFrame.cols, cvFrame.rows, VPI_IMAGE_FORMAT_U8, pyrLevel, 0.5, 0, &pyrCurFrame));
  
         // Create input and output arrays
         CHECK_STATUS(vpiArrayCreate(MAX_HARRIS_CORNERS, VPI_ARRAY_TYPE_KEYPOINT_F32, 0, &prevFeatures));
         CHECK_STATUS(vpiArrayCreate(MAX_HARRIS_CORNERS, VPI_ARRAY_TYPE_KEYPOINT_F32, 0, &curFeatures));
         CHECK_STATUS(vpiArrayCreate(MAX_HARRIS_CORNERS, VPI_ARRAY_TYPE_U8, 0, &status));
  
         // Create Optical Flow payload
         CHECK_STATUS(vpiCreateOpticalFlowPyrLK(backend, cvFrame.cols, cvFrame.rows, VPI_IMAGE_FORMAT_U8, pyrLevel, 0.5,
                                                &optflow));
  
         // Parameters we'll use. No need to change them on the fly, so just define them here.
         // We're using the default parameters.
         VPIOpticalFlowPyrLKParams lkParams;
         CHECK_STATUS(vpiInitOpticalFlowPyrLKParams(&lkParams));
  
         // Create a mask image for drawing purposes
         cv::Mat cvMask = cv::Mat::zeros(cvFrame.size(), CV_8UC3);
  
         // Gather feature points from first frame using Harris Corners on CPU.
         {
             CHECK_STATUS(vpiArrayCreate(MAX_HARRIS_CORNERS, VPI_ARRAY_TYPE_U32, 0, &scores));
  
             VPIHarrisCornerDetectorParams harrisParams;
             CHECK_STATUS(vpiInitHarrisCornerDetectorParams(&harrisParams));
             harrisParams.strengthThresh = 0;
             harrisParams.sensitivity    = 0.01;
  
             CHECK_STATUS(vpiCreateHarrisCornerDetector(VPI_BACKEND_CPU, cvFrame.cols, cvFrame.rows, &harris));
  
             // Convert input to grayscale to conform with harris corner detector restrictions
             CHECK_STATUS(vpiSubmitConvertImageFormat(stream, VPI_BACKEND_CUDA, imgTempFrame, imgFrame, NULL));
  
             CHECK_STATUS(vpiSubmitHarrisCornerDetector(stream, VPI_BACKEND_CPU, harris, imgFrame, curFeatures, scores,
                                                        &harrisParams));
  
             CHECK_STATUS(vpiStreamSync(stream));
  
             SortKeypoints(curFeatures, scores, MAX_KEYPOINTS);
         }
  
         // Create some random colors
         std::vector<cv::Scalar> trackColors;
         {
             std::vector<cv::Vec3b> tmpTrackColors;
  
             VPIArrayData ptsData;
             CHECK_STATUS(vpiArrayLockData(curFeatures, VPI_LOCK_READ, VPI_ARRAY_BUFFER_HOST_AOS, &ptsData));
  
             const VPIArrayBufferAOS &aosKeypoints = ptsData.buffer.aos;
  
             const VPIKeypointF32 *pts = (VPIKeypointF32 *)aosKeypoints.data;
  
             for (int i = 0; i < *aosKeypoints.sizePointer; i++)
             {
                 // Track hue depends on its initial position
                 int hue = ((int)pts[i].x ^ (int)pts[i].y) % 180;
  
                 tmpTrackColors.push_back(cv::Vec3b(hue, 255, 255));
             }
             CHECK_STATUS(vpiArrayUnlock(curFeatures));
  
             cvtColor(tmpTrackColors, tmpTrackColors, cv::COLOR_HSV2BGR);
  
             for (size_t i = 0; i < tmpTrackColors.size(); i++)
             {
                 trackColors.push_back(cv::Scalar(tmpTrackColors[i]));
             }
         }
  
         // Update the mask with info from first frame.
         int numTrackedKeypoints = UpdateMask(cvMask, trackColors, NULL, curFeatures, status);
  
         // =================================================
         // Main processing stage
  
         // Generate pyramid for first frame.
         CHECK_STATUS(vpiSubmitGaussianPyramidGenerator(stream, backend, imgFrame, pyrCurFrame, VPI_BORDER_CLAMP));
  
         // Counter for the frames
         int idxFrame = 0;
  
         while (true)
         {
             // Save frame to disk
             SaveFileToDisk(imgFrame, cvMask, strOutputFiles, idxFrame);
  
             printf("Frame id=%d: %d points tracked. \n", idxFrame, numTrackedKeypoints);
  
             // Last iteration's current frame/features become this iteration's prev frame/features.
             // The former will contain information gathered in this iteration.
             std::swap(prevFeatures, curFeatures);
             std::swap(pyrPrevFrame, pyrCurFrame);
  
             // Fetch a new frame
             if (!invid.read(cvFrame))
             {
                 printf("Video ended.\n");
                 break;
             }
  
             ++idxFrame;
  
             // Wrap frame into a VPIImage, reusing the existing imgFrame.
             CHECK_STATUS(vpiImageSetWrappedOpenCVMat(imgTempFrame, cvFrame));
  
             // Convert it to grayscale
             CHECK_STATUS(vpiSubmitConvertImageFormat(stream, backend, imgTempFrame, imgFrame, NULL))
  
             // Generate a pyramid out of it
             CHECK_STATUS(vpiSubmitGaussianPyramidGenerator(stream, backend, imgFrame, pyrCurFrame, VPI_BORDER_CLAMP));
  
             // Estimate the features' position in current frame given their position in previous frame
             CHECK_STATUS(vpiSubmitOpticalFlowPyrLK(stream, 0, optflow, pyrPrevFrame, pyrCurFrame, prevFeatures,
                                                    curFeatures, status, &lkParams));
  
             // Wait for processing to finish.
             CHECK_STATUS(vpiStreamSync(stream));
  
             // Update the output mask
             numTrackedKeypoints = UpdateMask(cvMask, trackColors, prevFeatures, curFeatures, status);
  
             // No more keypoints being tracked?
             if (numTrackedKeypoints == 0)
             {
                 printf("No keypoints to track.\n");
                 break; // we can finish procesing.
             }
         }
     }
     catch (std::exception &e)
     {
         std::cerr << e.what() << std::endl;
         retval = 1;
     }
  
     vpiStreamDestroy(stream);
     vpiPayloadDestroy(harris);
     vpiPayloadDestroy(optflow);
  
     vpiPyramidDestroy(pyrPrevFrame);
     vpiImageDestroy(imgTempFrame);
     vpiImageDestroy(imgFrame);
     vpiArrayDestroy(prevFeatures);
     vpiArrayDestroy(curFeatures);
     vpiArrayDestroy(status);
     vpiArrayDestroy(scores);
  
     return retval;
 }

VPI - Vision Programming Interface

3.1 Release

Overview

Instructions

Results

Source Code