This application tracks bounding boxes on an input video, draws the tracked bounding boxes on each frame and saves the resulting frames to disk. The user can choose which backend is used for processing.
This is using the CUDA backend and one of the provided sample videos and bounding boxes.
For convenience, here's the code that is also installed in the samples directory.
29 #include <opencv2/core/version.hpp>
30 #if CV_MAJOR_VERSION >= 3
31 # include <opencv2/imgcodecs.hpp>
32 # include <opencv2/videoio.hpp>
34 # include <opencv2/highgui/highgui.hpp>
37 #include <opencv2/imgproc/imgproc.hpp>
54 #define CHECK_STATUS(STMT) \
57 VPIStatus status = (STMT); \
58 if (status != VPI_SUCCESS) \
60 char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH]; \
61 vpiGetLastStatusMessage(buffer, sizeof(buffer)); \
62 std::ostringstream ss; \
63 ss << vpiStatusGetName(status) << ": " << buffer; \
64 throw std::runtime_error(ss.str()); \
115 throw std::runtime_error(
"Image type not supported");
121 if (cvimg.type() == CV_16U)
123 cvimg.convertTo(out, CV_8U);
128 cvtColor(cvimg, out, cv::COLOR_GRAY2BGR);
146 if (pboxes[i].trackingStatus == 1)
157 x = pboxes[i].bbox.xform.mat3[0][2] + ppreds[i].mat3[0][2];
158 y = pboxes[i].bbox.xform.mat3[1][2] + ppreds[i].mat3[1][2];
159 w = pboxes[i].bbox.width * pboxes[i].bbox.xform.mat3[0][0] * ppreds[i].mat3[0][0];
160 h = pboxes[i].bbox.height * pboxes[i].bbox.xform.mat3[1][1] * ppreds[i].mat3[1][1];
162 rectangle(out, cv::Rect(x, y, w, h), cv::Scalar(rand() % 256, rand() % 256, rand() % 256), 2);
169 std::string fname = filename;
170 int ext = fname.rfind(
'.');
172 char buffer[512] = {};
173 snprintf(buffer,
sizeof(buffer) - 1,
"%s_%04d%s", fname.substr(0, ext).c_str(), frame, fname.substr(ext).c_str());
176 if (!imwrite(buffer, out, {cv::IMWRITE_JPEG_QUALITY, 70}))
178 throw std::runtime_error(
"Can't write to " + std::string(buffer));
182 int main(
int argc,
char *argv[])
195 throw std::runtime_error(std::string(
"Usage: ") + argv[0] +
196 " <cpu|pva|cuda> <input_video> <bbox descr> <output>");
199 std::string strBackend = argv[1];
200 std::string strInputVideo = argv[2];
201 std::string strInputBBoxes = argv[3];
202 std::string strOutputFiles = argv[4];
205 cv::VideoCapture invid;
206 if (!invid.open(strInputVideo))
208 throw std::runtime_error(
"Can't open '" + strInputVideo +
"'");
222 VPIArray inputBoxList, inputPredList;
225 std::vector<VPIKLTTrackedBoundingBox> bboxes;
226 int32_t bboxesSize = 0;
227 std::vector<VPIHomographyTransform2D> preds;
228 int32_t predsSize = 0;
232 std::map<int, size_t> bboxes_size_at_frame;
240 std::ifstream in(strInputBBoxes);
243 throw std::runtime_error(
"Can't open '" + strInputBBoxes +
"'");
247 int frame, x, y, w, h;
248 while (in >> frame >> x >> y >> w >> h)
250 if (bboxes.size() == 64)
252 throw std::runtime_error(
"Too many bounding boxes");
273 bboxes.push_back(track);
277 xform.
mat3[0][0] = 1;
278 xform.
mat3[1][1] = 1;
279 xform.
mat3[2][2] = 1;
280 preds.push_back(xform);
282 bboxes_size_at_frame[frame] = bboxes.size();
285 if (!in && !in.eof())
287 throw std::runtime_error(
"Can't parse bounding boxes, stopped at bbox #" +
288 std::to_string(bboxes.size()));
296 data.
data = &bboxes[0];
301 data.
data = &preds[0];
308 if (strBackend ==
"cpu")
312 else if (strBackend ==
"cuda")
316 else if (strBackend ==
"pva")
322 throw std::runtime_error(
"Backend '" + strBackend +
323 "' not recognized, it must be either cpu, cuda or pva.");
332 auto fetchFrame = [&invid, &nextFrame, backendType]() {
334 if (!invid.read(frame))
340 if (frame.channels() == 3)
342 cvtColor(frame, frame, cv::COLOR_BGR2GRAY);
351 frame.convertTo(aux, CV_16U);
356 assert(frame.type() == CV_8U);
365 cv::Mat cvTemplate = fetchFrame(), cvReference;
366 VPIImage imgTemplate = ToVPIImage(
nullptr, cvTemplate);
397 size_t curNumBoxes = 0;
401 size_t curFrame = nextFrame - 1;
404 auto tmp = --bboxes_size_at_frame.upper_bound(curFrame);
405 size_t bbox_count = tmp->second;
407 assert(bbox_count >= curNumBoxes &&
"input bounding boxes must be sorted by frame");
410 if (curNumBoxes != bbox_count)
422 for (
size_t i = 0; i < bbox_count - curNumBoxes; ++i)
424 std::cout << curFrame <<
" -> new " << curNumBoxes + i << std::endl;
426 assert(bbox_count <= bboxes.capacity());
427 assert(bbox_count <= preds.capacity());
429 curNumBoxes = bbox_count;
433 SaveKLTBoxes(imgTemplate, inputBoxList, inputPredList, strOutputFiles, curFrame);
436 cvReference = fetchFrame();
439 if (cvReference.data ==
nullptr)
446 imgReference = ToVPIImage(imgReference, cvReference);
451 imgReference, outputBoxList, outputEstimList, &params));
467 for (
size_t b = 0; b < curNumBoxes; ++b)
470 if (updated_bbox[b].trackingStatus)
473 if (bboxes[b].trackingStatus == 0)
475 std::cout << curFrame <<
" -> dropped " << b << std::endl;
476 bboxes[b].trackingStatus = 1;
483 if (updated_bbox[b].templateStatus)
485 std::cout << curFrame <<
" -> update " << b << std::endl;
495 bboxes[b] = updated_bbox[b];
498 bboxes[b].templateStatus = 1;
502 preds[b].
mat3[0][0] = 1;
503 preds[b].mat3[1][1] = 1;
504 preds[b].mat3[2][2] = 1;
509 bboxes[b].templateStatus = 0;
526 std::swap(imgTemplate, imgReference);
527 std::swap(cvTemplate, cvReference);
530 catch (std::exception &e)
532 std::cerr << e.what() << std::endl;
Functions and structures for dealing with VPI arrays.
Functions and structures for dealing with VPI contexts.
Functions and structures for dealing with VPI images.
Declares functions that implement the KLT Feature Tracker algorithm.
Functions for handling OpenCV interoperability with VPI.
Declaration of VPI status codes handling functions.
Declares functions dealing with VPI streams.
int32_t * sizePointer
Points to the number of elements in the array.
int32_t capacity
Maximum number of elements that the array can hold.
VPIArrayType format
Format of each array element.
void * data
Points to the first element of the array.
VPIStatus vpiArraySetSize(VPIArray array, int32_t size)
Set the array size in elements.
VPIStatus vpiArrayUnlock(VPIArray array)
Releases the lock on array object.
VPIStatus vpiArrayLock(VPIArray array, VPILockMode mode, VPIArrayData *arrayData)
Acquires the lock on array object and returns a pointer to array data.
VPIStatus vpiArrayCreate(int32_t capacity, VPIArrayType type, uint32_t flags, VPIArray *array)
Create an empty array instance.
struct VPIArrayImpl * VPIArray
A handle to an array.
VPIStatus vpiArrayInvalidate(VPIArray array)
Informs that the array's wrapped memory was updated outside VPI.
@ VPI_ARRAY_TYPE_KLT_TRACKED_BOUNDING_BOX
VPIKLTTrackedBoundingBox element.
@ VPI_ARRAY_TYPE_HOMOGRAPHY_TRANSFORM_2D
VPIHomographyTransform2D element.
Stores information about array characteristics and content.
VPIStatus vpiArrayCreateHostMemWrapper(const VPIArrayData *arrayData, uint32_t flags, VPIArray *array)
Create an array object by wrapping an existing host memory block.
VPIStatus vpiContextSetCurrent(VPIContext ctx)
Sets the context for the calling thread.
void vpiContextDestroy(VPIContext ctx)
Destroy a context instance as well as all resources it owns.
VPIStatus vpiContextCreate(uint32_t flags, VPIContext *ctx)
Create a context instance.
struct VPIContextImpl * VPIContext
A handle to a context.
int32_t height
Height of this plane in pixels.
int32_t width
Width of this plane in pixels.
void * data
Pointer to the first row of this plane.
int32_t pitchBytes
Difference in bytes between the beginning of one row and the beginning of the previous one.
VPIImagePlane planes[VPI_MAX_PLANE_COUNT]
Data of all image planes.
VPIImageFormat format
Image format.
VPIStatus vpiImageLock(VPIImage img, VPILockMode mode, VPIImageData *hostData)
Acquires the lock on an image object and returns a pointer to the image planes.
struct VPIImageImpl * VPIImage
A handle to an image.
VPIStatus vpiImageGetFormat(VPIImage img, VPIImageFormat *format)
Get the image format.
VPIStatus vpiImageUnlock(VPIImage img)
Releases the lock on an image object.
Stores information about image characteristics and content.
int8_t templateStatus
Status of the template related to this bounding box.
float maxScaleChange
Maximum relative scale change.
int8_t trackingStatus
Tracking status of this bounding box.
float maxTranslationChange
Maximum relative translation change.
VPIBoundingBox bbox
Bounding box being tracked.
float nccThresholdUpdate
Threshold for requiring template update.
float nccThresholdStop
Threshold to stop estimating.
float nccThresholdKill
Threshold to consider template tracking was lost.
int32_t numberOfIterationsScaling
Number of Inverse compositional iterations of scale estimations.
VPIKLTFeatureTrackerType trackingType
Type of KLT tracking that will be performed.
VPIStatus vpiSubmitKLTFeatureTracker(VPIStream stream, uint32_t backend, VPIPayload payload, VPIImage templateImage, VPIArray inputBoxList, VPIArray inputPredictionList, VPIImage referenceImage, VPIArray outputBoxList, VPIArray outputEstimationList, const VPIKLTFeatureTrackerParams *params)
Runs KLT Feature Tracker on two frames.
VPIStatus vpiCreateKLTFeatureTracker(uint32_t backends, int32_t imageWidth, int32_t imageHeight, VPIImageFormat imageFormat, const VPIKLTFeatureTrackerCreationParams *params, VPIPayload *payload)
Creates payload for vpiSubmitKLTFeatureTracker.
@ VPI_KLT_INVERSE_COMPOSITIONAL
Inverse compositional algorithm for KLT tracker.
Structure that defines the parameters for vpiCreateKLTFeatureTracker.
Stores a bounding box that is being tracked by KLT Tracker.
VPIStatus vpiImageSetWrappedOpenCVMat(VPIImage img, const cv::Mat &mat)
Redefines the wrapped cv::Mat of an existing VPIImage wrapper.
VPIStatus vpiImageCreateOpenCVMatWrapper(const cv::Mat &mat, VPIImageFormat fmt, uint32_t flags, VPIImage *img)
Wraps a cv::Mat in a VPIImage with the given image format.
struct VPIPayloadImpl * VPIPayload
A handle to an algorithm payload.
struct VPIStreamImpl * VPIStream
A handle to a stream.
VPIStatus vpiStreamSync(VPIStream stream)
Blocks the calling thread until all submitted commands in this stream queue are done (queue is empty)...
VPIBackend
VPI Backend types.
VPIStatus vpiStreamCreate(uint32_t flags, VPIStream *stream)
Create a stream instance.
@ VPI_BACKEND_CUDA
CUDA backend.
@ VPI_BACKEND_PVA
PVA backend.
@ VPI_BACKEND_CPU
CPU backend.
float width
Bounding box width.
float height
Bounding box height.
VPIHomographyTransform2D xform
Defines the bounding box top left corner and its homography.
float mat3[3][3]
3x3 homogeneous matrix that defines the homography.
@ VPI_LOCK_READ_WRITE
Lock memory for reading and writing.
@ VPI_LOCK_READ
Lock memory only for reading.