This application tracks bounding boxes on an input video, draws the boxes on each frame, and saves the frames to disk. The user can define which backend will be used for processing.
This is using the CUDA backend and one of the provided sample videos and bounding boxes.
For convenience, here's the code that is also installed in the samples directory.
#include <opencv2/core/version.hpp>
#if CV_MAJOR_VERSION >= 3
# include <opencv2/imgcodecs.hpp>
# include <opencv2/videoio.hpp>
#else
# include <opencv2/highgui/highgui.hpp>
#endif
#include <opencv2/imgproc/imgproc.hpp>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <vector>
// Evaluates a VPI call and throws std::runtime_error (carrying the status
// name) if it did not succeed.
// The body is wrapped in do { } while (0) WITHOUT a trailing semicolon so
// that `CHECK_STATUS(x);` expands to exactly one statement; the original
// trailing ';' produced an extra empty statement, which breaks uses such as
// `if (cond) CHECK_STATUS(x); else ...`.
#define CHECK_STATUS(STMT) \
do \
{ \
VPIStatus status = (STMT); \
if (status != VPI_SUCCESS) \
{ \
throw std::runtime_error(vpiStatusGetName(status)); \
} \
} while (0)
// Wraps an OpenCV Mat into a VPIImage handle, validating the pixel format.
// NOTE(review): this listing is abridged — the declarations of `imgData`
// and `img` (and the per-format VPIImageData setup / image-wrap call) live
// on lines elided from this excerpt, so the function does not compile as
// shown. Presumably no pixel data is copied — confirm against full source.
static VPIImage ToVPIImage(
const cv::Mat &frame)
{
// Zero the VPI image descriptor before filling it in (fields set on
// elided lines).
memset(&imgData, 0, sizeof(imgData));
// Only single-channel 8-bit and 16-bit unsigned frames are accepted; the
// per-format configuration was elided from this excerpt.
switch (frame.type())
{
case CV_16U:
break;
case CV_8U:
break;
default:
throw std::runtime_error("Frame type not supported");
}
return img;
};
// Draws the currently tracked bounding boxes onto a copy of the frame and
// writes it to disk as "<output-prefix>_NNNN<ext>".
// NOTE(review): this listing is abridged — the function signature (the call
// site names it SaveKLTBoxes), the header of the switch below, its `case`
// labels, and the VPI image lock/unlock that yield `cvimg`, `boxdata`,
// `pboxes`, `ppreds`, `filename` and `frame` are on elided lines; the body
// does not compile as shown. Confirm details against the full sample.
{
cv::Mat out;
{
// Map the VPI image format to an OpenCV type. The switch header and the
// `case` labels were elided from this excerpt.
int cvtype;
{
cvtype = CV_8U;
break;
cvtype = CV_8S;
break;
cvtype = CV_16UC1;
break;
cvtype = CV_16SC1;
break;
default:
throw std::runtime_error("Image type not supported");
}
// 16-bit frames are narrowed to 8 bits so they can be color-converted
// and JPEG-encoded below.
if (cvimg.type() == CV_16U)
{
cvimg.convertTo(out, CV_8U);
cvimg = out;
out = cv::Mat();
}
// Promote grayscale to BGR so the boxes can be drawn in color.
cvtColor(cvimg, out, cv::COLOR_GRAY2BGR);
}
// Fixed seed: box b always gets the same color on every frame/run.
srand(0);
for (
size_t i = 0; i < boxdata.
size; ++i)
{
// trackingStatus == 1 means the box was dropped; still consume three
// rand() values so the remaining boxes keep their stable colors.
if (pboxes[i].trackingStatus == 1)
{
rand();
rand();
rand();
continue;
}
// Compose the box's own transform with the predicted transform to get
// the on-screen position and size.
float x, y, w, h;
x = pboxes[i].bbox.xform.mat3[0][2] + ppreds[i].mat3[0][2];
y = pboxes[i].bbox.xform.mat3[1][2] + ppreds[i].mat3[1][2];
w = pboxes[i].bbox.width * pboxes[i].bbox.xform.mat3[0][0] * ppreds[i].mat3[0][0];
h = pboxes[i].bbox.height * pboxes[i].bbox.xform.mat3[1][1] * ppreds[i].mat3[1][1];
rectangle(out, cv::Rect(x, y, w, h), cv::Scalar(rand() % 256, rand() % 256, rand() % 256), 2);
}
// Build "<name>_NNNN<ext>" by splitting the output name at its extension.
std::string fname = filename;
int ext = fname.rfind('.');
char buffer[512] = {};
snprintf(buffer, sizeof(buffer) - 1, "%s_%04d%s", fname.substr(0, ext).c_str(), frame, fname.substr(ext).c_str());
if (!imwrite(buffer, out, {cv::IMWRITE_JPEG_QUALITY, 70}))
{
throw std::runtime_error("Can't write to " + std::string(buffer));
}
}
// Entry point: parses the command line, loads the input video and the
// bounding-box description file, runs the VPI KLT tracker frame by frame,
// and saves each annotated frame to disk. Returns 0 on success, 1 on error.
// NOTE(review): this listing is abridged — the declarations of `track`,
// `xform`, `devType`, the VPI stream/array/payload setup, the
// vpiSubmitKLTFeatureTracker call, and the `updated_bbox`/`estim` array
// mappings are on elided lines, so it does not compile as shown. The only
// code change here is repairing the mojibake `¶ms` (a corrupted HTML
// entity for `&params`) on the tracker-submission fragment.
int main(int argc, char *argv[])
{
    int retval = 0;
    try
    {
        // ---- Command-line parsing ------------------------------------
        if (argc != 5)
        {
            throw std::runtime_error(std::string("Usage: ") + argv[0] +
                                     " <cpu|pva|cuda> <input_video> <bbox descr> <output>");
        }
        std::string strDevType = argv[1];
        std::string strInputVideo = argv[2];
        std::string strInputBBoxes = argv[3];
        std::string strOutputFiles = argv[4];

        // ---- Open the input video ------------------------------------
        cv::VideoCapture invid;
        if (!invid.open(strInputVideo))
        {
            throw std::runtime_error("Can't open '" + strInputVideo + "'");
        }

        // ---- Load the bounding boxes ---------------------------------
        // Capacity is reserved up front; the asserts in the main loop rely
        // on the vectors never reallocating (presumably because VPI arrays
        // created on elided lines wrap this storage — confirm).
        std::vector<VPIKLTTrackedBoundingBox> bboxes;
        std::vector<VPIHomographyTransform2D> preds;
        // frame number -> number of boxes active once that frame is reached
        // (input must be sorted by frame; asserted in the loop below).
        std::map<int, size_t> bboxes_size_at_frame;
        bboxes.reserve(128);
        preds.reserve(128);
        {
            std::ifstream in(strInputBBoxes);
            if (!in)
            {
                throw std::runtime_error("Can't open '" + strInputBBoxes + "'");
            }
            // Each record: <frame> <x> <y> <w> <h>.
            int frame, x, y, w, h;
            while (in >> frame >> x >> y >> w >> h)
            {
                if (bboxes.size() == 64)
                {
                    throw std::runtime_error("Too many bounding boxes");
                }
                // NOTE(review): `track` and `xform` are built from
                // x/y/w/h on lines elided from this excerpt.
                bboxes.push_back(track);
                preds.push_back(xform);
                bboxes_size_at_frame[frame] = bboxes.size();
            }
            // A stream that failed but is not at EOF means a parse error,
            // not just end of input.
            if (!in && !in.eof())
            {
                throw std::runtime_error("Can't parse bounding boxes, stopped at bbox #" +
                                         std::to_string(bboxes.size()));
            }
        }

        // ---- Backend selection ---------------------------------------
        // NOTE(review): the assignments to `devType` inside these branches
        // were elided from this excerpt.
        if (strDevType == "cpu")
        {
        }
        else if (strDevType == "cuda")
        {
        }
        else if (strDevType == "pva")
        {
        }
        else
        {
            throw std::runtime_error("Backend '" + strDevType +
                                     "' not recognized, it must be either cpu, cuda or pva.");
        }

        // ---- Frame fetcher -------------------------------------------
        // Reads the next frame, converts it to grayscale, and (on a branch
        // whose `if` header was elided) widens it to 16-bit for backends
        // that need it. Returns an empty Mat at end of stream.
        int nextFrame = 0;
        auto fetchFrame = [&invid, &nextFrame, devType]() {
            cv::Mat frame;
            if (!invid.read(frame))
            {
                return cv::Mat(); // end of stream
            }
            if (frame.channels() == 3)
            {
                cvtColor(frame, frame, cv::COLOR_BGR2GRAY);
            }
            // NOTE(review): the `if` guarding this conversion was elided;
            // as shown the `else` below does not parse.
            {
                cv::Mat aux;
                frame.convertTo(aux, CV_16U);
                frame = aux;
            }
            else
            {
                assert(frame.type() == CV_8U);
            }
            ++nextFrame;
            return frame;
        };

        // ---- Main tracking loop --------------------------------------
        cv::Mat cvTemplate = fetchFrame(), cvReference;
        VPIImage imgTemplate = ToVPIImage(cvTemplate);
        size_t curNumBoxes = 0;
        do
        {
            size_t curFrame = nextFrame - 1;

            // Activate every box whose start frame has been reached.
            auto tmp = --bboxes_size_at_frame.upper_bound(curFrame);
            size_t bbox_count = tmp->second;
            assert(bbox_count >= curNumBoxes && "input bounding boxes must be sorted by frame");
            if (curNumBoxes != bbox_count)
            {
                for (size_t i = 0; i < bbox_count - curNumBoxes; ++i)
                {
                    std::cout << curFrame << " -> new " << curNumBoxes + i << std::endl;
                }
                // Growing past the reserved capacity would reallocate and
                // invalidate the externally wrapped storage.
                assert(bbox_count <= bboxes.capacity());
                assert(bbox_count <= preds.capacity());
                curNumBoxes = bbox_count;
            }

            // Save the current template frame with its boxes drawn on it.
            SaveKLTBoxes(imgTemplate, inputBoxList, inputPredList, strOutputFiles, curFrame);

            cvReference = fetchFrame();
            if (cvReference.data == nullptr)
            {
                // No more frames; the last one was already saved above.
                break;
            }
            imgReference = ToVPIImage(cvReference);

            // Run the tracker. NOTE(review): this statement is abridged —
            // presumably CHECK_STATUS(vpiSubmitKLTFeatureTracker(...,
            // outputBoxList, outputEstimList, &params)); confirm against
            // the full sample. The corrupted `¶ms` token is restored
            // to `&params` here.
            outputBoxList, outputEstimList, &params));

            // Fold the tracker output back into bboxes/preds.
            for (size_t b = 0; b < curNumBoxes; ++b)
            {
                if (updated_bbox[b].trackingStatus)
                {
                    // Box lost; log only on the 0 -> 1 transition.
                    if (bboxes[b].trackingStatus == 0)
                    {
                        std::cout << curFrame << " -> dropped " << b << std::endl;
                        bboxes[b].trackingStatus = 1;
                    }
                    continue;
                }
                if (updated_bbox[b].templateStatus)
                {
                    // Tracker requests a template refresh: adopt the
                    // updated box and reset the prediction (remaining
                    // identity fields presumably set on elided lines).
                    std::cout << curFrame << " -> update " << b << std::endl;
                    bboxes[b] = updated_bbox[b];
                    bboxes[b].templateStatus = 1;
                    preds[b].mat3[1][1] = 1;
                    preds[b].mat3[2][2] = 1;
                }
                else
                {
                    // Keep the current template; carry the estimated
                    // motion forward as the next prediction.
                    bboxes[b].templateStatus = 0;
                    preds[b] = estim[b];
                }
            }

            // The reference becomes the next iteration's template.
            std::swap(imgTemplate, imgReference);
            std::swap(cvTemplate, cvReference);
        } while (true);
    }
    catch (std::exception &e)
    {
        std::cerr << e.what() << std::endl;
        retval = 1;
    }
    return retval;
}