VPI - Vision Programming Interface

3.0 Release

Cross-Compilation Targeting aarch64

Overview

This sample shows how to build your applications on an x86_64 host, targeting Jetson devices that use the aarch64 architecture. It uses several features first made available in cmake-3.5. The sample application itself creates an input image, applies a box filter to it and saves the result to disk.

Instructions

JetPack's installer already sets up the cross-compilation toolchain using gcc, but if for some reason it isn't available, install it manually with:

apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu

Now cmake can be instructed to create a cross-compiling build tree by calling it as follows in the samples directory:

cmake . -DCMAKE_TOOLCHAIN_FILE=Toolchain_aarch64_l4t.cmake

The file Toolchain_aarch64_l4t.cmake is included in the samples directory and defines the cross-compiler that will be used, among other configurations. In particular, it also allows cross-compilation of CUDA applications, provided that the CUDA aarch64 cross-compilation libraries are correctly installed on the host.

Note
This sample can also be compiled targeting the host. Just omit the CMAKE_TOOLCHAIN_FILE parameter during cmake invocation.

The usage is:

./vpi_sample_08_cross_aarch64_l4t <backend>

where

  • backend: either cpu, cuda or pva; it defines the backend that will perform the processing.

Source Code

For convenience, here's the code that is also installed in the samples directory.

Language:
29 #include <vpi/Image.h>
30 #include <vpi/Status.h>
31 #include <vpi/Stream.h>
32 #include <vpi/algo/BoxFilter.h>
33 
34 #include <cstring> // for memset
35 #include <fstream>
36 #include <iostream>
37 #include <sstream>
38 
39 #define CHECK_STATUS(STMT) \
40  do /* evaluate STMT once; throw std::runtime_error if it is not VPI_SUCCESS */ \
41  { \
42  VPIStatus status = (STMT); \
43  if (status != VPI_SUCCESS) \
44  { \
45  char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH]; \
46  vpiGetLastStatusMessage(buffer, sizeof(buffer)); \
47  std::ostringstream ss; \
48  ss << vpiStatusGetName(status) << ": " << buffer; \
49  throw std::runtime_error(ss.str()); \
50  } \
51  } while (0) /* no trailing ';' here: the call site supplies it, keeping the macro a single statement */
52 
53 int main(int argc, char *argv[])
54 {
55  // VPI objects that will be used
56  VPIImage image = NULL;
57  VPIImage blurred = NULL;
58  VPIStream stream = NULL;
59 
60  int retval = 0;
61 
62  try
63  {
64  if (argc != 2)
65  {
66  throw std::runtime_error(std::string("Usage: ") + argv[0] + " <cpu|pva|cuda>");
67  }
68 
69  std::string strBackend = argv[1];
70 
71  // Now parse the backend
72  VPIBackend backend;
73 
74  if (strBackend == "cpu")
75  {
76  backend = VPI_BACKEND_CPU;
77  }
78  else if (strBackend == "cuda")
79  {
80  backend = VPI_BACKEND_CUDA;
81  }
82  else if (strBackend == "pva")
83  {
84  backend = VPI_BACKEND_PVA;
85  }
86  else
87  {
88  throw std::runtime_error("Backend '" + strBackend +
89  "' not recognized, it must be either cpu, cuda or pva.");
90  }
91 
92  // Create the stream for the given backend.
93  CHECK_STATUS(vpiStreamCreate(backend, &stream));
94 
95  char imgContents[512][512];
96  for (int i = 0; i < 512; ++i)
97  {
98  for (int j = 0; j < 512; ++j)
99  {
100  imgContents[i][j] = i * 512 + j + i;
101  }
102  }
103 
104  // We now wrap the loaded image into a VPIImage object to be used by VPI.
105  {
106  // First fill VPIImageBufferPitchLinear with the, well, image data...
107  VPIImageData imgData;
108  memset(&imgData, 0, sizeof(imgData));
111  imgData.buffer.pitch.numPlanes = 1;
112  imgData.buffer.pitch.planes[0].width = 512;
113  imgData.buffer.pitch.planes[0].height = 512;
114  imgData.buffer.pitch.planes[0].pitchBytes = 512;
115  imgData.buffer.pitch.planes[0].data = imgContents[0];
116 
117  // Wrap it into a VPIImage. VPI won't make a copy of it, so the original
118  // image must be in scope at all times.
119  CHECK_STATUS(vpiImageCreateWrapper(&imgData, nullptr, 0, &image));
120  }
121 
122  // Now create the output image, single unsigned 8-bit channel.
123  CHECK_STATUS(vpiImageCreate(512, 512, VPI_IMAGE_FORMAT_U8, 0, &blurred));
124 
125  // Submit it for processing passing the image to be blurred and the result image
126  CHECK_STATUS(vpiSubmitBoxFilter(stream, backend, image, blurred, 3, 3, VPI_BORDER_ZERO));
127 
128  // Wait until the algorithm finishes processing
129  CHECK_STATUS(vpiStreamSync(stream));
130 
131  // Now let's retrieve the output image contents and output it to disk
132  {
133  // Lock output image to retrieve its data on cpu memory
134  VPIImageData outData;
135  CHECK_STATUS(vpiImageLockData(blurred, VPI_LOCK_READ, VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR, &outData));
136 
138  VPIImageBufferPitchLinear &outPitch = outData.buffer.pitch;
139 
140  std::ofstream fd(("boxfiltered_" + strBackend + ".pgm").c_str());
141 
142  fd << "P5\n512 512 255\n";
143  for (int i = 0; i < 512; ++i)
144  {
145  fd.write(reinterpret_cast<const char *>(outPitch.planes[0].data) + outPitch.planes[0].pitchBytes * i,
146  512);
147  }
148  fd.close();
149 
150  // Done handling output image, don't forget to unlock it.
151  CHECK_STATUS(vpiImageUnlock(blurred));
152  }
153  }
154  catch (std::exception &e)
155  {
156  std::cerr << e.what() << std::endl;
157  retval = 1;
158  }
159 
160  // Clean up
161 
162  // Make sure stream is synchronized before destroying the objects
163  // that might still be in use.
164  if (stream != NULL)
165  {
166  vpiStreamSync(stream);
167  }
168 
169  vpiImageDestroy(image);
170  vpiImageDestroy(blurred);
171  vpiStreamDestroy(stream);
172 
173  return retval;
174 }
Declares functions that implement the Box Filter algorithm.
#define VPI_IMAGE_FORMAT_U8
Single plane with one 8-bit unsigned integer channel.
Definition: ImageFormat.h:100
Functions and structures for dealing with VPI images.
Declaration of VPI status codes handling functions.
Declares functions dealing with VPI streams.
VPIStatus vpiSubmitBoxFilter(VPIStream stream, uint64_t backend, VPIImage input, VPIImage output, int32_t kernelWidth, int32_t kernelHeight, VPIBorderExtension border)
Runs a 2D box filter over an image.
VPIImageBuffer buffer
Stores the image contents.
Definition: Image.h:241
VPIImagePlanePitchLinear planes[VPI_MAX_PLANE_COUNT]
Data of all image planes in pitch-linear layout.
Definition: Image.h:160
VPIImageBufferPitchLinear pitch
Image stored in pitch-linear layout.
Definition: Image.h:210
void * data
Pointer to the first row of this plane.
Definition: Image.h:141
int32_t numPlanes
Number of planes.
Definition: Image.h:156
VPIImageFormat format
Image format.
Definition: Image.h:152
VPIImageBufferType bufferType
Type of image buffer.
Definition: Image.h:238
int32_t height
Height of this plane in pixels.
Definition: Image.h:123
int32_t width
Width of this plane in pixels.
Definition: Image.h:119
int32_t pitchBytes
Difference in bytes of beginning of one row and the beginning of the previous.
Definition: Image.h:134
void vpiImageDestroy(VPIImage img)
Destroy an image instance.
struct VPIImageImpl * VPIImage
A handle to an image.
Definition: Types.h:256
VPIStatus vpiImageCreateWrapper(const VPIImageData *data, const VPIImageWrapperParams *params, uint64_t flags, VPIImage *img)
Create an image object by wrapping an existing memory block.
VPIStatus vpiImageLockData(VPIImage img, VPILockMode mode, VPIImageBufferType bufType, VPIImageData *data)
Acquires the lock on an image object and returns the image contents.
VPIStatus vpiImageCreate(int32_t width, int32_t height, VPIImageFormat fmt, uint64_t flags, VPIImage *img)
Create an empty image instance with the specified flags.
VPIStatus vpiImageUnlock(VPIImage img)
Releases the lock on an image object.
@ VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR
Host-accessible with planes in pitch-linear memory layout.
Definition: Image.h:172
Stores the image plane contents.
Definition: Image.h:150
Stores information about image characteristics and content.
Definition: Image.h:234
struct VPIStreamImpl * VPIStream
A handle to a stream.
Definition: Types.h:250
VPIStatus vpiStreamSync(VPIStream stream)
Blocks the calling thread until all submitted commands in this stream queue are done (queue is empty)...
VPIBackend
VPI Backend types.
Definition: Types.h:91
void vpiStreamDestroy(VPIStream stream)
Destroy a stream instance and deallocate all HW resources.
VPIStatus vpiStreamCreate(uint64_t flags, VPIStream *stream)
Create a stream instance.
@ VPI_BACKEND_CUDA
CUDA backend.
Definition: Types.h:93
@ VPI_BACKEND_PVA
PVA backend.
Definition: Types.h:94
@ VPI_BACKEND_CPU
CPU backend.
Definition: Types.h:92
@ VPI_BORDER_ZERO
All pixels outside the image are considered to be zero.
Definition: Types.h:278
@ VPI_LOCK_READ
Lock memory only for reading.
Definition: Types.h:595

Here is the cmake toolchain file that is being used.

27 set(CMAKE_SYSTEM_NAME Linux)
28 set(CMAKE_SYSTEM_PROCESSOR aarch64)
29 
30 set(target_arch aarch64-linux-gnu)
31 set(CMAKE_LIBRARY_ARCHITECTURE ${target_arch} CACHE STRING "" FORCE)
32 
33 # Configure cmake to look for libraries, include directories and
34 # packages inside the target root prefix, while programs are only
35 # searched for outside of it.
36 set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
37 set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
38 set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
39 set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
40 set(CMAKE_FIND_ROOT_PATH "/usr/${target_arch}")
41 
42 # needed to avoid doing some more strict compiler checks that
43 # are failing when cross-compiling
44 set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
45 
46 # specify the toolchain programs
47 find_program(CMAKE_C_COMPILER ${target_arch}-gcc)
48 find_program(CMAKE_CXX_COMPILER ${target_arch}-g++)
49 if(NOT CMAKE_C_COMPILER OR NOT CMAKE_CXX_COMPILER)
50  message(FATAL_ERROR "Can't find suitable C/C++ cross compiler for ${target_arch}")
51 endif()
52 
53 set(CMAKE_AR ${target_arch}-ar CACHE FILEPATH "" FORCE)
54 set(CMAKE_RANLIB ${target_arch}-ranlib)
55 set(CMAKE_LINKER ${target_arch}-ld)
56 
57 # Not all shared library dependencies are installed on the host machine.
58 # Make sure the linker doesn't complain.
59 set(CMAKE_EXE_LINKER_FLAGS_INIT "-Wl,--allow-shlib-undefined")
60 
61 # Instruct nvcc to use our cross-compiler for host code (CMake passes it
62 # to nvcc as -ccbin). The extra flags go through the *_INIT variable so
63 # that they only seed CMAKE_CUDA_FLAGS and never overwrite a value the
64 # user supplied in the cache.
65 set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
66 set(CMAKE_CUDA_FLAGS_INIT "-Xcompiler -fPIC")

And finally the accompanying CMakeLists.txt. Note that it is just a plain simple CMakeLists.txt. Everything related to cross-compilation is defined in the toolchain file above.

27 cmake_minimum_required(VERSION 3.5)
28 
29 # To cross-compile for aarch64-l4t target from x86,
30 # pass -DCMAKE_TOOLCHAIN_FILE=Toolchain_aarch64_l4t.cmake
31 # to cmake when creating build tree.
32 
33 project(vpi_sample_08_cross_aarch64_l4t)
34 
35 find_package(vpi 3.0 REQUIRED) # request the VPI release this sample is written for
36 
37 add_executable(${PROJECT_NAME} main.cpp)
38 target_link_libraries(${PROJECT_NAME} PRIVATE vpi) # PRIVATE: an executable has no consumers to propagate to