VPI - Vision Programming Interface

4.0 Release

Cross-Compilation Targeting aarch64

Overview

This sample shows how to build your applications on an x86_64 host, targeting Jetson devices that use the aarch64 architecture. It uses several features first made available in cmake-3.5. The sample application itself creates an input image, applies a box filter to it and saves the result to disk.

Instructions

JetPack's installer already sets up the cross-compilation toolchain using gcc, but if for some reason it isn't available, install it manually with:

apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu

Now cmake can be instructed to create a cross-compiling build tree by calling it as follows in the samples directory:

cmake . -DCMAKE_TOOLCHAIN_FILE=Toolchain_aarch64_l4t.cmake

The file Toolchain_aarch64_l4t.cmake is included in the samples directory and defines the cross-compiler that will be used, among other configurations. In particular, it also allows cross-compilation of CUDA applications, provided that the CUDA aarch64 cross-compilation libraries are correctly installed on the host.

Note
This sample can also be compiled targeting the host. Just omit the CMAKE_TOOLCHAIN_FILE parameter during cmake invocation.

The usage is:

./vpi_sample_08_cross_aarch64_l4t <backend>

where

  • backend: either cpu, cuda or pva; it defines the backend that will perform the processing.

Source Code

For convenience, here's the code that is also installed in the samples directory.

Language:
#include <vpi/Image.h>
#include <vpi/Status.h>
#include <vpi/Stream.h>
#include <vpi/algo/BoxFilter.h>

#include <cstring> // for memset
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept> // for std::runtime_error
#include <string>
38 
// Evaluates STMT (an expression yielding a VPIStatus) exactly once. If the
// result is not VPI_SUCCESS, throws std::runtime_error whose message is
// "<status name>: <last status message>".
//
// The do { ... } while (0) wrapper intentionally has no trailing semicolon:
// the caller supplies it, so `CHECK_STATUS(x);` expands to exactly one
// statement and stays valid inside an unbraced if/else.
#define CHECK_STATUS(STMT)                                      \
    do                                                          \
    {                                                           \
        VPIStatus status = (STMT);                              \
        if (status != VPI_SUCCESS)                              \
        {                                                       \
            char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH];         \
            vpiGetLastStatusMessage(buffer, sizeof(buffer));    \
            std::ostringstream ss;                              \
            ss << vpiStatusGetName(status) << ": " << buffer;   \
            throw std::runtime_error(ss.str());                 \
        }                                                       \
    } while (0)
52 
53 int main(int argc, char *argv[])
54 {
55  // VPI objects that will be used
56  VPIImage image = NULL;
57  VPIImage blurred = NULL;
58  VPIStream stream = NULL;
59 
60  int retval = 0;
61 
62  try
63  {
64  if (argc != 2)
65  {
66  throw std::runtime_error(std::string("Usage: ") + argv[0] + " <cpu|pva|cuda>");
67  }
68 
69  std::string strBackend = argv[1];
70 
71  // Now parse the backend
72  VPIBackend backend;
73 
74  if (strBackend == "cpu")
75  {
76  backend = VPI_BACKEND_CPU;
77  }
78  else if (strBackend == "cuda")
79  {
80  backend = VPI_BACKEND_CUDA;
81  }
82  else if (strBackend == "pva")
83  {
84  backend = VPI_BACKEND_PVA;
85  }
86  else
87  {
88  throw std::runtime_error("Backend '" + strBackend +
89  "' not recognized, it must be either cpu, cuda or pva.");
90  }
91 
92  // Create the stream for the given backend.
93  CHECK_STATUS(vpiStreamCreate(backend, &stream));
94 
95  char imgContents[512][512];
96  for (int i = 0; i < 512; ++i)
97  {
98  for (int j = 0; j < 512; ++j)
99  {
100  imgContents[i][j] = i * 512 + j + i;
101  }
102  }
103 
104  // We now wrap the loaded image into a VPIImage object to be used by VPI.
105  {
106  // First fill VPIImageBufferPitchLinear with the, well, image data...
107  VPIImageData imgData;
108  memset(&imgData, 0, sizeof(imgData));
111  imgData.buffer.pitch.numPlanes = 1;
112  imgData.buffer.pitch.planes[0].width = 512;
113  imgData.buffer.pitch.planes[0].height = 512;
114  imgData.buffer.pitch.planes[0].pitchBytes = 512;
115  imgData.buffer.pitch.planes[0].pBase = reinterpret_cast<VPIByte *>(imgContents[0]);
116  imgData.buffer.pitch.planes[0].offsetBytes = 0;
117 
118  // Wrap it into a VPIImage. VPI won't make a copy of it, so the original
119  // image must be in scope at all times.
120  CHECK_STATUS(vpiImageCreateWrapper(&imgData, nullptr, 0, &image));
121  }
122 
123  // Now create the output image, single unsigned 8-bit channel.
124  CHECK_STATUS(vpiImageCreate(512, 512, VPI_IMAGE_FORMAT_U8, 0, &blurred));
125 
126  // Submit it for processing passing the image to be blurred and the result image
127  CHECK_STATUS(vpiSubmitBoxFilter(stream, backend, image, blurred, 3, 3, VPI_BORDER_ZERO));
128 
129  // Wait until the algorithm finishes processing
130  CHECK_STATUS(vpiStreamSync(stream));
131 
132  // Now let's retrieve the output image contents and output it to disk
133  {
134  // Lock output image to retrieve its data on cpu memory
135  VPIImageData outData;
136  CHECK_STATUS(vpiImageLockData(blurred, VPI_LOCK_READ, VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR, &outData));
137 
139  VPIImageBufferPitchLinear &outPitch = outData.buffer.pitch;
140 
141  std::ofstream fd(("boxfiltered_" + strBackend + ".pgm").c_str());
142 
143  fd << "P5\n512 512 255\n";
144  for (int i = 0; i < 512; ++i)
145  {
146  fd.write(reinterpret_cast<char *>(outPitch.planes[0].pBase + outPitch.planes[0].offsetBytes +
147  outPitch.planes[0].pitchBytes * i),
148  512);
149  }
150  fd.close();
151 
152  // Done handling output image, don't forget to unlock it.
153  CHECK_STATUS(vpiImageUnlock(blurred));
154  }
155  }
156  catch (std::exception &e)
157  {
158  std::cerr << e.what() << std::endl;
159  retval = 1;
160  }
161 
162  // Clean up
163 
164  // Make sure stream is synchronized before destroying the objects
165  // that might still be in use.
166  if (stream != NULL)
167  {
168  vpiStreamSync(stream);
169  }
170 
171  vpiImageDestroy(image);
172  vpiImageDestroy(blurred);
173  vpiStreamDestroy(stream);
174 
175  return retval;
176 }
Declares functions that implement the Box Filter algorithm.
#define VPI_IMAGE_FORMAT_U8
Single plane with one 8-bit unsigned integer channel.
Definition: ImageFormat.h:100
Functions and structures for dealing with VPI images.
Declaration of VPI status codes handling functions.
Declares functions dealing with VPI streams.
unsigned char VPIByte
Definition of a byte type.
Definition: Types.h:288
VPIStatus vpiSubmitBoxFilter(VPIStream stream, uint64_t backend, VPIImage input, VPIImage output, int32_t kernelWidth, int32_t kernelHeight, VPIBorderExtension border)
Runs a 2D box filter over an image.
VPIImageBuffer buffer
Stores the image contents.
Definition: Image.h:245
VPIImagePlanePitchLinear planes[VPI_MAX_PLANE_COUNT]
Data of all image planes in pitch-linear layout.
Definition: Image.h:164
VPIImageBufferPitchLinear pitch
Image stored in pitch-linear layout.
Definition: Image.h:214
int32_t numPlanes
Number of planes.
Definition: Image.h:160
VPIImageFormat format
Image format.
Definition: Image.h:156
VPIImageBufferType bufferType
Type of image buffer.
Definition: Image.h:242
int64_t offsetBytes
Offset in bytes from pBase to the first column of the first plane row.
Definition: Image.h:137
int32_t height
Height of this plane in pixels.
Definition: Image.h:123
VPIByte * pBase
Pointer to the memory buffer which contains the plane data.
Definition: Image.h:145
int32_t width
Width of this plane in pixels.
Definition: Image.h:119
int32_t pitchBytes
Difference in bytes of beginning of one row and the beginning of the previous.
Definition: Image.h:134
void vpiImageDestroy(VPIImage img)
Destroy an image instance.
struct VPIImageImpl * VPIImage
A handle to an image.
Definition: Types.h:254
VPIStatus vpiImageCreateWrapper(const VPIImageData *data, const VPIImageWrapperParams *params, uint64_t flags, VPIImage *img)
Create an image object by wrapping an existing memory block.
VPIStatus vpiImageLockData(VPIImage img, VPILockMode mode, VPIImageBufferType bufType, VPIImageData *data)
Acquires the lock on an image object and returns the image contents.
VPIStatus vpiImageCreate(int32_t width, int32_t height, VPIImageFormat fmt, uint64_t flags, VPIImage *img)
Create an empty image instance with the specified flags.
VPIStatus vpiImageUnlock(VPIImage img)
Releases the lock on an image object.
@ VPI_IMAGE_BUFFER_HOST_PITCH_LINEAR
Host-accessible with planes in pitch-linear memory layout.
Definition: Image.h:176
Stores the image plane contents.
Definition: Image.h:154
Stores information about image characteristics and content.
Definition: Image.h:238
struct VPIStreamImpl * VPIStream
A handle to a stream.
Definition: Types.h:248
VPIStatus vpiStreamSync(VPIStream stream)
Blocks the calling thread until all submitted commands in this stream queue are done (queue is empty)...
VPIBackend
VPI Backend types.
Definition: Types.h:91
void vpiStreamDestroy(VPIStream stream)
Destroy a stream instance and deallocate all HW resources.
VPIStatus vpiStreamCreate(uint64_t flags, VPIStream *stream)
Create a stream instance.
@ VPI_BACKEND_CUDA
CUDA backend.
Definition: Types.h:93
@ VPI_BACKEND_PVA
PVA backend.
Definition: Types.h:94
@ VPI_BACKEND_CPU
CPU backend.
Definition: Types.h:92
@ VPI_BORDER_ZERO
All pixels outside the image are considered to be zero.
Definition: Types.h:276
@ VPI_LOCK_READ
Lock memory only for reading.
Definition: Types.h:621

Here is the cmake toolchain file that is being used.

# CMake toolchain file for cross-compiling to aarch64 Linux targets.
# Pass it to cmake with -DCMAKE_TOOLCHAIN_FILE=<path to this file>.

# Target platform description.
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_SYSTEM_NAME Linux)

set(target_arch aarch64-linux-gnu)
set(CMAKE_LIBRARY_ARCHITECTURE ${target_arch} CACHE STRING "" FORCE)

# Libraries, include directories and packages are looked up only inside the
# target root prefix; programs are never looked up there.
set(CMAKE_FIND_ROOT_PATH "/usr/${target_arch}")
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
foreach(find_kind LIBRARY INCLUDE PACKAGE)
    set(CMAKE_FIND_ROOT_PATH_MODE_${find_kind} ONLY)
endforeach()

# Compiler checks build a static library instead of linking an executable;
# the stricter checks fail when cross-compiling.
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

# Toolchain programs: both the C and the C++ cross compiler are mandatory.
find_program(CMAKE_C_COMPILER ${target_arch}-gcc)
find_program(CMAKE_CXX_COMPILER ${target_arch}-g++)
if(NOT (CMAKE_C_COMPILER AND CMAKE_CXX_COMPILER))
    message(FATAL_ERROR "Can't find suitable C/C++ cross compiler for ${target_arch}")
endif()

set(CMAKE_AR ${target_arch}-ar CACHE FILEPATH "" FORCE)
set(CMAKE_RANLIB ${target_arch}-ranlib)
set(CMAKE_LINKER ${target_arch}-ld)

# Not all shared library dependencies are installed on the host machine,
# so tell the linker to tolerate undefined symbols in shared libraries.
set(CMAKE_EXE_LINKER_FLAGS_INIT -Wl,--allow-shlib-undefined)

# Make nvcc use the cross compiler found above as its host compiler.
set(CMAKE_CUDA_FLAGS "-ccbin ${CMAKE_CXX_COMPILER} -Xcompiler -fPIC" CACHE STRING "" FORCE)

And finally the accompanying CMakeLists.txt. Note that it is just a plain simple CMakeLists.txt. Everything related to cross-compilation is defined in the toolchain file above.

cmake_minimum_required(VERSION 3.22)

# To cross-compile for aarch64-l4t target from x86,
# pass -DCMAKE_TOOLCHAIN_FILE=Toolchain_aarch64_l4t.cmake
# to cmake when creating build tree.

project(vpi_sample_08_cross_aarch64_l4t)

# NOTE(review): vpi_API_VERSION is not set anywhere in this file; if it is
# undefined it expands to nothing and any installed VPI version is accepted.
find_package(vpi ${vpi_API_VERSION} REQUIRED)

add_executable(${PROJECT_NAME} main.cpp)

# The executable is the final consumer of VPI, so the dependency is PRIVATE.
target_link_libraries(${PROJECT_NAME} PRIVATE vpi)