VPI - Vision Programming Interface

1.2 Release

Cross-Compilation Targeting aarch64

Overview

This sample shows how to build your applications on an x86_64 host, targeting Jetson devices that use the aarch64 architecture. It uses several features first made available in cmake-3.5. The sample application itself creates an input image, applies a box filter to it and saves the result to disk.

Instructions

JetPack's installer already sets up the cross-compilation toolchain using gcc, but if for some reason it isn't available, install it manually with:

apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu

Now cmake can be instructed to create a cross-compiling build tree by calling it as follows in the samples directory:

cmake . -DCMAKE_TOOLCHAIN_FILE=Toolchain_aarch64_l4t.cmake

The file Toolchain_aarch64_l4t.cmake is included in the samples directory and defines the cross-compiler that will be used, among other configurations. In particular, it also allows cross-compilation of CUDA applications, provided that the CUDA aarch64 cross-compilation libraries are correctly installed on the host.

Note
This sample can also be compiled targeting the host. Just omit the CMAKE_TOOLCHAIN_FILE parameter during cmake invocation.

The usage is:

./vpi_sample_08_cross_aarch64_l4t <backend>

where

  • backend: either cpu, cuda or pva; it defines the backend that will perform the processing.

Source Code

For convenience, here's the code that is also installed in the samples directory.

Language:
29 #include <vpi/Image.h>
30 #include <vpi/Status.h>
31 #include <vpi/Stream.h>
32 #include <vpi/algo/BoxFilter.h>
33 
34 #include <cstring> // for memset
35 #include <fstream>
36 #include <iostream>
37 #include <sstream>
38 
39 #define CHECK_STATUS(STMT) \
40  do \
41  { \
42  VPIStatus status = (STMT); \
43  if (status != VPI_SUCCESS) \
44  { \
45  char buffer[VPI_MAX_STATUS_MESSAGE_LENGTH]; \
46  vpiGetLastStatusMessage(buffer, sizeof(buffer)); \
47  std::ostringstream ss; \
48  ss << vpiStatusGetName(status) << ": " << buffer; \
49  throw std::runtime_error(ss.str()); \
50  } \
51  } while (0);
52 
53 int main(int argc, char *argv[])
54 {
55  // VPI objects that will be used
56  VPIImage image = NULL;
57  VPIImage blurred = NULL;
58  VPIStream stream = NULL;
59 
60  int retval = 0;
61 
62  try
63  {
64  if (argc != 2)
65  {
66  throw std::runtime_error(std::string("Usage: ") + argv[0] + " <cpu|pva|cuda>");
67  }
68 
69  std::string strBackend = argv[1];
70 
71  // Now parse the backend
72  VPIBackend backend;
73 
74  if (strBackend == "cpu")
75  {
76  backend = VPI_BACKEND_CPU;
77  }
78  else if (strBackend == "cuda")
79  {
80  backend = VPI_BACKEND_CUDA;
81  }
82  else if (strBackend == "pva")
83  {
84  backend = VPI_BACKEND_PVA;
85  }
86  else
87  {
88  throw std::runtime_error("Backend '" + strBackend +
89  "' not recognized, it must be either cpu, cuda or pva.");
90  }
91 
92  // Create the stream for the given backend.
93  CHECK_STATUS(vpiStreamCreate(backend, &stream));
94 
95  char imgContents[512][512];
96  for (int i = 0; i < 512; ++i)
97  {
98  for (int j = 0; j < 512; ++j)
99  {
100  imgContents[i][j] = i * 512 + j + i;
101  }
102  }
103 
104  // We now wrap the loaded image into a VPIImage object to be used by VPI.
105  {
106  // First fill VPIImageData with the, well, image data...
107  VPIImageData imgData;
108  memset(&imgData, 0, sizeof(imgData));
109  imgData.format = VPI_IMAGE_FORMAT_U8;
110  imgData.numPlanes = 1;
111  imgData.planes[0].width = 512;
112  imgData.planes[0].height = 512;
113  imgData.planes[0].pitchBytes = 512;
114  imgData.planes[0].data = imgContents[0];
115 
116  // Wrap it into a VPIImage. VPI won't make a copy of it, so the original
117  // image must be in scope at all times.
118  CHECK_STATUS(vpiImageCreateHostMemWrapper(&imgData, 0, &image));
119  }
120 
121  // Now create the output image, single unsigned 8-bit channel.
122  CHECK_STATUS(vpiImageCreate(512, 512, VPI_IMAGE_FORMAT_U8, 0, &blurred));
123 
124  // Submit it for processing passing the image to be blurred and the result image
125  CHECK_STATUS(vpiSubmitBoxFilter(stream, backend, image, blurred, 3, 3, VPI_BORDER_ZERO));
126 
127  // Wait until the algorithm finishes processing
128  CHECK_STATUS(vpiStreamSync(stream));
129 
130  // Now let's retrieve the output image contents and output it to disk
131  {
132  // Lock output image to retrieve its data on cpu memory
133  VPIImageData outData;
134  CHECK_STATUS(vpiImageLock(blurred, VPI_LOCK_READ, &outData));
135 
136  std::ofstream fd(("boxfiltered_" + strBackend + ".pgm").c_str());
137 
138  fd << "P5\n512 512 255\n";
139  for (int i = 0; i < 512; ++i)
140  {
141  fd.write(reinterpret_cast<const char *>(outData.planes[0].data) + outData.planes[0].pitchBytes * i,
142  512);
143  }
144  fd.close();
145 
146  // Done handling output image, don't forget to unlock it.
147  CHECK_STATUS(vpiImageUnlock(blurred));
148  }
149  }
150  catch (std::exception &e)
151  {
152  std::cerr << e.what() << std::endl;
153  retval = 1;
154  }
155 
156  // Clean up
157 
158  // Make sure stream is synchronized before destroying the objects
159  // that might still be in use.
160  if (stream != NULL)
161  {
162  vpiStreamSync(stream);
163  }
164 
165  vpiImageDestroy(image);
166  vpiImageDestroy(blurred);
167  vpiStreamDestroy(stream);
168 
169  return retval;
170 }
171 
172 // vim: ts=8:sw=4:sts=4:et:ai
Declares functions that implement the Box Filter algorithm.
Functions and structures for dealing with VPI images.
Declaration of VPI status codes handling functions.
Declares functions dealing with VPI streams.
VPIStatus vpiSubmitBoxFilter(VPIStream stream, uint32_t backend, VPIImage input, VPIImage output, int32_t kernelSizeX, int32_t kernelSizeY, VPIBorderExtension border)
Runs a 2D box filter over an image.
VPIStatus vpiImageCreateHostMemWrapper(const VPIImageData *hostData, uint32_t flags, VPIImage *img)
Create an image object by wrapping around an existing host memory block.
@ VPI_IMAGE_FORMAT_U8
Single plane with one 8-bit unsigned integer channel.
Definition: ImageFormat.h:104
int32_t height
Height of this plane in pixels.
Definition: Image.h:138
int32_t numPlanes
Number of planes.
Definition: Image.h:161
int32_t width
Width of this plane in pixels.
Definition: Image.h:137
void * data
Pointer to the first row of this plane.
Definition: Image.h:147
int32_t pitchBytes
Difference in bytes of beginning of one row and the beginning of the previous.
Definition: Image.h:139
VPIImagePlane planes[VPI_MAX_PLANE_COUNT]
Data of all image planes.
Definition: Image.h:166
VPIImageFormat format
Image format.
Definition: Image.h:160
VPIStatus vpiImageLock(VPIImage img, VPILockMode mode, VPIImageData *hostData)
Acquires the lock on an image object and returns a pointer to the image planes.
void vpiImageDestroy(VPIImage img)
Destroy an image instance.
struct VPIImageImpl * VPIImage
A handle to an image.
Definition: Types.h:215
VPIStatus vpiImageCreate(int32_t width, int32_t height, VPIImageFormat fmt, uint32_t flags, VPIImage *img)
Create an empty image instance with the specified flags.
VPIStatus vpiImageUnlock(VPIImage img)
Releases the lock on an image object.
Stores information about image characteristics and content.
Definition: Image.h:159
struct VPIStreamImpl * VPIStream
A handle to a stream.
Definition: Types.h:209
VPIStatus vpiStreamSync(VPIStream stream)
Blocks the calling thread until all submitted commands in this stream queue are done (queue is empty)...
VPIBackend
VPI Backend types.
Definition: Types.h:91
void vpiStreamDestroy(VPIStream stream)
Destroy a stream instance and deallocate all HW resources.
VPIStatus vpiStreamCreate(uint32_t flags, VPIStream *stream)
Create a stream instance.
@ VPI_BACKEND_CUDA
CUDA backend.
Definition: Types.h:93
@ VPI_BACKEND_PVA
PVA backend.
Definition: Types.h:94
@ VPI_BACKEND_CPU
CPU backend.
Definition: Types.h:92
@ VPI_BORDER_ZERO
All pixels outside the image are considered to be zero.
Definition: Types.h:237
@ VPI_LOCK_READ
Lock memory only for reading.
Definition: Types.h:383

Here is the cmake toolchain file that is being used.

# Toolchain description for cross-compiling to an aarch64 Linux target.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)

# GNU triplet of the target; it prefixes every cross tool name below and
# names the target root directory.
set(target_arch aarch64-linux-gnu)
set(CMAKE_LIBRARY_ARCHITECTURE "${target_arch}" CACHE STRING "" FORCE)

# Libraries, include directories and packages are searched for only inside
# the target root prefix; programs are never searched for there.
set(CMAKE_FIND_ROOT_PATH "/usr/${target_arch}")
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

# Make the compiler checks build a static library. This avoids some of the
# stricter checks that fail when cross-compiling.
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

# Locate the cross compilers; configuration stops if either one is missing.
find_program(CMAKE_C_COMPILER NAMES "${target_arch}-gcc")
find_program(CMAKE_CXX_COMPILER NAMES "${target_arch}-g++")
if(NOT (CMAKE_C_COMPILER AND CMAKE_CXX_COMPILER))
    message(FATAL_ERROR "Can't find suitable C/C++ cross compiler for ${target_arch}")
endif()

# Remaining binutils, referenced by their target-prefixed names.
set(CMAKE_AR "${target_arch}-ar" CACHE FILEPATH "" FORCE)
set(CMAKE_RANLIB "${target_arch}-ranlib")
set(CMAKE_LINKER "${target_arch}-ld")

# Not every shared library dependency is installed on the host machine,
# so tell the linker not to complain about undefined symbols in them.
set(CMAKE_EXE_LINKER_FLAGS_INIT "-Wl,--allow-shlib-undefined")

# Have nvcc use our cross compiler as its host compiler.
set(CMAKE_CUDA_FLAGS "-ccbin ${CMAKE_CXX_COMPILER} -Xcompiler -fPIC" CACHE STRING "" FORCE)

# vim: ts=8:sw=4:sts=4:et:ai

And finally the accompanying CMakeLists.txt. Note that it is just a plain simple CMakeLists.txt. Everything related to cross-compilation is defined in the toolchain file above.

cmake_minimum_required(VERSION 3.5)

# To cross-compile for aarch64-l4t target from x86,
# pass -DCMAKE_TOOLCHAIN_FILE=Toolchain_aarch64_l4t.cmake
# to cmake when creating build tree.

project(vpi_sample_08_cross_aarch64_l4t)

# NOTE(review): vpi_API_VERSION is not set anywhere in this file. If it is
# empty here, no particular VPI version is requested -- confirm where it is
# expected to come from.
find_package(vpi ${vpi_API_VERSION} REQUIRED)

# The sample is a single executable built from main.cpp and named after
# the project.
add_executable(${PROJECT_NAME} main.cpp)

# PRIVATE: an executable has no consumers, and the explicit keyword avoids
# the legacy keyword-less signature of target_link_libraries.
target_link_libraries(${PROJECT_NAME} PRIVATE vpi)

# vim: ts=8:sw=4:sts=4:et:ai
40 # vim: ts=8:sw=4:sts=4:et:ai