
NVIPC Integration

NVIPC supports storing its configuration in a YAML file, and L2 partners are recommended to use one. Below is the example configuration used in Aerial L1 (a secondary-side fragment follows it). Both the primary and secondary processes must have the same ring_len and mempool_size configurations.

transport:
  type: shm
  shm_config:
    primary: 1
    prefix: nvipc         # Note: prefix string length should be < 32
    cuda_device_id: 0
    ring_len: 8192
    mempool_size:
      cpu_msg:
        buf_size: 8192
        pool_len: 4096
      cpu_data:
        buf_size: 576000
        pool_len: 1024
      cpu_large:
        buf_size: 4096000
        pool_len: 64
      cuda_data:
        buf_size: 307200
        pool_len: 0
      gpu_data:
        buf_size: 576000
        pool_len: 0
  app_config:
    grpc_forward: 0
    debug_timing: 0
    pcap_enable: 0
    pcap_cpu_core: 17          # CPU core of the background pcap log save thread
    pcap_cache_size_bits: 29   # 2^29 = 512MB, size of /dev/shm/${prefix}_pcap
    pcap_file_size_bits: 31    # 2^31 = 2GB, max size of /var/log/aerial/${prefix}_pcap. Requires pcap_file_size_bits > pcap_cache_size_bits.
    pcap_max_data_size: 8000   # Max DL/UL FAPI data size to capture, to reduce pcap size
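
For the L2 (secondary) process, a natural counterpart (an assumption, not shown on this page) is to reuse the same settings with the process marked as non-primary, while keeping the prefix, ring_len, and mempool_size identical to the L1 configuration above. A minimal sketch of the secondary-side transport section:

transport:
  type: shm
  shm_config:
    primary: 0        # assumed secondary-side value; 1 marks the primary (L1) process
    prefix: nvipc     # must match the primary process
    cuda_device_id: 0
    ring_len: 8192    # must match the primary process
    mempool_size:
      # cpu_msg, cpu_data, cpu_large, cuda_data, and gpu_data entries identical to the
      # primary configuration above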

Here is the reference code for initialization. The NVIPC primary process is responsible for creating and initializing the SHM pools and ring queues; the NVIPC secondary process looks up the pools and queues that were created. Aerial L1 is the primary process, so L2 should be configured as the secondary process.

// Create configuration
nv_ipc_config_t config;

// Select module_type for the primary or secondary process
nv_ipc_module_t module_type = NV_IPC_MODULE_PRIMARY; // or NV_IPC_MODULE_SECONDARY for L2

// Recommended initialization: load the NVIPC configurations from the YAML file
load_nv_ipc_yaml_config(&config, yaml_path, module_type);

// Optional: set default configs and overwrite what needs to change
config.ipc_transport = NV_IPC_TRANSPORT_SHM;
if (set_nv_ipc_default_config(&config, module_type) < 0) {
    NVLOGE(TAG, "%s: set configuration failed\n", __func__);
    return -1;
}

// Override the default configurations
config.transport_config.shm.cuda_device_id = test_cuda_device_id;

// Create the IPC interface: nv_ipc_t* ipc
nv_ipc_t* ipc;
if ((ipc = create_nv_ipc_interface(&config)) == NULL) {
    NVLOGE(TAG, "%s: create IPC interface failed\n", __func__);
    return -1;
}

After the IPC interface has been successfully created, several shared memory files can be seen in the /dev/shm/ folder. For example, if <prefix>="nvipc":

ls -al /dev/shm/nvipc*
nvipc_shm
nvipc_cpu_msg
nvipc_cpu_data
nvipc_cpu_large

The example code below is for de-initialization.

if (ipc->ipc_destroy(ipc) < 0) {
    NVLOGE(TAG, "%s close IPC interface failed\n", __func__);
}

The procedure for sending is as follows:

allocate buffers -> fill content -> send

When filling the content for CUDA memory, data_buf is a CUDA memory pointer that cannot be accessed directly from the CPU memory space. The NVIPC APIs provide basic memcpy functions to copy between CPU memory and CUDA memory. For other CUDA operations, the user can access the GPU memory buffer directly with CUDA APIs (see the sketch after the send example below).

nv_ipc_msg_t send_msg;
send_msg.msg_id    = fapi_msg_id;             // Optional: FAPI message ID
send_msg.msg_len   = fapi_msg_len;            // Max length is the MSG buffer size, configurable
send_msg.data_len  = fapi_data_len;           // Max length is the DATA buffer size, configurable
send_msg.data_pool = NV_IPC_MEMPOOL_CPU_DATA; // Options: CPU_MSG, CPU_DATA, CUDA_DATA

// Allocate buffers for the TX message
if (ipc->tx_allocate(ipc, &send_msg, 0) != 0) {
    NVLOGE(TAG, "%s error: allocate buffer failed\n", __func__);
    return -1;
}

// Fill the MSG content
int8_t fapi_msg[SHM_MSG_BUF_SIZE];
memcpy(send_msg.msg_buf, fapi_msg, send_msg.msg_len);

// Fill the DATA content if data exists
int8_t fapi_data[SHM_MSG_DATA_SIZE];
if (send_msg.data_pool == NV_IPC_MEMPOOL_CPU_DATA) { // CPU_DATA case
    memcpy(send_msg.data_buf, fapi_data, send_msg.data_len);
} else if (send_msg.data_pool == NV_IPC_MEMPOOL_CUDA_DATA) { // CUDA_DATA case
    if (ipc->cuda_memcpy_to_device(ipc, send_msg.data_buf, fapi_data, send_msg.data_len) < 0) {
        NVLOGE(TAG, "%s CUDA copy failed\n", __func__);
    }
} else { // NO_DATA case
    // No data, do nothing
}

// Send the message
if (ipc->tx_send_msg(ipc, &send_msg) < 0) {
    NVLOGE(TAG, "%s error: send message failed\n", __func__);
    // May need to retry later or release the send_msg buffers.
    // If sending keeps failing, check the configuration: ring queue length > memory pool length
}
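
As noted above, for the CUDA_DATA case the payload can also be accessed directly with CUDA APIs instead of the cuda_memcpy_to_device() helper. The fragment below is a minimal sketch under the assumption that send_msg.data_buf points to device memory on the configured cuda_device_id and that fapi_data is the host buffer from the example above.

#include <cuda_runtime.h>

// Sketch: write the CUDA_DATA payload with the CUDA runtime instead of the NVIPC copy helper
if (send_msg.data_pool == NV_IPC_MEMPOOL_CUDA_DATA) {
    cudaError_t err = cudaMemcpy(send_msg.data_buf, fapi_data, send_msg.data_len, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        NVLOGE(TAG, "%s cudaMemcpy failed: %s\n", __func__, cudaGetErrorString(err));
    }
    // A kernel may also read or write send_msg.data_buf in place; synchronize the stream
    // before calling tx_send_msg() so the receiver sees completed data.
}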

The procedure for receiving is as follows:

receive -> handle message -> release buffers

nv_ipc_msg_t recv_msg;
if (ipc->rx_recv_msg(ipc, &recv_msg) < 0) {
    NVLOGV(TAG, "%s: no more message available\n", __func__);
    return -1;
}

// Example: handle the MSG part
int8_t fapi_msg[SHM_MSG_BUF_SIZE];
memcpy(fapi_msg, recv_msg.msg_buf, recv_msg.msg_len);

// Example: handle the DATA part
int8_t fapi_data[SHM_MSG_BUF_SIZE];
if (recv_msg.data_pool == NV_IPC_MEMPOOL_CPU_DATA) { // CPU_DATA case
    memcpy(fapi_data, recv_msg.data_buf, recv_msg.data_len);
} else if (recv_msg.data_pool == NV_IPC_MEMPOOL_CUDA_DATA) { // CUDA_DATA case
    if (ipc->cuda_memcpy_to_host(ipc, fapi_data, recv_msg.data_buf, recv_msg.data_len) < 0) {
        NVLOGE(TAG, "%s CUDA copy failed\n", __func__);
    }
} else { // NO_DATA case
    // No data, do nothing
}

// Release the RX message buffers
if (ipc->rx_release(ipc, &recv_msg) < 0) {
    NVLOGW(TAG, "%s: release error\n", __func__);
}

Two styles of notification APIs are provided: semaphore style and event_fd style. Each NVIPC process can choose either style regardless of which style the peer process uses, but should stick to one style within a process.

At the low level, the SHM IPC library is implemented with event_fd; the semaphore API interface is a wrapper around the event_fd implementation.

The APIs are ready to use after the IPC interface has been successfully created by create_nv_ipc_interface().

For the semaphore style, usage is simple (a combined receive-loop sketch follows the two calls below):

  • Receiver:

    ipc->rx_tti_sem_wait(ipc);


  • Sender:

    ipc->tx_tti_sem_post(ipc);


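A typical per-TTI receiver combines the semaphore wait with the receive and release calls shown earlier. The loop below is a minimal sketch under that assumption; running and handle_msg() are hypothetical application-level names, not part of the NVIPC API.

// Sketch: semaphore-style receiver loop (per TTI)
while (running) {                                   // running: application-level flag (assumption)
    ipc->rx_tti_sem_wait(ipc);                      // block until the sender posts

    nv_ipc_msg_t recv_msg;
    while (ipc->rx_recv_msg(ipc, &recv_msg) >= 0) { // drain all queued messages
        handle_msg(&recv_msg);                      // hypothetical FAPI message handler
        ipc->rx_release(ipc, &recv_msg);            // return the buffers to the pools
    }
}
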
For the event_fd style, the user should get the fd and use the epoll functions to listen for I/O events. A sender-side sketch that ties notify() to the send flow follows the two examples below.

  • Receiver:

    struct epoll_event ev, events[MAX_EVENTS];

    int epoll_fd = epoll_create1(0);
    if (epoll_fd == -1) {
        NVLOGE(TAG, "%s epoll_create failed\n", __func__);
    }

    int ipc_rx_event_fd = ipc->get_fd(ipc); // IPC notification API: get_fd()
    ev.events  = EPOLLIN;
    ev.data.fd = ipc_rx_event_fd;
    if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, ev.data.fd, &ev) == -1) {
        NVLOGE(TAG, "%s epoll_ctl failed\n", __func__);
    }

    while (1) {
        int nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
        if (nfds == -1) {
            NVLOGE(TAG, "epoll_wait notified: nfds=%d\n", nfds);
        }
        for (int n = 0; n < nfds; ++n) {
            if (events[n].data.fd == ipc_rx_event_fd) {
                ipc->get_value(ipc); // IPC notification API: get_value()
                // Receive incoming messages here
            }
        }
    }
    close(epoll_fd);


  • Sender:

    ipc->notify(ipc, 1); // IPC notification API: notify()


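On the sending side, notify() is typically called after the messages for a slot have been queued with tx_send_msg(). The fragment below is a minimal sketch under the assumption that the value passed to notify() is the number of newly sent messages; num_msgs and send_msgs[] are hypothetical application-level names.

// Sketch: event_fd-style sender for one slot (assumed notify semantics: message count)
int sent = 0;
for (int i = 0; i < num_msgs; i++) {
    if (ipc->tx_send_msg(ipc, &send_msgs[i]) >= 0) {
        sent++;
    }
}
if (sent > 0) {
    ipc->notify(ipc, sent); // wake the epoll-based receiver once per slot
}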