NVIPC Integration#

Configuration#

NVIPC supports storing its configuration in a YAML file, and L2 partners are recommended to use one. Below is the example configuration used in Aerial L1. The primary and secondary processes must use the same ring_len and mempool_size configurations.

transport:
  type: shm
  shm_config:
    primary: 1
    prefix: nvipc # Note: prefix string length should be < 32
    cuda_device_id: 0
    ring_len: 8192
    mempool_size:
      cpu_msg:
        buf_size: 8192
        pool_len: 4096
      cpu_data:
        buf_size: 576000
        pool_len: 1024
      cpu_large:
        buf_size: 4096000
        pool_len: 64
      cuda_data:
        buf_size: 307200
        pool_len: 0
      gpu_data:
        buf_size: 576000
        pool_len: 0
  app_config:
    grpc_forward: 0
    debug_timing: 0
    pcap_enable: 0
    pcap_cpu_core: 17 # CPU core of the background pcap log save thread
    pcap_cache_size_bits: 29 # 2^29 = 512MB, size of /dev/shm/${prefix}_pcap
    pcap_file_size_bits: 31 # 2^31 = 2GB, max size of /var/log/aerial/${prefix}_pcap. Requires pcap_file_size_bits > pcap_cache_size_bits.
    pcap_max_data_size: 8000 # Max DL/UL FAPI data size to capture; limits the pcap size.

Initialization#

Here is the reference code for initialization. The NVIPC primary process is responsible for creating and initializing the SHM pools and ring queues. The NVIPC secondary process looks up the pools and queues created by the primary. In Aerial, L1 is the primary process, so L2 should be configured as the secondary process.

// Create configuration
nv_ipc_config_t config;

// Select module_type for the primary or secondary process
nv_ipc_module_t module_type = NV_IPC_MODULE_SECONDARY; // NV_IPC_MODULE_PRIMARY for the primary process

// Recommended initialization: load the NVIPC configuration from the YAML file
load_nv_ipc_yaml_config(&config, yaml_path, module_type);

// Optional: set default configs and overwrite what needs to change
config.ipc_transport = NV_IPC_TRANSPORT_SHM;
if (set_nv_ipc_default_config(&config, module_type) < 0) {
    NVLOGE(TAG, "%s: set configuration failed\n", __func__);
    return -1;
}

// Override the default configurations
config.transport_config.shm.cuda_device_id = test_cuda_device_id;

// Create the IPC interface: nv_ipc_t* ipc
nv_ipc_t* ipc;
if ((ipc = create_nv_ipc_interface(&config)) == NULL) {
    NVLOGE(TAG, "%s: create IPC interface failed\n", __func__);
    return -1;
}

After the IPC interface is successfully created, the shared memory files can be seen in the /dev/shm/ folder. For example, if <prefix> = "nvipc":

ls -al /dev/shm/nvipc*

nvipc_shm
nvipc_cpu_msg
nvipc_cpu_data
nvipc_cpu_large

De-Initialization#

Below is example code for de-initialization.

if (ipc->ipc_destroy(ipc) < 0) {
    NVLOGE(TAG, "%s close IPC interface failed\n", __func__);
}
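
To put initialization and de-initialization together, below is a minimal lifecycle sketch for a secondary (L2) process. It only uses the calls shown above; yaml_path, the function name, and the surrounding error handling are assumed placeholders to adapt to the integrating application.

// Minimal lifecycle sketch for a secondary (L2) process.
// Assumes the NVIPC API header (e.g. nv_ipc.h) and the NVLOGE/TAG logging
// macros used in the snippets above are available.
int nvipc_lifecycle_example(const char* yaml_path)
{
    nv_ipc_config_t config;
    nv_ipc_module_t module_type = NV_IPC_MODULE_SECONDARY; // L2 is the secondary process

    // Recommended: load the NVIPC configuration from the YAML file
    load_nv_ipc_yaml_config(&config, yaml_path, module_type);

    // Create the IPC interface
    nv_ipc_t* ipc = create_nv_ipc_interface(&config);
    if (ipc == NULL)
    {
        NVLOGE(TAG, "%s: create IPC interface failed\n", __func__);
        return -1;
    }

    // ... exchange FAPI messages here (see the Sending and Receive sections) ...

    // De-initialize when done
    if (ipc->ipc_destroy(ipc) < 0)
    {
        NVLOGE(TAG, "%s close IPC interface failed\n", __func__);
        return -1;
    }
    return 0;
}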

Sending#

The procedure for sending is as follows:

allocate buffers -> fill content -> send.

When filling the content, note that for CUDA memory the data_buf field is a CUDA memory pointer that cannot be accessed directly in the CPU memory space. The NVIPC APIs provide basic memcpy functions to copy between CPU memory and CUDA memory. For more advanced CUDA operations, the user can access the GPU memory buffer directly with the CUDA APIs.

nv_ipc_msg_t send_msg;

send_msg.msg_id    = fapi_msg_id;             // Optional: FAPI message ID
send_msg.msg_len   = fapi_msg_len;            // Max length is the MSG buffer size, configurable
send_msg.data_len  = fapi_data_len;           // Max length is the DATA buffer size, configurable
send_msg.data_pool = NV_IPC_MEMPOOL_CPU_DATA; // Options: CPU_MSG, CPU_DATA, CUDA_DATA

// Allocate buffers for the TX message
if (ipc->tx_allocate(ipc, &send_msg, 0) != 0)
{
    NVLOGE(TAG, "%s error: allocate buffer failed\n", __func__);
    return -1;
}

// Fill the MSG content
int8_t fapi_msg[SHM_MSG_BUF_SIZE];
memcpy(send_msg.msg_buf, fapi_msg, send_msg.msg_len);

// Fill the DATA content if data exists
int8_t fapi_data[SHM_MSG_DATA_SIZE];
if (send_msg.data_pool == NV_IPC_MEMPOOL_CPU_DATA) { // CPU_DATA case
    memcpy(send_msg.data_buf, fapi_data, send_msg.data_len);
} else if (send_msg.data_pool == NV_IPC_MEMPOOL_CUDA_DATA) { // CUDA_DATA case
    if (ipc->cuda_memcpy_to_device(ipc, send_msg.data_buf, fapi_data, send_msg.data_len) < 0) {
        NVLOGE(TAG, "%s CUDA copy failed\n", __func__);
    }
} else { // NO_DATA case
    // No data, do nothing
}

// Send the message
if (ipc->tx_send_msg(ipc, &send_msg) < 0) {
    NVLOGE(TAG, "%s error: send message failed\n", __func__);
    // May need to retry later or release the send_msg buffers
    // If it fails, check the configuration: ring queue length > memory pool length
}
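
To show the allocate -> fill -> send flow as one unit, here is a hedged helper sketch for the CPU_DATA case. The function name is illustrative, and the tx_release() call used for cleanup on failure is an assumption about the API's release counterpart to tx_allocate(); adjust it to the interface as delivered.

// Hypothetical helper: send one FAPI message with an attached CPU_DATA buffer.
// Assumes the NVIPC API header, <string.h>, and the NVLOGE/TAG logging macros
// used in the snippets above.
static int nvipc_send_cpu_data(nv_ipc_t* ipc, int msg_id,
                               const void* msg, uint32_t msg_len,
                               const void* data, uint32_t data_len)
{
    nv_ipc_msg_t send_msg;
    send_msg.msg_id    = msg_id;
    send_msg.msg_len   = msg_len;
    send_msg.data_len  = data_len;
    send_msg.data_pool = NV_IPC_MEMPOOL_CPU_DATA;

    // Allocate the MSG and DATA buffers from the shared memory pools
    if (ipc->tx_allocate(ipc, &send_msg, 0) != 0)
    {
        NVLOGE(TAG, "%s error: allocate buffer failed\n", __func__);
        return -1;
    }

    // Fill the MSG and DATA parts
    memcpy(send_msg.msg_buf, msg, msg_len);
    memcpy(send_msg.data_buf, data, data_len);

    // Send; on failure, release the buffers (tx_release() is assumed here)
    if (ipc->tx_send_msg(ipc, &send_msg) < 0)
    {
        NVLOGE(TAG, "%s error: send message failed\n", __func__);
        ipc->tx_release(ipc, &send_msg);
        return -1;
    }
    return 0;
}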

Receive#

The procedure for receiving is as follows:

receive -> handle message -> release buffers.

nv_ipc_msg_t recv_msg;

if (ipc->rx_recv_msg(ipc, &recv_msg) < 0)
{
    NVLOGV(TAG, "%s: no more message available\n", __func__);
    return -1;
}

// Example: handle the MSG part
int8_t fapi_msg[SHM_MSG_BUF_SIZE];
memcpy(fapi_msg, recv_msg.msg_buf, recv_msg.msg_len);

// Example: handle the DATA part
int8_t fapi_data[SHM_MSG_BUF_SIZE];
if (recv_msg.data_pool == NV_IPC_MEMPOOL_CPU_DATA) { // CPU_DATA case
    memcpy(fapi_data, recv_msg.data_buf, recv_msg.data_len);
} else if (recv_msg.data_pool == NV_IPC_MEMPOOL_CUDA_DATA) { // CUDA_DATA case
    if (ipc->cuda_memcpy_to_host(ipc, fapi_data, recv_msg.data_buf, recv_msg.data_len) < 0) {
        NVLOGE(TAG, "%s CUDA copy failed\n", __func__);
    }
} else { // NO_DATA case
    // No data, do nothing
}

if (ipc->rx_release(ipc, &recv_msg) < 0) {
    NVLOGW(TAG, "%s: release error\n", __func__);
}
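
Because rx_recv_msg() returns a negative value when no more messages are available, a common pattern is to drain the queue in a loop and release each message after handling it. Below is a hedged sketch; handle_fapi_msg() is a hypothetical application callback standing in for the MSG/DATA handling shown above.

// Hypothetical helper: drain and release all pending messages.
// Assumes the NVIPC API header and the NVLOGW/TAG logging macros used above;
// handle_fapi_msg() is a placeholder for application-specific handling.
static void nvipc_drain_rx(nv_ipc_t* ipc)
{
    nv_ipc_msg_t recv_msg;

    // rx_recv_msg() returns < 0 when no more messages are available
    while (ipc->rx_recv_msg(ipc, &recv_msg) >= 0)
    {
        handle_fapi_msg(&recv_msg); // parse the MSG/DATA parts as shown above

        // Always release the buffers back to the shared memory pools
        if (ipc->rx_release(ipc, &recv_msg) < 0)
        {
            NVLOGW(TAG, "%s: release error\n", __func__);
        }
    }
}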

Notification#

Two styles of notification APIs are provided: semaphore style and event_fd style. Each NVIPC process can choose either style regardless of which style the peer process uses, but should stick to one style within a process.

At the low level, the SHM IPC library is implemented with event_fd; the semaphore API is a wrapper around the event_fd implementation.

The APIs are ready to use after the IPC interface has been successfully created by create_nv_ipc_interface().

For the semaphore style, usage is simple (a receive-loop sketch follows the list):

  • Receiver:

    ipc->rx_tti_sem_wait(ipc);

  • Sender:

    ipc->tx_tti_sem_post(ipc);
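
As a receive-loop sketch for the semaphore style: the receiver typically blocks on rx_tti_sem_wait() and then drains the queue, for example with the hypothetical nvipc_drain_rx() helper sketched in the Receive section.

// Sketch of a semaphore-style receiver loop. nvipc_drain_rx() is the
// hypothetical helper from the Receive section that receives, handles,
// and releases all pending messages.
static void nvipc_sem_receiver_loop(nv_ipc_t* ipc)
{
    while (1)
    {
        ipc->rx_tti_sem_wait(ipc); // block until the sender posts
        nvipc_drain_rx(ipc);       // then receive all pending messages
    }
}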
    

For the event_fd style, the user should get the fd and use the epoll functions to listen for I/O events, as in the receiver example below. A sender-side sketch follows at the end of this section.

  • Receiver:

    struct epoll_event ev, events[MAX_EVENTS];

    int epoll_fd = epoll_create1(0);
    if (epoll_fd == -1)
    {
        NVLOGE(TAG, "%s epoll_create failed\n", __func__);
    }

    int ipc_rx_event_fd = ipc->get_fd(ipc); // IPC notification API: get_fd()

    ev.events  = EPOLLIN;
    ev.data.fd = ipc_rx_event_fd;
    if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, ev.data.fd, &ev) == -1)
    {
        NVLOGE(TAG, "%s epoll_ctl failed\n", __func__);
    }

    while (1)
    {
        int nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
        if (nfds == -1)
        {
            NVLOGE(TAG, "epoll_wait notified: nfds=%d\n", nfds);
        }

        for (int n = 0; n < nfds; ++n)
        {
            if (events[n].data.fd == ipc_rx_event_fd)
            {
                ipc->get_value(ipc); // IPC notification API: get_value()
                // Receive incoming messages here
            }
        }
    }

    close(epoll_fd);

  • Sender:

    ipc->notify(ipc, 1); // IPC notification API: notify()
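
For completeness, here is a hedged sender-side sketch for the event_fd style: after tx_send_msg() succeeds, the sender calls notify() so that the receiver's epoll loop wakes up. The function name is illustrative, and the message is assumed to be prepared as shown in the Sending section.

// Hypothetical helper: send one prepared message and notify the peer
// (event_fd style). Assumes the NVIPC API header and the NVLOGE/TAG logging
// macros used in the snippets above.
static int nvipc_send_and_notify(nv_ipc_t* ipc, nv_ipc_msg_t* send_msg)
{
    if (ipc->tx_send_msg(ipc, send_msg) < 0)
    {
        NVLOGE(TAG, "%s error: send message failed\n", __func__);
        return -1;
    }

    // Wake up the receiver's epoll_wait(); the examples above pass 1
    ipc->notify(ipc, 1);
    return 0;
}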