A dynamically connected transport service is an extension to transport services that enables a higher degree of scalability while maintaining high performance for sparse traffic. Utilization of DC transport reduces the total number of QPs required system-wide, by having QPs of reliable type dynamically connect and disconnect from any remote node.
DC QPs are supported only by the mlx5 driver.
DC QPs Experimental vs. RDMA-Core Verbs and Capabilities
Experimental | RDMA-Core | ||
---|---|---|---|
Verbs | |||
ibv_exp_create_dct | mlx5dv_create_qp | ||
dc_key | dct_access_key | ||
access flags | access flags | ||
flow_label | ah_attr.grh.flow_label | ||
Inline_size | Not supported by RDMA-Core | ||
IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT | Supported through opensm | ||
ibv_exp_destroy_dct | ibv_destroy_qp | ||
ibv_exp_query_dct | ibv_query_qp | ||
dc_key | Not supported by RDMA-Core | ||
port | Port | ||
access_flags | access_flags | ||
min_rnr_timer | min_rnr_timer | ||
tclass | tclass | ||
flow_label | flow_label | ||
mtu | mtu | ||
pkey_index | pkey_index | ||
gid_index | gid_index | ||
hop_limit | hop_limit | ||
key_violations | Not supported by RDMA-Core | ||
state | Not supported by RDMA-Core | ||
ibv_exp_post_send | dct_access_key, dct_number | mlx5dv_wr_post | |
ibv_exp_poll_cq | IBV_EXP_WC_DCT | mlx5dv_wr_set_dc_addr | |
ibv_exp_modify_qp | dct_key, IBV_EXP_QP_DC_KEY | Not supported by RDMA-Core | |
Capabilities and Device Attributes | |||
IBV_EXP_DEVICE_DC_TRANSPORT | No DC capabilities flags needed in RDMA-Core | ||
IBV_EXP_DEVICE_DC_RD_REQ, IBV_EXP_DEVICE_DC_RD_RES | |||
IBV_EXP_DEVICE_DC_INFO | |||
IBV_EXP_DEVICE_ATTR_MAX_DCT | |||
IBV_EXP_TM_CAP_DC | |||
Relevant Man Pages
- mlx5dv_create_qp: https://github.com/linux-rdma/rdma-core/blob/master/providers/mlx5/man/mlx5dv_create_qp.3.md
- mlx5dv_wr_post: https://github.com/linux-rdma/rdma-core/blob/master/providers/mlx5/man/mlx5dv_wr_post.3.md
Example
/**Create DC QP**/ struct mlx5dv_qp_init_attr dv_init_attr; struct ibv_qp_init_attr_ex init_attr; memset(&dv_init_attr, 0, sizeof(dv_init_attr)); memset(&init_attr, 0, sizeof(init_attr)); init_attr.qp_type = IBV_QPT_DRIVER; init_attr.send_cq = send_cq; init_attr.recv_cq = recv_cq; init_attr.pd = pd; if (initiator) { /** DCI **/ init_attr.comp_mask |= IBV_QP_INIT_ATTR_SEND_OPS_FLAGS | IBV_QP_INIT_ATTR_PD; init_attr.send_ops_flags |= IBV_QP_EX_WITH_SEND; dv_init_attr.comp_mask |= MLX5DV_QP_INIT_ATTR_MASK_DC | MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; dv_init_attr.create_flags |= MLX5DV_QP_CREATE_DISABLE_SCATTER_TO_CQE; dv_init_attr.dc_init_attr.dc_type = MLX5DV_DCTYPE_DCI; } else { /** DCT **/ init_attr.comp_mask |= IBV_QP_INIT_ATTR_PD; init_attr.srq = srq; dv_init_attr.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_DC; dv_init_attr.dc_init_attr.dc_type = MLX5DV_DCTYPE_DCT; dv_init_attr.dc_init_attr.dct_access_key = DC_KEY; } qp = mlx5dv_create_qp(context, &init_attr, &dv_init_attr); if (initiator) { ex_qp = ibv_qp_to_qp_ex(qp); dv_qp = mlx5dv_qp_ex_from_ibv_qp_ex(ex_qp); }
/**DCI post send**/ struct ibv_ah_attr ah_attr; ah_attr.dlid = rem_dest->lid; ah_attr.port_num = ib_port; ah = ibv_create_ah(pd, &ah_attr); if (ah) { return -1; } ibv_wr_start(ex_qp); ex_qp->wr_id = SEND_WRID; ex_qp->wr_flags = IBV_SEND_SIGNALED; ibv_wr_send(ex_qp); mlx5dv_wr_set_dc_addr(dv_qp, ah, rem_dest->dctn, DC_KEY); ibv_wr_set_sge(ex_qp, mr->lkey, (uint64_t)mr->addr, size); ibv_wr_complete(ex_qp); return 0;