Dynamically Connected (DC) QPs
The Dynamically Connected (DC) transport service is an extension to the existing transport services that enables a higher degree of scalability while maintaining high performance for sparse traffic. Using DC transport reduces the total number of QPs required system-wide, because reliable-type QPs dynamically connect to, and disconnect from, any remote node.
DC QPs are supported only by the mlx5 driver.
Experimental |
RDMA-Core |
||
Verbs |
|||
ibv_exp_create_dct |
mlx5dv_create_qp
|
||
dc_key |
dct_access_key |
||
access flags |
access flags |
||
flow_label |
ah_attr.grh.flow_label |
||
inline_size |
Not supported by RDMA-Core |
||
IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT |
Supported through opensm |
||
ibv_exp_destroy_dct |
ibv_destroy_qp |
||
ibv_exp_query_dct |
ibv_query_qp |
||
dc_key |
Not supported by RDMA-Core |
||
port |
Port |
||
access_flags |
access_flags |
||
min_rnr_timer |
min_rnr_timer |
||
tclass |
tclass |
||
flow_label |
flow_label |
||
mtu |
mtu |
||
pkey_index |
pkey_index |
||
gid_index |
gid_index |
||
hop_limit |
hop_limit |
||
key_violations |
Not supported by RDMA-Core |
||
state |
Not supported by RDMA-Core |
||
ibv_exp_post_send |
dct_access_key, dct_number |
mlx5dv_wr_post |
|
ibv_exp_poll_cq |
IBV_EXP_WC_DCT |
mlx5dv_wr_set_dc_addr |
|
ibv_exp_modify_qp |
dct_key, IBV_EXP_QP_DC_KEY |
Not supported by RDMA-Core |
|
Capabilities and Device Attributes |
|||
IBV_EXP_DEVICE_DC_TRANSPORT |
No DC capabilities flags needed in RDMA-Core |
||
IBV_EXP_DEVICE_DC_RD_REQ, IBV_EXP_DEVICE_DC_RD_RES |
|||
IBV_EXP_DEVICE_DC_INFO |
|||
IBV_EXP_DEVICE_ATTR_MAX_DCT |
|||
IBV_EXP_TM_CAP_DC |
/*
 * Create DC QP
 *
 * The QP is created through mlx5dv_create_qp() with the generic QP type set
 * to IBV_QPT_DRIVER; dv_init_attr.dc_init_attr.dc_type selects whether it is
 * an initiator (DCI) or a target (DCT).
 *
 * Expects send_cq, recv_cq, pd, srq, context, initiator and DC_KEY to be
 * provided by the surrounding code. On failure the snippet returns -1.
 */
struct mlx5dv_qp_init_attr dv_init_attr;
struct ibv_qp_init_attr_ex init_attr;

/* Start from all-zero attributes so that only the fields set below are used. */
memset(&dv_init_attr, 0, sizeof(dv_init_attr));
memset(&init_attr, 0, sizeof(init_attr));

init_attr.qp_type = IBV_QPT_DRIVER;
init_attr.send_cq = send_cq;
init_attr.recv_cq = recv_cq;
init_attr.pd = pd;

if (initiator) {
	/* DCI: request the send operation and disable scatter-to-CQE. */
	init_attr.comp_mask |= IBV_QP_INIT_ATTR_SEND_OPS_FLAGS | IBV_QP_INIT_ATTR_PD;
	init_attr.send_ops_flags |= IBV_QP_EX_WITH_SEND;
	dv_init_attr.comp_mask |=
		MLX5DV_QP_INIT_ATTR_MASK_DC |
		MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
	dv_init_attr.create_flags |=
		MLX5DV_QP_CREATE_DISABLE_SCATTER_TO_CQE;
	dv_init_attr.dc_init_attr.dc_type = MLX5DV_DCTYPE_DCI;
} else {
	/* DCT: attached to an SRQ and identified by its DC access key. */
	init_attr.comp_mask |= IBV_QP_INIT_ATTR_PD;
	init_attr.srq = srq;
	dv_init_attr.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_DC;
	dv_init_attr.dc_init_attr.dc_type = MLX5DV_DCTYPE_DCT;
	dv_init_attr.dc_init_attr.dct_access_key = DC_KEY;
}

qp = mlx5dv_create_qp(context, &init_attr, &dv_init_attr);
if (!qp) {
	/* Creation failed; do not hand a NULL QP to the conversions below. */
	return -1;
}

if (initiator) {
	/* Handles used later to post work requests on the DCI. */
	ex_qp = ibv_qp_to_qp_ex(qp);
	dv_qp = mlx5dv_qp_ex_from_ibv_qp_ex(ex_qp);
}
/**DCI post send**/
struct ibv_ah_attr ah_attr;
ah_attr.dlid = rem_dest->lid;
ah_attr.port_num = ib_port;
ah = ibv_create_ah(pd, &ah_attr);
if
(ah) {
return
-1
;
}
ibv_wr_start(ex_qp);
ex_qp->wr_id = SEND_WRID;
ex_qp->wr_flags = IBV_SEND_SIGNALED;
ibv_wr_send(ex_qp);
mlx5dv_wr_set_dc_addr(dv_qp, ah, rem_dest->dctn, DC_KEY);
ibv_wr_set_sge(ex_qp, mr->lkey, (uint64_t)mr->addr, size);
ibv_wr_complete(ex_qp);
return
0
;