From 860c9af0501ada5462a636d52acab2eb38122896 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Thu, 17 Apr 2025 16:54:41 +0530 Subject: [PATCH 1/4] bnxt_re/lib: Support debug logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new logging function and new environment variables to control it. This will be used in subsequent patches for debug logging. Below environment variables are supported. BNXT_DEBUG_FILE: User can provide absolute file location that will be used for logging the prints from library code. Default path is – “/var/log/messages” BNXT_DEBUG_MASK: Set this value to log direct verbs related information. BNXT_DUMP_DV = 1 Signed-off-by: Kashyap Desai Signed-off-by: Sriharsha Basavapatna Reviewed-by: Damodharam Ammepalli Reviewed-by: Kalesh AP --- providers/bnxt_re/main.c | 40 ++++++++++++++++++++++++++++++++++++++ providers/bnxt_re/main.h | 42 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/providers/bnxt_re/main.c b/providers/bnxt_re/main.c index a99f3afc1..c1833cd6a 100644 --- a/providers/bnxt_re/main.c +++ b/providers/bnxt_re/main.c @@ -91,6 +91,8 @@ static const struct verbs_match_ent cna_table[] = { {} }; +uint32_t bnxt_debug_mask; + static const struct verbs_context_ops bnxt_re_cntx_ops = { .query_device_ex = bnxt_re_query_device, .query_port = bnxt_re_query_port, @@ -181,6 +183,39 @@ static int bnxt_re_alloc_map_dbr_bar_page(struct ibv_context *ibvctx) return 0; } +static void bnxt_open_debug_file(FILE **dbg_fp) +{ + FILE *default_dbg_fp = NULL; + const char *env; + + env = getenv("BNXT_DEBUG_FILE"); + + if (!env) + env = "/var/log/messages"; + + *dbg_fp = fopen(env, "aw+"); + if (!*dbg_fp) { + *dbg_fp = default_dbg_fp; + bnxt_err(NULL, "Failed opening debug file %s\n", env); + return; + } +} + +static void bnxt_close_debug_file(FILE *dbg_fp) +{ + if (dbg_fp && dbg_fp != stderr) + fclose(dbg_fp); +} + +static void bnxt_set_debug_mask(void) +{ + char 
*env; + + env = getenv("BNXT_DEBUG_MASK"); + if (env) + bnxt_debug_mask = strtol(env, NULL, 0); +} + /* Context Init functions */ static struct verbs_context *bnxt_re_alloc_context(struct ibv_device *vdev, int cmd_fd, @@ -261,6 +296,9 @@ static struct verbs_context *bnxt_re_alloc_context(struct ibv_device *vdev, if (ret) goto failed; + bnxt_open_debug_file(&cntx->dbg_fp); + bnxt_set_debug_mask(); + return &cntx->ibvctx; failed: @@ -274,6 +312,8 @@ static void bnxt_re_free_context(struct ibv_context *ibvctx) struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx); struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibvctx->device); + bnxt_close_debug_file(cntx->dbg_fp); + /* Unmap if anything device specific was mapped in init_context. */ pthread_mutex_destroy(&cntx->shlock); if (cntx->shpg) diff --git a/providers/bnxt_re/main.h b/providers/bnxt_re/main.h index 067b0c18d..7e123df94 100644 --- a/providers/bnxt_re/main.h +++ b/providers/bnxt_re/main.h @@ -45,6 +45,8 @@ #include #include #include +#include +#include #include #include @@ -270,6 +272,7 @@ struct bnxt_re_context { uint32_t wc_handle; void *dbr_page; void *bar_map; + FILE *dbg_fp; }; struct bnxt_re_pacing_data { @@ -321,6 +324,45 @@ int bnxt_re_get_toggle_mem(struct ibv_context *ibvctx, struct bnxt_re_mmap_info *minfo, uint32_t *page_handle); +extern uint32_t bnxt_debug_mask; +enum { + BNXT_DUMP_DV = 1 << 0, +}; + +#define LEN_50 50 +#define bnxt_trace_dv(cntx, fmt, ...) \ +{ \ + if (bnxt_debug_mask & BNXT_DUMP_DV) \ + bnxt_err(cntx, fmt, ##__VA_ARGS__); \ +} + +static inline void bnxt_err(struct bnxt_re_context *cntx, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); + +static inline void bnxt_err(struct bnxt_re_context *cntx, const char *fmt, ...) +{ + FILE *fp = cntx ? 
cntx->dbg_fp : stderr; + char prefix[LEN_50] = {}; + char timestamp[LEN_50]; + struct tm *timeinfo; + time_t rawtime; + va_list args; + + time(&rawtime); + timeinfo = localtime(&rawtime); + + strftime(timestamp, LEN_50, "%b %d %X", timeinfo); + sprintf(prefix, " %s: ", "libbnxt_re"); + + if (!fp) + return; + va_start(args, fmt); + fprintf(fp, "%s", timestamp); + fprintf(fp, "%s", prefix); + vfprintf(fp, fmt, args); + va_end(args); +} + /* pointer conversion functions*/ static inline struct bnxt_re_dev *to_bnxt_re_dev(struct ibv_device *ibvdev) { From c3a5dc21b39d40182325715fd36351038517d3c7 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Fri, 28 Nov 2025 22:31:19 +0530 Subject: [PATCH 2/4] Update kernel headers To commit: ?? ("RDMA/bnxt_re: Direct Verbs: Support CQ and QP verbs"). Signed-off-by: Sriharsha Basavapatna --- kernel-headers/rdma/bnxt_re-abi.h | 47 ++++++++++++++++++++++++++++++ kernel-headers/rdma/irdma-abi.h | 16 +++++++++- kernel-headers/rdma/rdma_user_cm.h | 16 +++++++++- 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/kernel-headers/rdma/bnxt_re-abi.h b/kernel-headers/rdma/bnxt_re-abi.h index faa9d62b3..53139c439 100644 --- a/kernel-headers/rdma/bnxt_re-abi.h +++ b/kernel-headers/rdma/bnxt_re-abi.h @@ -101,10 +101,14 @@ struct bnxt_re_pd_resp { struct bnxt_re_cq_req { __aligned_u64 cq_va; __aligned_u64 cq_handle; + __aligned_u64 comp_mask; + __u32 ncqe; + __u32 dmabuf_fd; }; enum bnxt_re_cq_mask { BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT = 0x1, + BNXT_RE_CQ_DV_SUPPORT = 0x2 }; struct bnxt_re_cq_resp { @@ -121,6 +125,7 @@ struct bnxt_re_resize_cq_req { enum bnxt_re_qp_mask { BNXT_RE_QP_REQ_MASK_VAR_WQE_SQ_SLOTS = 0x1, + BNXT_RE_QP_DV_SUPPORT = 0x2, }; struct bnxt_re_qp_req { @@ -129,11 +134,23 @@ struct bnxt_re_qp_req { __aligned_u64 qp_handle; __aligned_u64 comp_mask; __u32 sq_slots; + __u32 pd_id; + __u32 dpi; + __u32 sq_dmabuf_fd; + __u32 sq_len; /* total len including MSN area */ + __u32 sq_wqe_sz; + __u32 sq_psn_sz; + __u32 
sq_npsn; + __u32 rq_dmabuf_fd; + __u32 rq_len; + __u32 rq_slots; /* == max_recv_wr */ + __u32 rq_wqe_sz; }; struct bnxt_re_qp_resp { __u32 qpid; __u32 rsvd; + __aligned_u64 comp_mask; }; struct bnxt_re_srq_req { @@ -162,6 +179,7 @@ enum bnxt_re_objects { BNXT_RE_OBJECT_ALLOC_PAGE = (1U << UVERBS_ID_NS_SHIFT), BNXT_RE_OBJECT_NOTIFY_DRV, BNXT_RE_OBJECT_GET_TOGGLE_MEM, + BNXT_RE_OBJECT_DBR, }; enum bnxt_re_alloc_page_type { @@ -215,4 +233,33 @@ enum bnxt_re_toggle_mem_methods { BNXT_RE_METHOD_GET_TOGGLE_MEM = (1U << UVERBS_ID_NS_SHIFT), BNXT_RE_METHOD_RELEASE_TOGGLE_MEM, }; + +struct bnxt_re_dv_db_region { + __u32 dbr_handle; + __u32 dpi; + __u64 umdbr; + void *dbr; + __aligned_u64 comp_mask; +}; + +enum bnxt_re_obj_dbr_alloc_attrs { + BNXT_RE_DV_ALLOC_DBR_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + BNXT_RE_DV_ALLOC_DBR_ATTR, + BNXT_RE_DV_ALLOC_DBR_OFFSET, +}; + +enum bnxt_re_obj_dbr_free_attrs { + BNXT_RE_DV_FREE_DBR_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum bnxt_re_obj_dbr_query_attrs { + BNXT_RE_DV_QUERY_DBR_ATTR = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum bnxt_re_obj_dpi_methods { + BNXT_RE_METHOD_DBR_ALLOC = (1U << UVERBS_ID_NS_SHIFT), + BNXT_RE_METHOD_DBR_FREE, + BNXT_RE_METHOD_DBR_QUERY, +}; + #endif /* __BNXT_RE_UVERBS_ABI_H__*/ diff --git a/kernel-headers/rdma/irdma-abi.h b/kernel-headers/rdma/irdma-abi.h index bb18f1548..f7788d333 100644 --- a/kernel-headers/rdma/irdma-abi.h +++ b/kernel-headers/rdma/irdma-abi.h @@ -20,11 +20,14 @@ enum irdma_memreg_type { IRDMA_MEMREG_TYPE_MEM = 0, IRDMA_MEMREG_TYPE_QP = 1, IRDMA_MEMREG_TYPE_CQ = 2, + IRDMA_MEMREG_TYPE_SRQ = 3, }; enum { IRDMA_ALLOC_UCTX_USE_RAW_ATTR = 1 << 0, IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE = 1 << 1, + IRDMA_ALLOC_UCTX_MAX_HW_SRQ_QUANTA = 1 << 2, + IRDMA_SUPPORT_WQE_FORMAT_V2 = 1 << 3, }; struct irdma_alloc_ucontext_req { @@ -54,7 +57,8 @@ struct irdma_alloc_ucontext_resp { __u8 rsvd2; __aligned_u64 comp_mask; __u16 min_hw_wq_size; - __u8 rsvd3[6]; + __u32 max_hw_srq_quanta; + __u8 rsvd3[2]; }; struct 
irdma_alloc_pd_resp { @@ -71,6 +75,16 @@ struct irdma_create_cq_req { __aligned_u64 user_shadow_area; }; +struct irdma_create_srq_req { + __aligned_u64 user_srq_buf; + __aligned_u64 user_shadow_area; +}; + +struct irdma_create_srq_resp { + __u32 srq_id; + __u32 srq_size; +}; + struct irdma_create_qp_req { __aligned_u64 user_wqe_bufs; __aligned_u64 user_compl_ctx; diff --git a/kernel-headers/rdma/rdma_user_cm.h b/kernel-headers/rdma/rdma_user_cm.h index 00501da05..5ded17468 100644 --- a/kernel-headers/rdma/rdma_user_cm.h +++ b/kernel-headers/rdma/rdma_user_cm.h @@ -68,7 +68,8 @@ enum { RDMA_USER_CM_CMD_BIND, RDMA_USER_CM_CMD_RESOLVE_ADDR, RDMA_USER_CM_CMD_JOIN_MCAST, - RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE + RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE, + RDMA_USER_CM_CMD_WRITE_CM_EVENT, }; /* See IBTA Annex A11, servies ID bytes 4 & 5 */ @@ -304,6 +305,7 @@ struct rdma_ucm_event_resp { union { struct rdma_ucm_conn_param conn; struct rdma_ucm_ud_param ud; + __u32 arg32[2]; } param; __u32 reserved; struct rdma_ucm_ece ece; @@ -362,4 +364,16 @@ struct rdma_ucm_resolve_ib_service { __u32 id; struct rdma_ucm_ib_service ibs; }; + +struct rdma_ucm_write_cm_event { + __u32 id; + __u32 reserved; + __u32 event; + __u32 status; + union { + struct rdma_ucm_conn_param conn; + struct rdma_ucm_ud_param ud; + __u64 arg; + } param; +}; #endif /* RDMA_USER_CM_H */ From a8deb6a70a981f9d101ff2eed43554ea1a20c148 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 17 Jun 2025 03:02:43 -0700 Subject: [PATCH 3/4] bnxt_re/lib: Direct Verbs: Support DBR and UMEM verbs The following Direct Verb (DV) APIs have been implemented in this patch. Doorbell Region Direct Verbs: ----------------------------- - bnxt_re_dv_alloc_db_region(): This will allow the application to create extra doorbell regions and use the associated doorbell page index in dv_create_qp() and use the associated DB address while ringing the doorbell. - bnxt_re_dv_free_db_region(): Free the allocated doorbell region. 
- bnxt_re_dv_get_default_db_region(): Return the default doorbell page index and doorbell page address associated with the ucontext. Umem Registration Direct Verbs: ------------------------------- - bnxt_re_dv_umem_reg(): Register the user memory to be used by the application with the library. Application can register a large chunk of memory and use it during subsequent resource creation DV APIs. Note that the API terminates in the library and the app specified memory params (addr, len) are saved and a umem-handle is returned. That is, there is no ioctl to the driver at this point to map/pin the user memory. This memory is mapped/pinned later when the application creates the required resources (CQ/QP) using respective direct verbs. This is implemented in the next patch in this series. - bnxt_re_dv_umem_dereg(): Deregister the user memory specified by the umem-handle. Co-developed-by: Sriharsha Basavapatna Signed-off-by: Sriharsha Basavapatna Signed-off-by: Kalesh AP Reviewed-by: Selvin Thyparampil Xavier --- providers/bnxt_re/CMakeLists.txt | 1 + providers/bnxt_re/bnxt_re.map | 10 ++ providers/bnxt_re/bnxt_re_dv.h | 78 ++++++++++++ providers/bnxt_re/dv.c | 199 +++++++++++++++++++++++++++++++ providers/bnxt_re/dv_internal.h | 52 ++++++++ 5 files changed, 340 insertions(+) create mode 100644 providers/bnxt_re/bnxt_re.map create mode 100644 providers/bnxt_re/bnxt_re_dv.h create mode 100644 providers/bnxt_re/dv.c create mode 100644 providers/bnxt_re/dv_internal.h diff --git a/providers/bnxt_re/CMakeLists.txt b/providers/bnxt_re/CMakeLists.txt index 13ad287c1..74ff88a4d 100644 --- a/providers/bnxt_re/CMakeLists.txt +++ b/providers/bnxt_re/CMakeLists.txt @@ -3,4 +3,5 @@ rdma_provider(bnxt_re main.c memory.c verbs.c + dv.c ) diff --git a/providers/bnxt_re/bnxt_re.map b/providers/bnxt_re/bnxt_re.map new file mode 100644 index 000000000..a25fbdafd --- /dev/null +++ b/providers/bnxt_re/bnxt_re.map @@ -0,0 +1,10 @@ +{ + global: + openib_driver_init; + 
bnxt_re_dv_alloc_db_region; + bnxt_re_dv_free_db_region; + bnxt_re_dv_umem_reg; + bnxt_re_dv_umem_dereg; + bnxt_re_dv_get_default_db_region; + local: *; +}; diff --git a/providers/bnxt_re/bnxt_re_dv.h b/providers/bnxt_re/bnxt_re_dv.h new file mode 100644 index 000000000..8eb8872ad --- /dev/null +++ b/providers/bnxt_re/bnxt_re_dv.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2025, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Description: Direct verb support user interface header + */ + +#ifndef __BNXT_RE_DV_H__ +#define __BNXT_RE_DV_H__ + +#include +#include +#ifdef __cplusplus +extern "C" { +#endif + +struct bnxt_re_dv_db_region_attr { + uint32_t handle; + uint32_t dpi; + uint64_t umdbr; + __u64 *dbr; +}; + +enum bnxt_re_dv_umem_in_flags { + BNXT_RE_DV_UMEM_FLAGS_DMABUF = 1 << 0, +}; + +struct bnxt_re_dv_umem_reg_attr { + void *addr; + size_t size; + uint32_t access_flags; + uint64_t pgsz_bitmap; + uint64_t comp_mask; + int dmabuf_fd; +}; + +struct bnxt_re_dv_db_region_attr * +bnxt_re_dv_alloc_db_region(struct ibv_context *ctx); +int bnxt_re_dv_free_db_region(struct ibv_context *ctx, + struct bnxt_re_dv_db_region_attr *attr); +int bnxt_re_dv_get_default_db_region(struct ibv_context *ibvctx, + struct bnxt_re_dv_db_region_attr *out); +void *bnxt_re_dv_umem_reg(struct ibv_context *ibvctx, + struct bnxt_re_dv_umem_reg_attr *in); +int bnxt_re_dv_umem_dereg(void *umem_handle); +#ifdef __cplusplus +} +#endif +#endif /* __BNXT_RE_DV_H__ */ diff --git a/providers/bnxt_re/dv.c b/providers/bnxt_re/dv.c new file mode 100644 index 000000000..999fbbfdd --- /dev/null +++ b/providers/bnxt_re/dv.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2025, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Description: Direct verbs API function definitions. 
+ */ + +#include +#include + +#include "main.h" +#include "bnxt_re-abi.h" +#include "bnxt_re_dv.h" +#include "./verbs.h" +#include "dv_internal.h" + +/* Returns details about the default Doorbell page for ucontext */ +int bnxt_re_dv_get_default_db_region(struct ibv_context *ibvctx, + struct bnxt_re_dv_db_region_attr *out) +{ + struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx); + struct bnxt_re_dv_db_region_attr attr = {}; + int ret; + + DECLARE_COMMAND_BUFFER(cmd, + BNXT_RE_OBJECT_DBR, + BNXT_RE_METHOD_DBR_QUERY, + 1); + + fill_attr_out_ptr(cmd, BNXT_RE_DV_QUERY_DBR_ATTR, &attr); + + ret = execute_ioctl(ibvctx, cmd); + if (ret) { + fprintf(stderr, "%s: execute_ioctl() failed: %d\n", __func__, ret); + return ret; + } + out->dbr = cntx->udpi.dbpage; + out->dpi = attr.dpi; + out->umdbr = attr.umdbr; + return 0; +} + +int bnxt_re_dv_free_db_region(struct ibv_context *ctx, + struct bnxt_re_dv_db_region_attr *attr) +{ + struct bnxt_re_dev *dev = to_bnxt_re_dev(ctx->device); + int ret; + + DECLARE_COMMAND_BUFFER(cmd, + BNXT_RE_OBJECT_DBR, + BNXT_RE_METHOD_DBR_FREE, + 1); + + if (attr->dbr != MAP_FAILED) + munmap(attr->dbr, dev->pg_size); + + bnxt_trace_dv(NULL, DEV "%s: DV DBR: handle: 0x%x\n", __func__, attr->handle); + fill_attr_in_obj(cmd, BNXT_RE_DV_FREE_DBR_HANDLE, attr->handle); + + ret = execute_ioctl(ctx, cmd); + if (ret) { + fprintf(stderr, "%s: execute_ioctl() failed: %d\n", + __func__, ret); + errno = ret; + return ret; + } + + free(attr); + return 0; +} + +struct bnxt_re_dv_db_region_attr * +bnxt_re_dv_alloc_db_region(struct ibv_context *ctx) +{ + struct bnxt_re_dev *dev = to_bnxt_re_dev(ctx->device); + struct bnxt_re_dv_db_region_attr attr = {}, *out; + struct ib_uverbs_attr *handle; + uint64_t mmap_offset = 0; + int ret; + + DECLARE_COMMAND_BUFFER(cmd, + BNXT_RE_OBJECT_DBR, + BNXT_RE_METHOD_DBR_ALLOC, + 3); + + out = calloc(1, sizeof(*out)); + if (!out) { + errno = ENOMEM; + return NULL; + } + + handle = fill_attr_out_obj(cmd, 
BNXT_RE_DV_ALLOC_DBR_HANDLE); + fill_attr_out_ptr(cmd, BNXT_RE_DV_ALLOC_DBR_ATTR, &attr); + fill_attr_out_ptr(cmd, BNXT_RE_DV_ALLOC_DBR_OFFSET, &mmap_offset); + + ret = execute_ioctl(ctx, cmd); + if (ret) { + fprintf(stderr, "%s: execute_ioctl() failed: %d\n", + __func__, ret); + free(out); + errno = ret; + return NULL; + } + out->handle = read_attr_obj(BNXT_RE_DV_ALLOC_DBR_HANDLE, handle); + out->dpi = attr.dpi; + out->umdbr = attr.umdbr; + + out->dbr = mmap(NULL, dev->pg_size, PROT_WRITE, + MAP_SHARED, ctx->cmd_fd, mmap_offset); + if (out->dbr == MAP_FAILED) { + fprintf(stderr, DEV "%s: mmap failed\n", __func__); + bnxt_re_dv_free_db_region(ctx, out); + errno = ENOMEM; + return NULL; + } + bnxt_trace_dv(NULL, "%s: DV DBR: handle: 0x%x\n", __func__, out->handle); + + return out; +} + +void *bnxt_re_dv_umem_reg(struct ibv_context *ibvctx, struct bnxt_re_dv_umem_reg_attr *in) +{ + struct bnxt_re_dv_umem_internal *umem; + int ret; + + ret = ibv_dontfork_range(in->addr, in->size); + if (ret) { + errno = ret; + return NULL; + } + + if (in->comp_mask & BNXT_RE_DV_UMEM_FLAGS_DMABUF && + (in->dmabuf_fd == -1)) { + fprintf(stderr, "%s: failed: EBADF\n", __func__); + errno = EBADF; + goto err; + } + + umem = calloc(1, sizeof(*umem)); + if (!umem) { + errno = ENOMEM; + goto err; + } + + umem->context = ibvctx; + umem->addr = in->addr; + umem->size = in->size; + umem->access_flags = in->access_flags; + umem->pgsz_bitmap = in->pgsz_bitmap; + umem->dmabuf_fd = in->dmabuf_fd; + + bnxt_trace_dv(NULL, "%s: umem: %" PRIuPTR " addr: %" PRIuPTR " size: %zu\n", + __func__, (uintptr_t)umem, (uintptr_t)umem->addr, umem->size); + return (void *)umem; + +err: + ibv_dofork_range(in->addr, in->size); + return NULL; +} + +int bnxt_re_dv_umem_dereg(void *umem_handle) +{ + struct bnxt_re_dv_umem_internal *umem = umem_handle; + + bnxt_trace_dv(NULL, "%s: DV Umem Dereg: handle: %" PRIuPTR "\n", + __func__, (uintptr_t)umem); + ibv_dofork_range(umem->addr, umem->size); + free(umem); + return 0; +} 
diff --git a/providers/bnxt_re/dv_internal.h b/providers/bnxt_re/dv_internal.h new file mode 100644 index 000000000..52c8660bd --- /dev/null +++ b/providers/bnxt_re/dv_internal.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2025, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Description: Direct verb support user interface header + */ + +#ifndef __BNXT_RE_DV_INTERNAL_H__ +#define __BNXT_RE_DV_INTERNAL_H__ + +#include +#include + +struct bnxt_re_dv_umem_internal { + struct ibv_context *context; + void *addr; + size_t size; + uint32_t access_flags; + uint64_t pgsz_bitmap; + int dmabuf_fd; +}; + +#endif /* __BNXT_RE_DV_INTERNAL_H__ */ From 130c0417eb3b05d473c2c7793f9af925e662317c Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Tue, 17 Jun 2025 15:40:49 +0530 Subject: [PATCH 4/4] bnxt_re/lib: Direct Verbs: Support CQ and QP verbs The following Direct Verb (DV) APIs have been implemented in this patch. These are implemented by enhancing the driver specific udata in existing verbs. CQ Direct Verbs: ---------------- - bnxt_re_dv_create_cq(): Create a CQ of requested size (cqe). The application must have already registered this memory using bnxt_re_dv_umem_reg(). The CQ umem-handle and umem-offset provided by the application are translated into an address for mapping and passed to the driver. - bnxt_re_dv_destroy_cq(): Destroy the DV_CQ created earlier. QP Direct Verbs: ---------------- - bnxt_re_dv_create_qp(): Create a QP using specified params (struct bnxt_re_dv_qp_init_attr). The application must have already registered SQ/RQ memory using bnxt_re_dv_umem_reg(). The SQ/RQ umem-handle and umem-offset provided by the application are translated into an address for mapping and passed to the driver. - bnxt_re_dv_destroy_qp(): Destroy the DV_QP created earlier. - bnxt_re_dv_modify_qp(): Modify QP attributes of the DV_QP. - bnxt_re_dv_query_qp(): Return QP attributes of the DV_QP. Note: ----- Some applications might want to allocate memory for all resources of a given type (CQ/QP) in one big chunk and then register that entire memory once using dv_umem_reg(). At the time of creating each individual resource, the application should pass a specific offset/length in the umem registered memory. 
Signed-off-by: Sriharsha Basavapatna Co-developed-by: Kalesh AP Signed-off-by: Kalesh AP Co-developed-by: Selvin Xavier Signed-off-by: Selvin Xavier --- providers/bnxt_re/CMakeLists.txt | 14 +- providers/bnxt_re/bnxt_re.map | 11 +- providers/bnxt_re/bnxt_re_dv.h | 51 ++++ providers/bnxt_re/dv.c | 434 +++++++++++++++++++++++++++++++ providers/bnxt_re/main.h | 10 + providers/bnxt_re/memory.c | 3 + providers/bnxt_re/memory.h | 4 + providers/bnxt_re/verbs.c | 6 +- providers/bnxt_re/verbs.h | 2 + 9 files changed, 531 insertions(+), 4 deletions(-) diff --git a/providers/bnxt_re/CMakeLists.txt b/providers/bnxt_re/CMakeLists.txt index 74ff88a4d..447e8c4c2 100644 --- a/providers/bnxt_re/CMakeLists.txt +++ b/providers/bnxt_re/CMakeLists.txt @@ -1,7 +1,19 @@ -rdma_provider(bnxt_re +rdma_shared_provider(bnxt_re bnxt_re.map + 1 1.1.${PACKAGE_VERSION} db.c main.c memory.c verbs.c dv.c ) + +publish_headers(infiniband + bnxt_re_dv.h +) + +rdma_pkg_config("bnxt_re" "libibverbs" "${CMAKE_THREAD_LIBS_INIT}") + +if (ENABLE_LTTNG AND LTTNGUST_FOUND) + target_include_directories(bnxt_re PUBLIC ".") + target_link_libraries(bnxt_re LINK_PRIVATE LTTng::UST) +endif() diff --git a/providers/bnxt_re/bnxt_re.map b/providers/bnxt_re/bnxt_re.map index a25fbdafd..94737647a 100644 --- a/providers/bnxt_re/bnxt_re.map +++ b/providers/bnxt_re/bnxt_re.map @@ -1,10 +1,17 @@ -{ +/* Export symbols should be added below according to + Documentation/versioning.md document. 
*/ +BNXT_RE_1.1 { global: - openib_driver_init; bnxt_re_dv_alloc_db_region; bnxt_re_dv_free_db_region; bnxt_re_dv_umem_reg; bnxt_re_dv_umem_dereg; bnxt_re_dv_get_default_db_region; + bnxt_re_dv_create_cq; + bnxt_re_dv_destroy_cq; + bnxt_re_dv_create_qp; + bnxt_re_dv_destroy_qp; + bnxt_re_dv_modify_qp; + bnxt_re_dv_query_qp; local: *; }; diff --git a/providers/bnxt_re/bnxt_re_dv.h b/providers/bnxt_re/bnxt_re_dv.h index 8eb8872ad..304a2eac4 100644 --- a/providers/bnxt_re/bnxt_re_dv.h +++ b/providers/bnxt_re/bnxt_re_dv.h @@ -63,6 +63,48 @@ struct bnxt_re_dv_umem_reg_attr { int dmabuf_fd; }; +struct bnxt_re_dv_cq_init_attr { + uint64_t cq_handle; + void *umem_handle; /* umem_handle from umem_reg */ + uint64_t cq_umem_offset; /* offset into umem */ + uint32_t ncqe; +}; + +struct bnxt_re_dv_cq_attr { + uint32_t ncqe; /* no. of entries */ + uint32_t cqe_size; /* size of entries */ +}; + +struct bnxt_re_dv_qp_init_attr { + /* Standard ibv params */ + enum ibv_qp_type qp_type; + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; + struct ibv_cq *send_cq; + struct ibv_cq *recv_cq; + struct ibv_srq *srq; + + /* DV params */ + uint64_t qp_handle; /* to match with cqe */ + void *dbr_handle; /* dbr_handle from alloc_dbr */ + void *sq_umem_handle; /* umem_handle from umem_reg */ + uint64_t sq_umem_offset; /* offset into umem */ + uint32_t sq_len; /* sq length including MSN area */ + uint32_t sq_slots; /* sq length in slots */ + void *rq_umem_handle; /* umem_handle from umem_reg */ + uint64_t rq_umem_offset; /* offset into umem */ + uint32_t sq_wqe_sz; /* sq wqe size */ + uint32_t sq_psn_sz; /* sq psn size */ + uint32_t sq_npsn; /* sq num psn entries */ + uint32_t rq_len; /* rq length */ + uint32_t rq_slots; /* rq length in slots */ + uint32_t rq_wqe_sz; /* rq wqe size */ + uint64_t comp_mask; /* compatibility mask for future updates */ +}; + struct bnxt_re_dv_db_region_attr * 
bnxt_re_dv_alloc_db_region(struct ibv_context *ctx); int bnxt_re_dv_free_db_region(struct ibv_context *ctx, @@ -72,6 +114,15 @@ int bnxt_re_dv_get_default_db_region(struct ibv_context *ibvctx, void *bnxt_re_dv_umem_reg(struct ibv_context *ibvctx, struct bnxt_re_dv_umem_reg_attr *in); int bnxt_re_dv_umem_dereg(void *umem_handle); +struct ibv_cq *bnxt_re_dv_create_cq(struct ibv_context *ibvctx, + struct bnxt_re_dv_cq_init_attr *cq_attr); +int bnxt_re_dv_destroy_cq(struct ibv_cq *ibv_cq); +struct ibv_qp *bnxt_re_dv_create_qp(struct ibv_pd *pd, + struct bnxt_re_dv_qp_init_attr *qp_attr); +int bnxt_re_dv_destroy_qp(struct ibv_qp *ibvqp); +int bnxt_re_dv_modify_qp(struct ibv_qp *ibv_qp, struct ibv_qp_attr *attr, + int attr_mask); +int bnxt_re_dv_query_qp(void *qp_handle, struct ib_uverbs_qp_attr *attr); #ifdef __cplusplus } #endif diff --git a/providers/bnxt_re/dv.c b/providers/bnxt_re/dv.c index 999fbbfdd..542cca40d 100644 --- a/providers/bnxt_re/dv.c +++ b/providers/bnxt_re/dv.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2025, Broadcom. All rights reserved. The term * Broadcom refers to Broadcom Limited and/or its subsidiaries. 
@@ -197,3 +198,512 @@ int bnxt_re_dv_umem_dereg(void *umem_handle)
 	free(umem);
 	return 0;
 }
+
+/*
+ * Check that [offset, offset + size) is a page-aligned window lying
+ * entirely inside the registered umem.  The test is done in 64 bits and
+ * without forming offset + size, so offsets beyond 4 GiB work and the
+ * bound check cannot wrap.  A NULL umem handle is rejected rather than
+ * dereferenced.
+ */
+static bool bnxt_re_dv_is_valid_umem(struct bnxt_re_dev *dev,
+				     struct bnxt_re_dv_umem_internal *umem,
+				     uint64_t offset, uint32_t size)
+{
+	uint64_t umem_size;
+	uint64_t pg_mask;
+
+	if (!umem)
+		return false;
+
+	/* pg_size is treated as a power of two, as get_aligned() assumes */
+	pg_mask = (uint64_t)dev->pg_size - 1;
+	if (offset & pg_mask)
+		return false;
+
+	umem_size = umem->size;
+	return size <= umem_size && offset <= umem_size - size;
+}
+
+/*
+ * Issue the create-CQ command for a CQ whose ring lives in caller
+ * registered umem.  Only the command is sent here; the software CQ state
+ * is filled from @resp by bnxt_re_dv_init_cq().
+ */
+static int bnxt_re_dv_create_cq_cmd(struct bnxt_re_dev *dev,
+				    struct ibv_context *ibvctx,
+				    struct bnxt_re_cq *cq,
+				    struct bnxt_re_dv_cq_init_attr *cq_attr,
+				    uint64_t comp_mask,
+				    struct ubnxt_re_cq_resp *resp)
+{
+	struct bnxt_re_dv_umem_internal *cq_umem = cq->cq_umem;
+	uint64_t offset = cq_attr->cq_umem_offset;
+	/* zeroed so that no uninitialised field reaches the kernel */
+	struct ubnxt_re_cq cmd = {};
+	uint32_t size;
+	int ret;
+
+	size = cq_attr->ncqe * bnxt_re_get_cqe_sz();
+	if (!bnxt_re_dv_is_valid_umem(dev, cq_umem, offset, size)) {
+		fprintf(stderr,
+			"Invalid cq_umem: %" PRIuPTR " offset: %" PRIx64 " size: 0x%x\n",
+			(uintptr_t)cq_umem, offset, size);
+		return -EINVAL;
+	}
+
+	cmd.cq_va = (uintptr_t)(cq_umem->addr) + offset;
+	cmd.cq_handle = (uintptr_t)cq;
+	cmd.ncqe = cq_attr->ncqe;
+	cmd.dmabuf_fd = cq_umem->dmabuf_fd;
+	cmd.comp_mask = comp_mask;
+
+	memset(resp, 0, sizeof(*resp));
+	ret = ibv_cmd_create_cq(ibvctx, cq_attr->ncqe, NULL, 0,
+				&cq->ibvcq, &cmd.ibv_cmd, sizeof(cmd),
+				&resp->ibv_resp, sizeof(*resp));
+	if (ret) {
+		fprintf(stderr, "%s: ibv_cmd_create_cq() failed: %d\n", __func__, ret);
+		return ret;
+	}
+
+	bnxt_trace_dv(NULL, "%s: CQ handle: 0x%x\n", __func__, cq->ibvcq.handle);
+	bnxt_trace_dv(NULL,
+		      "%s: CQ cqid: 0x%x tail: 0x%x phase: 0x%x comp_mask: 0x%llx\n",
+		      __func__, resp->cqid, resp->tail, resp->phase, resp->comp_mask);
+	return 0;
+}
+
+/*
+ * Fill the software CQ state from the driver response.  Returns
+ * -EOPNOTSUPP when the response advertises the toggle page feature.
+ */
+static int bnxt_re_dv_init_cq(struct ibv_context *ibvctx, struct bnxt_re_cq *cq,
+			      struct ubnxt_re_cq_resp *resp)
+{
+	struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx);
+
+	cq->cqid = resp->cqid;
+	cq->phase = resp->phase;
+	cq->cqq->tail = resp->tail;
+	cq->udpi = &cntx->udpi;
+	cq->cntx = cntx;
+	cq->rand.seed = cq->cqid;
+	if (resp->comp_mask & BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT) {
+		bnxt_trace_dv(NULL, "%s: toggle page is unsupported, cqid: 0x%x\n",
+			      __func__, resp->cqid);
+		return -EOPNOTSUPP;
+	}
+	pthread_spin_init(&cq->cqq->qlock, PTHREAD_PROCESS_PRIVATE);
+	list_head_init(&cq->sfhead);
+	list_head_init(&cq->rfhead);
+	list_head_init(&cq->prev_cq_head);
+	return 0;
+}
+
+/*
+ * Create a CQ on top of caller supplied umem.  Returns the new CQ, or
+ * NULL on failure.
+ */
+struct ibv_cq *bnxt_re_dv_create_cq(struct ibv_context *ibvctx,
+				    struct bnxt_re_dv_cq_init_attr *cq_attr)
+{
+	struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device);
+	uint64_t comp_mask = BNXT_RE_CQ_DV_SUPPORT;
+	struct ubnxt_re_cq_resp resp = {};
+	struct bnxt_re_cq *cq;
+	int ret;
+
+	if (!cq_attr || cq_attr->ncqe > dev->max_cq_depth)
+		return NULL;
+
+	/*
+	 * cq->cqq is written (tail, qlock) while the CQ is initialised, so it
+	 * must point at real storage.  Carve the queue header out of the same
+	 * allocation so that freeing the CQ releases both.
+	 * NOTE(review): bnxt_re_destroy_cq() is outside this hunk; confirm it
+	 * does not release cq->cqq on its own.
+	 */
+	cq = calloc(1, sizeof(*cq) + sizeof(*cq->cqq));
+	if (!cq)
+		return NULL;
+
+	cq->cqq = (void *)((char *)cq + sizeof(*cq));
+	cq->cq_umem = cq_attr->umem_handle;
+
+	ret = bnxt_re_dv_create_cq_cmd(dev, ibvctx, cq, cq_attr, comp_mask, &resp);
+	if (ret) {
+		fprintf(stderr, "%s: bnxt_re_dv_create_cq_cmd() failed: %d\n",
+			__func__, ret);
+		goto fail;
+	}
+
+	ret = bnxt_re_dv_init_cq(ibvctx, cq, &resp);
+	if (ret) {
+		fprintf(stderr, "%s: bnxt_re_dv_init_cq() failed: %d\n",
+			__func__, ret);
+		goto fail_destroy;
+	}
+
+	cq->dv_cq_flags |= BNXT_DV_CQ_FLAGS_VALID;
+	return &cq->ibvcq;
+
+fail_destroy:
+	/* the kernel CQ exists by now; destroy it instead of leaking it */
+	ibv_cmd_destroy_cq(&cq->ibvcq);
+fail:
+	free(cq);
+	return NULL;
+}
+
+/* Destroy a CQ created by bnxt_re_dv_create_cq(). */
+int bnxt_re_dv_destroy_cq(struct ibv_cq *ibvcq)
+{
+	int ret;
+
+	ret = bnxt_re_destroy_cq(ibvcq);
+	if (ret)
+		fprintf(stderr, "%s: bnxt_re_destroy_cq() failed: %d\n",
+			__func__, ret);
+	return ret;
+}
+
+/* Populate the generic ibv_qp embedded in @qp. */
+static void bnxt_re_dv_init_ib_qp(struct ibv_context *ibvctx,
+				  struct ibv_qp_init_attr_ex *attr,
+				  struct bnxt_re_qp *qp)
+{
+	struct ibv_qp *ibvqp = qp->ibvqp;
+
+	ibvqp->qp_num = qp->qpid;
+	ibvqp->context = ibvctx;
+	ibvqp->qp_context = attr->qp_context;
+	ibvqp->pd = attr->pd;
+	ibvqp->send_cq = attr->send_cq;
+	ibvqp->recv_cq = attr->recv_cq;
+	ibvqp->srq = attr->srq;
+	ibvqp->qp_type = attr->qp_type;
+	ibvqp->state = IBV_QPS_RESET;
+	ibvqp->events_completed = 0;
+	pthread_mutex_init(&ibvqp->mutex, NULL);
+	pthread_cond_init(&ibvqp->cond, NULL);
+}
+
+/* Fill the provider QP state from @attr and the driver response. */
+static void bnxt_re_dv_init_qp(struct ibv_context *ibvctx,
+			       struct ibv_qp_init_attr_ex *attr,
+			       struct bnxt_re_qp *qp,
+			       struct ubnxt_re_qp_resp *resp)
+{
+	struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx);
+	struct ibv_device_attr *devattr;
+	struct bnxt_re_qpcap *cap;
+	struct bnxt_re_dev *rdev;
+
+	qp->qpid = resp->qpid;
+	qp->qptyp = attr->qp_type;
+	qp->qpst = IBV_QPS_RESET;
+	qp->scq = to_bnxt_re_cq(attr->send_cq);
+	qp->rcq = to_bnxt_re_cq(attr->recv_cq);
+	if (attr->srq)
+		qp->srq = to_bnxt_re_srq(attr->srq);
+	qp->rand.seed = qp->qpid;
+	qp->sq_psn = 0;
+
+	rdev = cntx->rdev;
+	devattr = &rdev->devattr;
+	cap = &qp->cap;
+	cap->max_ssge = attr->cap.max_send_sge;
+	cap->max_rsge = attr->cap.max_recv_sge;
+	cap->max_inline = attr->cap.max_inline_data;
+	cap->sqsig = attr->sq_sig_all;
+	cap->is_atomic_cap = devattr->atomic_cap;
+	fque_init_node(&qp->snode);
+	fque_init_node(&qp->rnode);
+
+	bnxt_re_dv_init_ib_qp(ibvctx, attr, qp);
+}
+
+/* Copy the standard verbs fields of @dv_qp_attr into @attr. */
+static void fill_ib_attr_from_dv_qp_attr(struct bnxt_re_dv_qp_init_attr *dv_qp_attr,
+					 struct ibv_qp_init_attr *attr)
+{
+	attr->send_cq = dv_qp_attr->send_cq;
+	attr->recv_cq = dv_qp_attr->recv_cq;
+	attr->srq = dv_qp_attr->srq;
+	attr->cap.max_send_wr = dv_qp_attr->max_send_wr;
+	attr->cap.max_send_sge = dv_qp_attr->max_send_sge;
+	attr->qp_type = dv_qp_attr->qp_type;
+	attr->cap.max_inline_data = dv_qp_attr->max_inline_data;
+	attr->cap.max_recv_wr = dv_qp_attr->max_recv_wr;
+	attr->cap.max_recv_sge = dv_qp_attr->max_recv_sge;
+}
+
+/*
+ * Validate the SQ (and, without an SRQ, the RQ) umem windows, pick the
+ * doorbell page and issue the create-QP command.
+ */
+static int
+bnxt_re_dv_create_qp_cmd(struct ibv_context *ibvctx,
+			 struct bnxt_re_dv_qp_init_attr *dv_qp_attr,
+			 struct ibv_qp_init_attr_ex *attr_ex,
+			 struct ubnxt_re_qp_resp *resp,
+			 struct bnxt_re_qp *qp)
+{
+	struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx);
+	struct bnxt_re_dv_db_region_attr *db_attr = NULL;
+	struct bnxt_re_dv_umem_internal *sq_umem = NULL;
+	struct bnxt_re_dv_umem_internal *rq_umem = NULL;
+	struct ubnxt_re_qp req = {};
+	uint64_t offset;
+	uint32_t size;
+	int ret;
+
+	req.pd_id = qp->re_pd->pdid;
+	req.qp_handle = dv_qp_attr->qp_handle;
+
+	sq_umem = dv_qp_attr->sq_umem_handle;
+	offset = dv_qp_attr->sq_umem_offset;
+	size = dv_qp_attr->sq_len;
+	if (!bnxt_re_dv_is_valid_umem(cntx->rdev, sq_umem, offset, size)) {
+		fprintf(stderr,
+			"Invalid sq_umem: %" PRIuPTR " offset: %" PRIx64 " size: 0x%x\n",
+			(uintptr_t)sq_umem, offset, size);
+		return -EINVAL;
+	}
+	bnxt_trace_dv(NULL, "%s: sq_umem: %" PRIuPTR " offset: %" PRIx64 " size: 0x%x\n",
+		      __func__, (uintptr_t)sq_umem, offset, size);
+	req.sq_dmabuf_fd = sq_umem->dmabuf_fd;
+	req.qpsva = (uintptr_t)(sq_umem->addr) + offset;
+	req.sq_len = size;
+	req.sq_slots = dv_qp_attr->sq_slots;
+	req.sq_wqe_sz = dv_qp_attr->sq_wqe_sz;
+	req.sq_psn_sz = dv_qp_attr->sq_psn_sz;
+	req.sq_npsn = dv_qp_attr->sq_npsn;
+
+	if (!dv_qp_attr->srq) {
+		rq_umem = dv_qp_attr->rq_umem_handle;
+		offset = dv_qp_attr->rq_umem_offset;
+		size = dv_qp_attr->rq_len;
+		if (!bnxt_re_dv_is_valid_umem(cntx->rdev, rq_umem, offset, size)) {
+			fprintf(stderr,
+				"Invalid rq_umem: %" PRIuPTR " offset: %" PRIx64 " size: 0x%x\n",
+				(uintptr_t)rq_umem, offset, size);
+			return -EINVAL;
+		}
+		bnxt_trace_dv(NULL, "%s: rq_umem: %" PRIuPTR " offset: %" PRIx64 " size: 0x%x\n",
+			      __func__, (uintptr_t)rq_umem, offset, size);
+
+		req.rq_dmabuf_fd = rq_umem->dmabuf_fd;
+		req.qprva = (uintptr_t)(rq_umem->addr) + offset;
+		req.rq_len = size;
+		req.rq_slots = dv_qp_attr->rq_slots;
+		req.rq_wqe_sz = dv_qp_attr->rq_wqe_sz;
+	}
+
+	if (dv_qp_attr->dbr_handle) {
+		bnxt_trace_dv(NULL, "%s: using specific dpi\n", __func__);
+		db_attr = dv_qp_attr->dbr_handle;
+		qp->dv_dpi.dbpage = db_attr->dbr;
+		qp->dv_dpi.dpindx = db_attr->dpi;
+		qp->udpi = &qp->dv_dpi;
+	} else {
+		bnxt_trace_dv(NULL, "%s: using default dpi\n", __func__);
+		qp->udpi = &cntx->udpi;
+	}
+	req.dpi = qp->udpi->dpindx;
+	bnxt_trace_dv(NULL, "%s: dpi: %d\n", __func__, req.dpi);
+
+	req.comp_mask = BNXT_RE_QP_DV_SUPPORT;
+	ret = ibv_cmd_create_qp_ex(ibvctx, &qp->vqp, attr_ex,
+				   &req.ibv_cmd, sizeof(req),
+				   &resp->ibv_resp, sizeof(*resp));
+	if (ret) {
+		fprintf(stderr, "%s: ibv_cmd_create_qp_ex() failed: %d\n",
+			__func__, ret);
+		return ret;
+	}
+
+	bnxt_trace_dv(NULL, "%s: QP handle: 0x%x qpid: 0x%x\n",
+		      __func__, qp->ibvqp->handle, resp->qpid);
+	return 0;
+}
+
+/*
+ * Create a QP on top of caller supplied umem.  @dv_qp_attr->qp_handle is
+ * overwritten with the provider QP pointer before the command is sent.
+ * Returns the new QP, or NULL on failure.
+ */
+struct ibv_qp *bnxt_re_dv_create_qp(struct ibv_pd *ibvpd,
+				    struct bnxt_re_dv_qp_init_attr *dv_qp_attr)
+{
+	struct bnxt_re_context *cntx = to_bnxt_re_context(ibvpd->context);
+	struct ibv_qp_init_attr_ex attr_ex;
+	struct ibv_qp_init_attr attr = {};
+	struct ubnxt_re_qp_resp resp = {};
+	struct bnxt_re_qp *qp;
+	int rc;
+
+	if (!dv_qp_attr)
+		return NULL;
+
+	qp = calloc(1, sizeof(*qp));
+	if (!qp)
+		return NULL;
+
+	qp->ibvqp = &qp->vqp.qp;
+	qp->cctx = &cntx->cctx;
+	qp->cntx = cntx;
+	qp->qpmode = cntx->wqe_mode & BNXT_RE_WQE_MODE_VARIABLE;
+	qp->re_pd = to_bnxt_re_pd(ibvpd);
+
+	dv_qp_attr->qp_handle = (uintptr_t)qp;
+	memset(&attr_ex, 0, sizeof(attr_ex));
+	fill_ib_attr_from_dv_qp_attr(dv_qp_attr, &attr);
+	memcpy(&attr_ex, &attr, sizeof(attr));
+	attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;
+	attr_ex.pd = ibvpd;
+
+	rc = bnxt_re_dv_create_qp_cmd(ibvpd->context, dv_qp_attr, &attr_ex, &resp, qp);
+	if (rc) {
+		free(qp);
+		return NULL;
+	}
+
+	bnxt_re_dv_init_qp(ibvpd->context, &attr_ex, qp, &resp);
+	return qp->ibvqp;
+}
+
+/*
+ * Destroy a QP created by bnxt_re_dv_create_qp() and release the
+ * provider QP object allocated there.
+ */
+int bnxt_re_dv_destroy_qp(struct ibv_qp *ibvqp)
+{
+	struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
+	int ret;
+
+	qp->qpst = IBV_QPS_RESET;
+	ret = ibv_cmd_destroy_qp(ibvqp);
+	if (ret) {
+		fprintf(stderr, "%s: ibv_cmd_destroy_qp() failed: %d\n",
+			__func__, ret);
+		return ret;
+	}
+	bnxt_re_cleanup_cq(qp, qp->rcq);
+	if (qp->scq != qp->rcq)
+		bnxt_re_cleanup_cq(qp, qp->scq);
+	bnxt_re_free_mem(qp->mem);
+
+	/* undo bnxt_re_dv_init_ib_qp() and free the QP itself */
+	pthread_cond_destroy(&ibvqp->cond);
+	pthread_mutex_destroy(&ibvqp->mutex);
+	free(qp);
+	return 0;
+}
+
+/* Translate an ibv_qp_attr into the uverbs ABI layout. */
+static void bnxt_re_dv_copy_to_uattr(struct ib_uverbs_qp_attr *dst,
+				     struct ibv_qp_attr *src, int attr_mask)
+{
+	dst->qp_state = src->qp_state;
+	dst->cur_qp_state = src->cur_qp_state;
+	dst->path_mtu = src->path_mtu;
+	dst->path_mig_state = src->path_mig_state;
+	dst->qkey = src->qkey;
+	dst->rq_psn = src->rq_psn;
+	dst->sq_psn = src->sq_psn;
+	dst->dest_qp_num = src->dest_qp_num;
+	dst->qp_access_flags = src->qp_access_flags;
+	dst->max_send_wr = src->cap.max_send_wr;
+	dst->max_recv_wr = src->cap.max_recv_wr;
+	dst->max_send_sge = src->cap.max_send_sge;
+	dst->max_recv_sge = src->cap.max_recv_sge;
+	dst->max_inline_data = src->cap.max_inline_data;
+	dst->pkey_index = src->pkey_index;
+	dst->alt_pkey_index = src->alt_pkey_index;
+	dst->en_sqd_async_notify = src->en_sqd_async_notify;
+	dst->sq_draining = src->sq_draining;
+	dst->max_rd_atomic = src->max_rd_atomic;
+	dst->max_dest_rd_atomic = src->max_dest_rd_atomic;
+	dst->min_rnr_timer = src->min_rnr_timer;
+	dst->port_num = src->port_num;
+	dst->timeout = src->timeout;
+	dst->retry_cnt = src->retry_cnt;
+	dst->rnr_retry = src->rnr_retry;
+	dst->alt_port_num = src->alt_port_num;
+	dst->alt_timeout = src->alt_timeout;
+
+	dst->qp_attr_mask = attr_mask;
+
+	dst->ah_attr.sl = src->ah_attr.sl;
+	dst->ah_attr.src_path_bits = src->ah_attr.src_path_bits;
+	dst->ah_attr.port_num = src->ah_attr.port_num;
+	dst->ah_attr.dlid = src->ah_attr.dlid;
+	dst->ah_attr.is_global = src->ah_attr.is_global;
+	memcpy(&dst->ah_attr.grh.dgid, &src->ah_attr.grh.dgid, 16);
+	dst->ah_attr.grh.sgid_index = src->ah_attr.grh.sgid_index;
+	dst->ah_attr.grh.hop_limit = src->ah_attr.grh.hop_limit;
+	dst->ah_attr.grh.traffic_class = src->ah_attr.grh.traffic_class;
+	dst->ah_attr.grh.flow_label = src->ah_attr.grh.flow_label;
+}
+
+/*
+ * Query the attributes selected by @qp_attr->qp_attr_mask and return
+ * them in uverbs ABI layout.  @qp_handle is the struct ibv_qp pointer.
+ */
+int bnxt_re_dv_query_qp(void *qp_handle, struct ib_uverbs_qp_attr *qp_attr)
+{
+	struct ibv_qp_init_attr init_attr = {};
+	struct ibv_qp *ibvqp = qp_handle;
+	struct ibv_qp_attr attr = {};
+	struct ibv_query_qp cmd = {};
+	struct bnxt_re_qp *qp;
+	int rc;
+
+	if (!ibvqp || !qp_attr)
+		return -EINVAL;
+
+	qp = to_bnxt_re_qp(ibvqp);
+	bnxt_trace_dv(NULL, "%s: handle: 0x%x\n", __func__, ibvqp->handle);
+	rc = ibv_cmd_query_qp(ibvqp, &attr, qp_attr->qp_attr_mask, &init_attr,
+			      &cmd, sizeof(cmd));
+	if (rc)
+		return rc;
+
+	/*
+	 * This entry point bypasses ibv_query_qp(), which is what normally
+	 * refreshes ibvqp->state, so take the state from the reply here.
+	 */
+	if (qp_attr->qp_attr_mask & IBV_QP_STATE)
+		ibvqp->state = attr.qp_state;
+	qp->qpst = ibvqp->state;
+	bnxt_re_dv_copy_to_uattr(qp_attr, &attr, qp_attr->qp_attr_mask);
+	return 0;
+}
+
+/* Modify a DV QP and mirror the changes the provider keeps track of. */
+int bnxt_re_dv_modify_qp(struct ibv_qp *ibvqp, struct ibv_qp_attr *attr, int attr_mask)
+{
+	struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
+	struct ibv_modify_qp cmd = {};
+	int rc;
+
+	bnxt_trace_dv(NULL, "%s: handle: 0x%x\n", __func__, ibvqp->handle);
+	rc = ibv_cmd_modify_qp(ibvqp, attr, attr_mask, &cmd, sizeof(cmd));
+	if (rc) {
+		fprintf(stderr, "DV Modify QP error: %d\n", rc);
+		return rc;
+	}
+
+	/* ibv_modify_qp() is bypassed, so track the new state here */
+	if (attr_mask & IBV_QP_STATE) {
+		ibvqp->state = attr->qp_state;
+		qp->qpst = attr->qp_state;
+	}
+	if (attr_mask & IBV_QP_SQ_PSN)
+		qp->sq_psn = attr->sq_psn;
+	if (attr_mask & IBV_QP_PATH_MTU)
+		qp->mtu = (0x80 << attr->path_mtu);
+	return rc;
+}
diff --git a/providers/bnxt_re/main.h b/providers/bnxt_re/main.h
index 7e123df94..2bdbfc926 100644
--- a/providers/bnxt_re/main.h
+++ b/providers/bnxt_re/main.h
@@ -91,6 +91,11 @@ struct bnxt_re_pd {
 	uint32_t pdid;
 };
 
+enum bnxt_dv_cq_flags {
+	BNXT_DV_CQ_FLAGS_NONE = 0,
+	BNXT_DV_CQ_FLAGS_VALID = 0x1,
+};
+
 struct bnxt_re_cq {
 	struct ibv_cq ibvcq;
 	uint32_t cqid;
@@ -111,6 +116,8 @@ struct bnxt_re_cq {
 	uint8_t resize_tog;
 	bool deffered_db_sup;
 	uint32_t hw_cqes;
+	struct bnxt_re_dv_umem_internal *cq_umem;
+	int dv_cq_flags;
 };
 
 struct bnxt_re_push_buffer {
@@ -235,6 +242,9 @@ struct bnxt_re_qp {
 	uint8_t qptyp;
 	struct bnxt_re_mem *mem;
 	struct bnxt_re_wr_send_qp wr_sq;
+	struct bnxt_re_pd *re_pd;
+	struct bnxt_re_dpi dv_dpi;
+	uint32_t qp_handle;
 };
 
 struct bnxt_re_mr {
diff --git a/providers/bnxt_re/memory.c b/providers/bnxt_re/memory.c
index 30d972fea..9cf336cf7 100644
--- 
a/providers/bnxt_re/memory.c
+++ b/providers/bnxt_re/memory.c
@@ -46,6 +46,9 @@
 
 void bnxt_re_free_mem(struct bnxt_re_mem *mem)
 {
+	if (!mem)
+		return;
+
 	if (mem->va_head) {
 		ibv_dofork_range(mem->va_head, mem->size);
 		munmap(mem->va_head, mem->size);
diff --git a/providers/bnxt_re/memory.h b/providers/bnxt_re/memory.h
index 7412e7353..1009b1dae 100644
--- a/providers/bnxt_re/memory.h
+++ b/providers/bnxt_re/memory.h
@@ -97,6 +97,14 @@ struct bnxt_re_queue {
 	uint32_t old_head;
 };
 
+/* Round size up to the next multiple of al_size (a power of two). */
+static inline unsigned long get_aligned(uint32_t size, uint32_t al_size)
+{
+	unsigned long mask = (unsigned long)al_size - 1;
+
+	/* widen before adding so sizes near UINT32_MAX do not wrap to 0 */
+	return ((unsigned long)size + mask) & ~mask;
+}
 /* Basic queue operation */
 static inline void *bnxt_re_get_hwqe(struct bnxt_re_queue *que,
 				     uint32_t idx)
diff --git a/providers/bnxt_re/verbs.c b/providers/bnxt_re/verbs.c
index 051c80b7f..7631b2611 100644
--- a/providers/bnxt_re/verbs.c
+++ b/providers/bnxt_re/verbs.c
@@ -54,6 +54,7 @@
 
 #include "main.h"
 #include "verbs.h"
+#include "bnxt_re_dv.h"
 
 static int bnxt_re_poll_one(struct bnxt_re_cq *cq, int nwc, struct ibv_wc *wc,
 			    uint32_t *resize);
@@ -1080,7 +1081,7 @@ int bnxt_re_poll_cq(struct ibv_cq *ibvcq, int nwc, struct ibv_wc *wc)
 	return dqed;
 }
 
-static void bnxt_re_cleanup_cq(struct bnxt_re_qp *qp, struct bnxt_re_cq *cq)
+void bnxt_re_cleanup_cq(struct bnxt_re_qp *qp, struct bnxt_re_cq *cq)
 {
 	struct bnxt_re_queue *que = cq->cqq;
 	struct bnxt_re_bcqe *hdr;
@@ -1089,6 +1090,9 @@ static void bnxt_re_cleanup_cq(struct bnxt_re_qp *qp, struct bnxt_re_cq *cq)
 	void *cqe;
 	int indx, type;
 
+	if (cq->dv_cq_flags & BNXT_DV_CQ_FLAGS_VALID)
+		return;
+
 	pthread_spin_lock(&que->qlock);
 	for (indx = 0; indx < que->depth; indx++) {
 		cqe = que->va + indx * bnxt_re_get_cqe_sz();
diff --git a/providers/bnxt_re/verbs.h b/providers/bnxt_re/verbs.h
index 7927755c0..cd17c8add 100644
--- a/providers/bnxt_re/verbs.h
+++ b/providers/bnxt_re/verbs.h
@@ -125,6 +125,8 @@ struct ibv_flow *bnxt_re_create_flow(struct ibv_qp *qp,
 int bnxt_re_destroy_flow(struct ibv_flow *flow);
 void 
bnxt_re_async_event(struct ibv_context *context, struct ibv_async_event *event); +void bnxt_re_cleanup_cq(struct bnxt_re_qp *qp, + struct bnxt_re_cq *cq); static inline __le64 bnxt_re_update_msn_tbl(uint32_t st_idx, uint32_t npsn, uint32_t start_psn) {