From 4929d243c60bd9cb431310d725445f396cf3808d Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Wed, 2 Nov 2022 17:53:31 +0000
Subject: [PATCH] drivers/amazon/net/efa: update to 2.1.0

Update the Amazon downstream EFA driver to 2.1.0 [1], to support new
features of the Elastic Fabric Adapter.

In detail, copy the '*.{c,h}' files from the EFA GitHub repo's 'src/'
directory, together with the 'build/src/config.h' that is auto-generated
from the repo on a machine running the AL 5.15 kernel, into
'drivers/amazon/net/efa/', and add the new EFA object files to the
Makefile.

[1] https://github.com/amzn/amzn-drivers/releases/tag/efa_linux_2.1.0

Signed-off-by: SeongJae Park
---
 drivers/amazon/net/efa/Makefile              |   3 +-
 drivers/amazon/net/efa/config.h              |  43 +--
 drivers/amazon/net/efa/efa-abi.h             |   4 +-
 drivers/amazon/net/efa/efa.h                 |  12 +-
 drivers/amazon/net/efa/efa_admin_cmds_defs.h |   6 +-
 drivers/amazon/net/efa/efa_com_cmd.c         |   5 +-
 drivers/amazon/net/efa/efa_com_cmd.h         |   3 +-
 drivers/amazon/net/efa/efa_gdr.c             | 190 +++-------
 drivers/amazon/net/efa/efa_gdr.h             |  46 ---
 drivers/amazon/net/efa/efa_io_defs.h         | 289 ++++++++++++++++++
 drivers/amazon/net/efa/efa_main.c            |  24 +-
 drivers/amazon/net/efa/efa_neuron.c          | 176 +++++++++++
 drivers/amazon/net/efa/efa_p2p.c             | 121 ++++++++
 drivers/amazon/net/efa/efa_p2p.h             |  57 ++++
 drivers/amazon/net/efa/efa_sysfs.c           |  13 +-
 drivers/amazon/net/efa/efa_verbs.c           | 302 ++++++++++++-------
 drivers/amazon/net/efa/kcompat.h             |  56 +++-
 drivers/amazon/net/efa/neuron_p2p.h          |  43 +++
 18 files changed, 1077 insertions(+), 316 deletions(-)
 delete mode 100644 drivers/amazon/net/efa/efa_gdr.h
 create mode 100644 drivers/amazon/net/efa/efa_io_defs.h
 create mode 100644 drivers/amazon/net/efa/efa_neuron.c
 create mode 100644 drivers/amazon/net/efa/efa_p2p.c
 create mode 100644 drivers/amazon/net/efa/efa_p2p.h
 create mode 100644 drivers/amazon/net/efa/neuron_p2p.h

diff --git a/drivers/amazon/net/efa/Makefile b/drivers/amazon/net/efa/Makefile
index cc40c911d8455..4399f594a93bf 100644
--- a/drivers/amazon/net/efa/Makefile
+++ b/drivers/amazon/net/efa/Makefile
@@ -4,7 +4,8 @@
 #
 
 obj-$(CONFIG_AMAZON_EFA_INFINIBAND) += efa.o
 
-efa-y := efa_com.o efa_com_cmd.o efa_main.o efa_verbs.o efa_gdr.o
+efa-y := efa_com.o efa_com_cmd.o efa_gdr.o efa_main.o efa_neuron.o efa_p2p.o
+efa-y += efa_verbs.o
 
 efa-$(CONFIG_SYSFS) += efa_sysfs.o
diff --git a/drivers/amazon/net/efa/config.h b/drivers/amazon/net/efa/config.h
index ad4cbad1272d6..96c10dfc11d69 100644
--- a/drivers/amazon/net/efa/config.h
+++ b/drivers/amazon/net/efa/config.h
@@ -1,51 +1,52 @@
 #define HAVE_UMEM_SCATTERLIST_IF 1
-#define HAVE_MAX_SEND_RCV_SGE 1
 #define HAVE_CREATE_CQ_ATTR 1
-#define HAVE_IB_MODIFY_QP_IS_OK_FOUR_PARAMS 1
+#define HAVE_CREATE_AH_RDMA_ATTR 1
 #define HAVE_DEV_PARENT 1
 #define HAVE_POST_CONST_WR 1
-#define HAVE_CREATE_AH_RDMA_ATTR 1
-#define HAVE_PD_CORE_ALLOCATION 1
+#define HAVE_MAX_SEND_RCV_SGE 1
+#define HAVE_IB_MODIFY_QP_IS_OK_FOUR_PARAMS 1
 #define HAVE_IB_DEV_OPS 1
+#define HAVE_PD_CORE_ALLOCATION 1
 #define HAVE_UCONTEXT_CORE_ALLOCATION 1
 #define HAVE_NO_KVERBS_DRIVERS 1
 #define HAVE_UDATA_TO_DRV_CONTEXT 1
-#define HAVE_AH_CORE_ALLOCATION 1
 #define HAVE_SAFE_IB_ALLOC_DEVICE 1
+#define HAVE_AH_CORE_ALLOCATION 1
 #define HAVE_ALLOC_PD_NO_UCONTEXT 1
 #define HAVE_DEREG_MR_UDATA 1
 #define HAVE_DESTROY_CQ_UDATA 1
 #define HAVE_DESTROY_QP_UDATA 1
-#define HAVE_IB_DEVICE_OPS_COMMON 1
+#define HAVE_IB_UMEM_FIND_SINGLE_PG_SIZE 1
 #define HAVE_UPSTREAM_EFA 1
-#define HAVE_IB_QPT_DRIVER 1
-#define HAVE_KVZALLOC 1
+#define HAVE_IB_DEVICE_OPS_COMMON 1
 #define HAVE_CQ_CORE_ALLOCATION 1
-#define HAVE_IB_IS_UDATA_CLEARED 1
-#define HAVE_IBDEV_PRINT 1
-#define HAVE_IB_UMEM_FIND_SINGLE_PG_SIZE 1
 #define HAVE_IB_PORT_PHYS_STATE_LINK_UP 1
+#define HAVE_KVZALLOC 1
 #define HAVE_IBDEV_PRINT_RATELIMITED 1
+#define HAVE_IBDEV_PRINT 1
+#define HAVE_IB_QPT_DRIVER 1
+#define HAVE_IB_IS_UDATA_CLEARED 1
 #define HAVE_IB_MR_LENGTH 1
 #define HAVE_PCI_VENDOR_ID_AMAZON 1
+#define HAVE_IB_UMEM_GET_NO_DMASYNC 1
+#define HAVE_CORE_MMAP_XA 1
+#define HAVE_RDMA_NODE_UNSPECIFIED 1
 #define HAVE_BITFIELD_H 1
 #define HAVE_IB_UMEM_GET_DEVICE_PARAM 1
-#define HAVE_ATOMIC64_FETCH_INC 1
 #define HAVE_IB_ACCESS_OPTIONAL 1
-#define HAVE_RDMA_NODE_UNSPECIFIED 1
-#define HAVE_CORE_MMAP_XA 1
-#define HAVE_IB_UMEM_GET_NO_DMASYNC 1
+#define HAVE_CREATE_AH_INIT_ATTR 1
+#define HAVE_ATOMIC64_FETCH_INC 1
 #define HAVE_DEALLOC_PD_UDATA_RC 1
-#define HAVE_IB_INT_DESTROY_CQ 1
 #define HAVE_AH_CORE_ALLOCATION_DESTROY_RC 1
-#define HAVE_CREATE_AH_INIT_ATTR 1
+#define HAVE_IB_INT_DESTROY_CQ 1
 #define HAVE_RDMA_UMEM_FOR_EACH_DMA_BLOCK 1
-#define HAVE_UVERBS_CMD_MASK_NOT_NEEDED 1
 #define HAVE_IB_UMEM_NUM_DMA_BLOCKS 1
+#define HAVE_IB_REGISTER_DEVICE_DMA_DEVICE_PARAM 1
+#define HAVE_UVERBS_CMD_MASK_NOT_NEEDED 1
 #define HAVE_U32_PORT 1
+#define HAVE_SPLIT_STATS_ALLOC 1
 #define HAVE_SYSFS_EMIT 1
-#define HAVE_IB_REGISTER_DEVICE_DMA_DEVICE_PARAM 1
 #define HAVE_XARRAY 1
-#define HAVE_SPLIT_STATS_ALLOC 1
 #define HAVE_QP_CORE_ALLOCATION 1
-#define HAVE_EFA_GDR 1
\ No newline at end of file
+#define HAVE_MR_DMABUF 1
+#define HAVE_EFA_P2P 1
\ No newline at end of file
diff --git a/drivers/amazon/net/efa/efa-abi.h b/drivers/amazon/net/efa/efa-abi.h
index 08035ccf1fff4..163ac79556d68 100644
--- a/drivers/amazon/net/efa/efa-abi.h
+++ b/drivers/amazon/net/efa/efa-abi.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
 /*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #ifndef EFA_ABI_USER_H
@@ -54,6 +54,7 @@ struct efa_ibv_alloc_pd_resp {
 
 enum {
	EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0,
+	EFA_CREATE_CQ_WITH_SGID = 1 << 1,
 };
 
 struct efa_ibv_create_cq {
@@ -118,6 +119,7 @@ enum {
	EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0,
	EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1,
	EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS = 1 << 2,
+	EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID = 1 << 3,
 };
 
 struct efa_ibv_ex_query_device_resp {
diff --git a/drivers/amazon/net/efa/efa.h b/drivers/amazon/net/efa/efa.h
index b0c8538218cb0..34ccbac76b451 100644
--- a/drivers/amazon/net/efa/efa.h
+++ b/drivers/amazon/net/efa/efa.h
@@ -97,9 +97,9 @@ struct efa_pd {
 
 struct efa_mr {
	struct ib_mr ibmr;
	struct ib_umem *umem;
-#ifdef HAVE_EFA_GDR
-	struct efa_nvmem *nvmem;
-	u64 nvmem_ticket;
+#ifdef HAVE_EFA_P2P
+	struct efa_p2pmem *p2pmem;
+	u64 p2p_ticket;
 #endif
 };
@@ -219,6 +219,12 @@ struct ib_cq *efa_kzalloc_cq(struct ib_device *ibdev,
 struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
			 u64 virt_addr, int access_flags,
			 struct ib_udata *udata);
+#ifdef HAVE_MR_DMABUF
+struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
+				     u64 length, u64 virt_addr,
+				     int fd, int access_flags,
+				     struct ib_udata *udata);
+#endif
 #ifdef HAVE_DEREG_MR_UDATA
 int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
 #else
diff --git a/drivers/amazon/net/efa/efa_admin_cmds_defs.h b/drivers/amazon/net/efa/efa_admin_cmds_defs.h
index 0b0b93b529f37..d4b9226088bd0 100644
--- a/drivers/amazon/net/efa/efa_admin_cmds_defs.h
+++ b/drivers/amazon/net/efa/efa_admin_cmds_defs.h
@@ -444,7 +444,10 @@ struct efa_admin_create_cq_cmd {
	/*
	 * 4:0 : cq_entry_size_words - size of CQ entry in
	 *    32-bit words, valid values: 4, 8.
-	 * 7:5 : reserved7 - MBZ
+	 * 5 : set_src_addr - If set, source address will be
+	 *    filled on RX completions from unknown senders.
+	 *    Requires 8 words CQ entry size.
+	 * 7:6 : reserved7 - MBZ
	 */
	u8 cq_caps_2;
 
@@ -980,6 +983,7 @@ struct efa_admin_host_info {
 #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
 #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6)
 #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
+#define EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR_MASK BIT(5)
 
 /* create_cq_resp */
 #define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0)
diff --git a/drivers/amazon/net/efa/efa_com_cmd.c b/drivers/amazon/net/efa/efa_com_cmd.c
index 573f12400190f..e107c354bc349 100644
--- a/drivers/amazon/net/efa/efa_com_cmd.c
+++ b/drivers/amazon/net/efa/efa_com_cmd.c
@@ -168,7 +168,10 @@ int efa_com_create_cq(struct efa_com_dev *edev,
			EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1);
		create_cmd.eqn = params->eqn;
	}
-
+	if (params->set_src_addr) {
+		EFA_SET(&create_cmd.cq_caps_2,
+			EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR, 1);
+	}
	efa_com_set_dma_addr(params->dma_addr,
			     &create_cmd.cq_ba.mem_addr_high,
			     &create_cmd.cq_ba.mem_addr_low);
diff --git a/drivers/amazon/net/efa/efa_com_cmd.h b/drivers/amazon/net/efa/efa_com_cmd.h
index c33010bbf9e83..0898ad5bc3405 100644
--- a/drivers/amazon/net/efa/efa_com_cmd.h
+++ b/drivers/amazon/net/efa/efa_com_cmd.h
@@ -75,7 +75,8 @@ struct efa_com_create_cq_params {
	u16 uarn;
	u16 eqn;
	u8 entry_size_in_bytes;
-	bool interrupt_mode_enabled;
+	u8 interrupt_mode_enabled : 1;
+	u8 set_src_addr : 1;
 };
 
 struct efa_com_create_cq_result {
diff --git a/drivers/amazon/net/efa/efa_gdr.c b/drivers/amazon/net/efa/efa_gdr.c
index 5bb25b695a31a..24f8a082d10d5 100644
--- a/drivers/amazon/net/efa/efa_gdr.c
+++ b/drivers/amazon/net/efa/efa_gdr.c
@@ -5,29 +5,41 @@
 
 #include <linux/module.h>
 
-#include "efa_gdr.h"
+#include "efa_p2p.h"
+#include "nv-p2p.h"
 
 #define GPU_PAGE_SHIFT 16
 #define GPU_PAGE_SIZE BIT_ULL(GPU_PAGE_SHIFT)
 
-static struct mutex nvmem_list_lock;
-static struct list_head nvmem_list;
-static atomic64_t next_nvmem_ticket;
+struct efa_nvmem_ops {
+	int (*get_pages)(u64 p2p_token, u32 va_space, u64 virtual_address,
+			 u64 length, struct nvidia_p2p_page_table **page_table,
+			 void (*free_callback)(void *data), void *data);
+	int (*dma_map_pages)(struct pci_dev *peer,
+			     struct nvidia_p2p_page_table *page_table,
+			     struct nvidia_p2p_dma_mapping **dma_mapping);
+	int (*put_pages)(u64 p2p_token, u32 va_space, u64 virtual_address,
+			 struct nvidia_p2p_page_table *page_table);
+	int (*dma_unmap_pages)(struct pci_dev *peer,
+			       struct nvidia_p2p_page_table *page_table,
+			       struct nvidia_p2p_dma_mapping *dma_mapping);
+};
+
+struct efa_nvmem {
+	struct efa_p2pmem p2pmem;
+	struct efa_nvmem_ops ops;
+	struct nvidia_p2p_page_table *pgtbl;
+	struct nvidia_p2p_dma_mapping *dma_mapping;
+	u64 virt_start;
+};
 
-void nvmem_init(void)
+static unsigned int nvmem_pgsz(struct efa_dev *dev, struct efa_p2pmem *p2pmem)
 {
-	mutex_init(&nvmem_list_lock);
-	INIT_LIST_HEAD(&nvmem_list);
-	/*
-	 * Ideally, first ticket would be zero, but that would make callback
-	 * data NULL which is invalid.
-	 */
-	atomic64_set(&next_nvmem_ticket, 1);
-}
+	struct efa_nvmem *nvmem;
 
-static int nvmem_pgsz(enum nvidia_p2p_page_size_type pgszt)
-{
-	switch (pgszt) {
+	nvmem = container_of(p2pmem, struct efa_nvmem, p2pmem);
+
+	switch (nvmem->pgtbl->page_size) {
	case NVIDIA_P2P_PAGE_SIZE_4KB:
		return SZ_4K;
	case NVIDIA_P2P_PAGE_SIZE_64KB:
@@ -39,19 +51,6 @@ static int nvmem_pgsz(enum nvidia_p2p_page_size_type pgszt)
	}
 }
 
-static struct efa_nvmem *ticket_to_nvmem(u64 ticket)
-{
-	struct efa_nvmem *nvmem;
-
-	lockdep_assert_held(&nvmem_list_lock);
-	list_for_each_entry(nvmem, &nvmem_list, list) {
-		if (nvmem->ticket == ticket)
-			return nvmem;
-	}
-
-	return NULL;
-}
-
 static int nvmem_get_fp(struct efa_nvmem *nvmem)
 {
	nvmem->ops.get_pages = symbol_get(nvidia_p2p_get_pages);
@@ -90,71 +89,19 @@ static void nvmem_put_fp(void)
	symbol_put(nvidia_p2p_get_pages);
 }
 
-static void nvmem_release(struct efa_dev *dev, struct efa_nvmem *nvmem)
-{
-	if (nvmem->dma_mapping)
-		nvmem->ops.dma_unmap_pages(dev->pdev, nvmem->pgtbl,
-					   nvmem->dma_mapping);
-
-	if (nvmem->pgtbl)
-		nvmem->ops.put_pages(0, 0, nvmem->virt_start, nvmem->pgtbl);
-}
-
-int nvmem_put(u64 ticket, bool in_cb)
-{
-	struct efa_com_dereg_mr_params params = {};
-	struct efa_nvmem *nvmem;
-	struct efa_dev *dev;
-	int err;
-
-	mutex_lock(&nvmem_list_lock);
-	nvmem = ticket_to_nvmem(ticket);
-	if (!nvmem) {
-		pr_debug("Ticket %llu not found in the nvmem list\n", ticket);
-		mutex_unlock(&nvmem_list_lock);
-		return 0;
-	}
-
-	dev = nvmem->dev;
-	if (nvmem->needs_dereg) {
-		params.l_key = nvmem->lkey;
-		err = efa_com_dereg_mr(&dev->edev, &params);
-		if (err) {
-			mutex_unlock(&nvmem_list_lock);
-			return err;
-		}
-		nvmem->needs_dereg = false;
-	}
-
-	if (in_cb) {
-		nvmem->pgtbl = NULL;
-		nvmem->dma_mapping = NULL;
-		mutex_unlock(&nvmem_list_lock);
-		return 0;
-	}
-
-	list_del(&nvmem->list);
-	mutex_unlock(&nvmem_list_lock);
-	nvmem_release(dev, nvmem);
-	nvmem_put_fp();
-	kfree(nvmem);
-
-	return 0;
-}
-
 static void nvmem_free_cb(void *data)
 {
	pr_debug("Free callback ticket %llu\n", (u64)data);
-	nvmem_put((u64)data, true);
+	efa_p2p_put((u64)data, true);
 }
 
 static int nvmem_get_pages(struct efa_dev *dev, struct efa_nvmem *nvmem,
-			   u64 addr, u64 size)
+			   u64 addr, u64 size, u64 ticket)
 {
	int err;
 
	err = nvmem->ops.get_pages(0, 0, addr, size, &nvmem->pgtbl,
-				   nvmem_free_cb, (void *)nvmem->ticket);
+				   nvmem_free_cb, (void *)ticket);
	if (err) {
		ibdev_dbg(&dev->ibdev, "nvidia_p2p_get_pages failed %d\n", err);
		return err;
	}
@@ -195,8 +142,8 @@ static int nvmem_dma_map(struct efa_dev *dev, struct efa_nvmem *nvmem)
	return 0;
 }
 
-struct efa_nvmem *nvmem_get(struct efa_dev *dev, struct efa_mr *mr, u64 start,
-			    u64 length, unsigned int *pgsz)
+static struct efa_p2pmem *nvmem_get(struct efa_dev *dev, u64 ticket, u64 start,
+				    u64 length)
 {
	struct efa_nvmem *nvmem;
	u64 virt_start;
@@ -208,10 +155,6 @@ struct efa_nvmem *nvmem_get(struct efa_dev *dev, struct efa_mr *mr, u64 start,
	if (!nvmem)
		return NULL;
 
-	nvmem->ticket = atomic64_fetch_inc(&next_nvmem_ticket);
-	mr->nvmem_ticket = nvmem->ticket;
-	nvmem->dev = dev;
-
	virt_start = ALIGN_DOWN(start, GPU_PAGE_SIZE);
	virt_end = ALIGN(start + length, GPU_PAGE_SIZE);
	pinsz = virt_end - virt_start;
@@ -222,28 +165,17 @@ struct efa_nvmem *nvmem_get(struct efa_dev *dev, struct efa_mr *mr, u64 start,
		/* Nvidia module is not loaded */
		goto err_free;
 
-	err = nvmem_get_pages(dev, nvmem, virt_start, pinsz);
-	if (err) {
-		/* Most likely cpu pages */
+	err = nvmem_get_pages(dev, nvmem, virt_start, pinsz, ticket);
+	if (err)
+		/* Most likely not our pages */
		goto err_put_fp;
-	}
 
	err = nvmem_dma_map(dev, nvmem);
	if (err)
		goto err_put;
 
-	*pgsz = nvmem_pgsz(nvmem->pgtbl->page_size);
-	if (!*pgsz)
-		goto err_unmap;
+	return &nvmem->p2pmem;
 
-	mutex_lock(&nvmem_list_lock);
-	list_add(&nvmem->list, &nvmem_list);
-	mutex_unlock(&nvmem_list_lock);
-
-	return nvmem;
-
-err_unmap:
-	nvmem->ops.dma_unmap_pages(dev->pdev, nvmem->pgtbl, nvmem->dma_mapping);
 err_put:
	nvmem->ops.put_pages(0, 0, virt_start, nvmem->pgtbl);
 err_put_fp:
@@ -253,18 +185,39 @@ struct efa_nvmem *nvmem_get(struct efa_dev *dev, struct efa_mr *mr, u64 start,
	return NULL;
 }
 
-int nvmem_to_page_list(struct efa_dev *dev, struct efa_nvmem *nvmem,
-		       u64 *page_list)
+static int nvmem_to_page_list(struct efa_dev *dev, struct efa_p2pmem *p2pmem,
+			      u64 *page_list)
 {
-	struct nvidia_p2p_dma_mapping *dma_mapping = nvmem->dma_mapping;
+	struct nvidia_p2p_dma_mapping *dma_mapping;
+	struct efa_nvmem *nvmem;
	int i;
 
+	nvmem = container_of(p2pmem, struct efa_nvmem, p2pmem);
+	dma_mapping = nvmem->dma_mapping;
+
	for (i = 0; i < dma_mapping->entries; i++)
		page_list[i] = dma_mapping->dma_addresses[i];
 
	return 0;
 }
 
+static void nvmem_release(struct efa_dev *dev, struct efa_p2pmem *p2pmem,
+			  bool in_cb)
+{
+	struct efa_nvmem *nvmem;
+
+	nvmem = container_of(p2pmem, struct efa_nvmem, p2pmem);
+
+	if (!in_cb) {
+		nvmem->ops.dma_unmap_pages(dev->pdev, nvmem->pgtbl,
+					   nvmem->dma_mapping);
+		nvmem->ops.put_pages(0, 0, nvmem->virt_start, nvmem->pgtbl);
+	}
+
+	nvmem_put_fp();
+	kfree(nvmem);
+}
+
 bool nvmem_is_supported(void)
 {
	struct efa_nvmem dummynv = {};
@@ -275,3 +228,24 @@ bool nvmem_is_supported(void)
 
	return true;
 }
+
+struct nvmem_provider {
+	struct efa_p2p_provider p2p;
+};
+
+static const struct nvmem_provider prov = {
+	.p2p = {
+		.ops = {
+			.try_get = nvmem_get,
+			.to_page_list = nvmem_to_page_list,
+			.release = nvmem_release,
+			.get_page_size = nvmem_pgsz,
+		},
+		.type = EFA_P2P_PROVIDER_NVMEM,
+	},
+};
+
+const struct efa_p2p_provider *nvmem_get_provider(void)
+{
+	return &prov.p2p;
+}
diff --git a/drivers/amazon/net/efa/efa_gdr.h b/drivers/amazon/net/efa/efa_gdr.h
deleted file mode 100644
index d649a3504cd0e..0000000000000
--- a/drivers/amazon/net/efa/efa_gdr.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
-/*
- * Copyright 2019-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
- */ - -#ifndef _EFA_GDR_H_ -#define _EFA_GDR_H_ - -#include "efa.h" -#include "nv-p2p.h" - -struct efa_nvmem_ops { - int (*get_pages)(u64 p2p_token, u32 va_space, u64 virtual_address, - u64 length, struct nvidia_p2p_page_table **page_table, - void (*free_callback)(void *data), void *data); - int (*dma_map_pages)(struct pci_dev *peer, - struct nvidia_p2p_page_table *page_table, - struct nvidia_p2p_dma_mapping **dma_mapping); - int (*put_pages)(u64 p2p_token, u32 va_space, u64 virtual_address, - struct nvidia_p2p_page_table *page_table); - int (*dma_unmap_pages)(struct pci_dev *peer, - struct nvidia_p2p_page_table *page_table, - struct nvidia_p2p_dma_mapping *dma_mapping); -}; - -struct efa_nvmem { - struct efa_dev *dev; - struct efa_nvmem_ops ops; - struct nvidia_p2p_page_table *pgtbl; - struct nvidia_p2p_dma_mapping *dma_mapping; - u64 virt_start; - u64 ticket; - u32 lkey; - bool needs_dereg; - struct list_head list; /* member of nvmem_list */ -}; - -void nvmem_init(void); -struct efa_nvmem *nvmem_get(struct efa_dev *dev, struct efa_mr *mr, u64 start, - u64 length, unsigned int *pgsz); -int nvmem_to_page_list(struct efa_dev *dev, struct efa_nvmem *nvmem, - u64 *page_list); -int nvmem_put(u64 ticket, bool in_cb); -bool nvmem_is_supported(void); - -#endif /* _EFA_GDR_H_ */ diff --git a/drivers/amazon/net/efa/efa_io_defs.h b/drivers/amazon/net/efa/efa_io_defs.h new file mode 100644 index 0000000000000..17ba8984b11e9 --- /dev/null +++ b/drivers/amazon/net/efa/efa_io_defs.h @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef _EFA_IO_H_ +#define _EFA_IO_H_ + +#define EFA_IO_TX_DESC_NUM_BUFS 2 +#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1 +#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32 +#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4 + +enum efa_io_queue_type { + /* send queue (of a QP) */ + EFA_IO_SEND_QUEUE = 1, + /* recv queue (of a QP) */ + EFA_IO_RECV_QUEUE = 2, +}; + +enum efa_io_send_op_type { + /* send message */ + EFA_IO_SEND = 0, + /* RDMA read */ + EFA_IO_RDMA_READ = 1, +}; + +enum efa_io_comp_status { + /* Successful completion */ + EFA_IO_COMP_STATUS_OK = 0, + /* Flushed during QP destroy */ + EFA_IO_COMP_STATUS_FLUSHED = 1, + /* Internal QP error */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2, + /* Bad operation type */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_OP_TYPE = 3, + /* Bad AH */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4, + /* LKEY not registered or does not match IOVA */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5, + /* Message too long */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6, + /* Destination ENI is down or does not run EFA */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7, + /* Connection was reset by remote side */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8, + /* Bad dest QP number (QP does not exist or is in error state) */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_DEST_QPN = 9, + /* Destination resource not ready (no WQEs posted on RQ) */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_RNR = 10, + /* Receiver SGL too short */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11, + /* Unexpected status returned by responder */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12, + /* Unresponsive remote - detected locally */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13, +}; + +struct efa_io_tx_meta_desc { + /* Verbs-generated Request ID */ + u16 req_id; + + /* + * control flags + * 3:0 : op_type - operation type: send/rdma/fast mem + * ops/etc + 
+	 * 4 : has_imm - immediate_data field carries valid
+	 *    data.
+	 * 5 : inline_msg - inline mode - inline message data
+	 *    follows this descriptor (no buffer descriptors).
+	 *    Note that it is different from immediate data
+	 * 6 : meta_extension - Extended metadata. MBZ
+	 * 7 : meta_desc - Indicates metadata descriptor.
+	 *    Must be set.
+	 */
+	u8 ctrl1;
+
+	/*
+	 * control flags
+	 * 0 : phase
+	 * 1 : reserved25 - MBZ
+	 * 2 : first - Indicates first descriptor in
+	 *    transaction. Must be set.
+	 * 3 : last - Indicates last descriptor in
+	 *    transaction. Must be set.
+	 * 4 : comp_req - Indicates whether completion should
+	 *    be posted, after packet is transmitted. Valid only
+	 *    for the first descriptor
+	 * 7:5 : reserved29 - MBZ
+	 */
+	u8 ctrl2;
+
+	u16 dest_qp_num;
+
+	/*
+	 * If inline_msg bit is set, length of inline message in bytes,
+	 * otherwise length of SGL (number of buffers).
+	 */
+	u16 length;
+
+	/*
+	 * immediate data: if has_imm is set, then this field is included
+	 * within Tx message and reported in remote Rx completion.
+	 */
+	u32 immediate_data;
+
+	u16 ah;
+
+	u16 reserved;
+
+	/* Queue key */
+	u32 qkey;
+
+	u8 reserved2[12];
+};
+
+/*
+ * Tx queue buffer descriptor, for any transport type. Preceded by metadata
+ * descriptor.
+ */
+struct efa_io_tx_buf_desc {
+	/* length in bytes */
+	u32 length;
+
+	/*
+	 * 23:0 : lkey - local memory translation key
+	 * 31:24 : reserved - MBZ
+	 */
+	u32 lkey;
+
+	/* Buffer address bits[31:0] */
+	u32 buf_addr_lo;
+
+	/* Buffer address bits[63:32] */
+	u32 buf_addr_hi;
+};
+
+struct efa_io_remote_mem_addr {
+	/* length in bytes */
+	u32 length;
+
+	/* remote memory translation key */
+	u32 rkey;
+
+	/* Buffer address bits[31:0] */
+	u32 buf_addr_lo;
+
+	/* Buffer address bits[63:32] */
+	u32 buf_addr_hi;
+};
+
+struct efa_io_rdma_req {
+	/* Remote memory address */
+	struct efa_io_remote_mem_addr remote_mem;
+
+	/* Local memory address */
+	struct efa_io_tx_buf_desc local_mem[1];
+};
+
+/*
+ * Tx WQE, composed of tx meta descriptors followed by either tx buffer
+ * descriptors or inline data
+ */
+struct efa_io_tx_wqe {
+	/* TX meta */
+	struct efa_io_tx_meta_desc meta;
+
+	union {
+		/* Send buffer descriptors */
+		struct efa_io_tx_buf_desc sgl[2];
+
+		u8 inline_data[32];
+
+		/* RDMA local and remote memory addresses */
+		struct efa_io_rdma_req rdma_req;
+	} data;
+};
+
+/*
+ * Rx buffer descriptor; RX WQE is composed of one or more RX buffer
+ * descriptors.
+ */
+struct efa_io_rx_desc {
+	/* Buffer address bits[31:0] */
+	u32 buf_addr_lo;
+
+	/* Buffer Pointer[63:32] */
+	u32 buf_addr_hi;
+
+	/* Verbs-generated request id. */
+	u16 req_id;
+
+	/* Length in bytes. */
+	u16 length;
+
+	/*
+	 * LKey and control flags
+	 * 23:0 : lkey
+	 * 29:24 : reserved - MBZ
+	 * 30 : first - Indicates first descriptor in WQE
+	 * 31 : last - Indicates last descriptor in WQE
+	 */
+	u32 lkey_ctrl;
+};
+
+/* Common IO completion descriptor */
+struct efa_io_cdesc_common {
+	/*
+	 * verbs-generated request ID, as provided in the completed tx or rx
+	 * descriptor.
+	 */
+	u16 req_id;
+
+	u8 status;
+
+	/*
+	 * flags
+	 * 0 : phase - Phase bit
+	 * 2:1 : q_type - enum efa_io_queue_type: send/recv
+	 * 3 : has_imm - indicates that immediate data is
+	 *    present - for RX completions only
+	 * 7:4 : reserved28 - MBZ
+	 */
+	u8 flags;
+
+	/* local QP number */
+	u16 qp_num;
+
+	/* Transferred length */
+	u16 length;
+};
+
+/* Tx completion descriptor */
+struct efa_io_tx_cdesc {
+	/* Common completion info */
+	struct efa_io_cdesc_common common;
+};
+
+/* Rx Completion Descriptor */
+struct efa_io_rx_cdesc {
+	/* Common completion info */
+	struct efa_io_cdesc_common common;
+
+	/* Remote Address Handle FW index, 0xFFFF indicates invalid ah */
+	u16 ah;
+
+	u16 src_qp_num;
+
+	/* Immediate data */
+	u32 imm;
+};
+
+/* Extended Rx Completion Descriptor */
+struct efa_io_rx_cdesc_ex {
+	/* Base RX completion info */
+	struct efa_io_rx_cdesc rx_cdesc_base;
+
+	/*
+	 * Valid only in case of unknown AH (0xFFFF) and CQ set_src_addr is
+	 * enabled.
+	 */
+	u8 src_addr[16];
+};
+
+/* tx_meta_desc */
+#define EFA_IO_TX_META_DESC_OP_TYPE_MASK GENMASK(3, 0)
+#define EFA_IO_TX_META_DESC_HAS_IMM_MASK BIT(4)
+#define EFA_IO_TX_META_DESC_INLINE_MSG_MASK BIT(5)
+#define EFA_IO_TX_META_DESC_META_EXTENSION_MASK BIT(6)
+#define EFA_IO_TX_META_DESC_META_DESC_MASK BIT(7)
+#define EFA_IO_TX_META_DESC_PHASE_MASK BIT(0)
+#define EFA_IO_TX_META_DESC_FIRST_MASK BIT(2)
+#define EFA_IO_TX_META_DESC_LAST_MASK BIT(3)
+#define EFA_IO_TX_META_DESC_COMP_REQ_MASK BIT(4)
+
+/* tx_buf_desc */
+#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0)
+
+/* rx_desc */
+#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0)
+#define EFA_IO_RX_DESC_FIRST_MASK BIT(30)
+#define EFA_IO_RX_DESC_LAST_MASK BIT(31)
+
+/* cdesc_common */
+#define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0)
+#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1)
+#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3)
+
+#endif /* _EFA_IO_H_ */
diff --git a/drivers/amazon/net/efa/efa_main.c b/drivers/amazon/net/efa/efa_main.c
index 9aea1490561f5..34a8e13273556 100644
--- a/drivers/amazon/net/efa/efa_main.c
+++ b/drivers/amazon/net/efa/efa_main.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
 /*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #include <linux/module.h>
@@ -13,8 +13,8 @@
 #include "efa.h"
 #include "efa_sysfs.h"
 
-#ifdef HAVE_EFA_GDR
-#include "efa_gdr.h"
+#ifdef HAVE_EFA_P2P
+#include "efa_p2p.h"
 #endif
 
 #ifndef HAVE_PCI_VENDOR_ID_AMAZON
@@ -22,16 +22,18 @@
 #endif
 #define PCI_DEV_ID_EFA0_VF 0xefa0
 #define PCI_DEV_ID_EFA1_VF 0xefa1
+#define PCI_DEV_ID_EFA2_VF 0xefa2
 
 static const struct pci_device_id efa_pci_tbl[] = {
	{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) },
	{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) },
+	{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA2_VF) },
	{ }
 };
 
-#define DRV_MODULE_VER_MAJOR 1
-#define DRV_MODULE_VER_MINOR 14
-#define DRV_MODULE_VER_SUBMINOR 2
+#define DRV_MODULE_VER_MAJOR 2
+#define DRV_MODULE_VER_MINOR 1
+#define DRV_MODULE_VER_SUBMINOR 0
 
 #ifndef DRV_MODULE_VERSION
 #define DRV_MODULE_VERSION \
@@ -307,7 +309,7 @@ static void efa_set_host_info(struct efa_dev *dev)
		EFA_COMMON_SPEC_VERSION_MAJOR);
	EFA_SET(&hinf->spec_ver, EFA_ADMIN_HOST_INFO_SPEC_MINOR,
		EFA_COMMON_SPEC_VERSION_MINOR);
-#ifdef HAVE_EFA_GDR
+#ifdef HAVE_EFA_P2P
	EFA_SET(&hinf->flags, EFA_ADMIN_HOST_INFO_GDR, 1);
 #endif
 
@@ -455,6 +457,9 @@ static const struct ib_device_ops efa_dev_ops = {
	.query_port = efa_query_port,
	.query_qp = efa_query_qp,
	.reg_user_mr = efa_reg_mr,
+#ifdef HAVE_MR_DMABUF
+	.reg_user_mr_dmabuf = efa_reg_user_mr_dmabuf,
+#endif
 #ifndef HAVE_NO_KVERBS_DRIVERS
	.req_notify_cq = efa_req_notify_cq,
 #endif
@@ -683,6 +688,7 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev)
		dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", err);
		return err;
	}
+	dma_set_max_seg_size(&pdev->dev, UINT_MAX);
 
	return 0;
 }
@@ -867,8 +873,8 @@ static int __init efa_init(void)
		return err;
	}
 
-#ifdef HAVE_EFA_GDR
-	nvmem_init();
+#ifdef HAVE_EFA_P2P
+	efa_p2p_init();
 #endif
 
	return 0;
diff --git a/drivers/amazon/net/efa/efa_neuron.c b/drivers/amazon/net/efa/efa_neuron.c
new file mode 100644
index 0000000000000..ec2644e3079c4
--- /dev/null
+++ b/drivers/amazon/net/efa/efa_neuron.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include <linux/module.h>
+
+#include "efa_p2p.h"
+#include "neuron_p2p.h"
+
+#define NEURON_PAGE_SHIFT 12
+#define NEURON_PAGE_SIZE BIT_ULL(NEURON_PAGE_SHIFT)
+
+struct efa_neuronmem_ops {
+	int (*register_va)(u64 virtual_address, u64 length,
+			   struct neuron_p2p_va_info **vainfo,
+			   void (*free_callback)(void *data),
+			   void *data);
+	int (*unregister_va)(struct neuron_p2p_va_info *vainfo);
+};
+
+struct efa_neuronmem {
+	struct efa_p2pmem p2pmem;
+	struct efa_neuronmem_ops ops;
+	struct neuron_p2p_va_info *va_info;
+	u64 virt_start;
+};
+
+static unsigned int neuronmem_pgsz(struct efa_dev *dev,
+				   struct efa_p2pmem *p2pmem)
+{
+	struct efa_neuronmem *neuronmem;
+
+	neuronmem = container_of(p2pmem, struct efa_neuronmem, p2pmem);
+	return BIT(neuronmem->va_info->shift_page_size);
+}
+
+static int neuronmem_get_fp(struct efa_neuronmem *neuronmem)
+{
+	neuronmem->ops.register_va = symbol_get(neuron_p2p_register_va);
+	if (!neuronmem->ops.register_va)
+		goto err_out;
+
+	neuronmem->ops.unregister_va = symbol_get(neuron_p2p_unregister_va);
+	if (!neuronmem->ops.unregister_va)
+		goto err_put_register_va;
+
+	return 0;
+
+err_put_register_va:
+	symbol_put(neuron_p2p_register_va);
+err_out:
+	return -EINVAL;
+}
+
+static void neuronmem_put_fp(void)
+{
+	symbol_put(neuron_p2p_unregister_va);
+	symbol_put(neuron_p2p_register_va);
+}
+
+static void neuronmem_free_cb(void *data)
+{
+	pr_debug("Free callback ticket %llu\n", (u64)data);
+	efa_p2p_put((u64)data, true);
+}
+
+static int neuronmem_register_va(struct efa_dev *dev, struct efa_neuronmem *neuronmem,
+				 u64 addr, u64 size, u64 ticket)
+{
+	int err;
+
+	err = neuronmem->ops.register_va(addr, size, &neuronmem->va_info,
+					 neuronmem_free_cb, (void *)ticket);
+	if (err) {
+		ibdev_dbg(&dev->ibdev, "neuron_p2p_register_va failed %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static struct efa_p2pmem *neuronmem_get(struct efa_dev *dev, u64 ticket, u64 start,
+					u64 length)
+{
+	struct efa_neuronmem *neuronmem;
+	u64 virt_start;
+	u64 virt_end;
+	u64 pinsz;
+	int err;
+
+	neuronmem = kzalloc(sizeof(*neuronmem), GFP_KERNEL);
+	if (!neuronmem)
+		return NULL;
+
+	virt_start = ALIGN_DOWN(start, NEURON_PAGE_SIZE);
+	virt_end = ALIGN(start + length, NEURON_PAGE_SIZE);
+	pinsz = virt_end - virt_start;
+	neuronmem->virt_start = virt_start;
+
+	err = neuronmem_get_fp(neuronmem);
+	if (err)
+		/* Neuron module is not loaded */
+		goto err_free;
+
+	err = neuronmem_register_va(dev, neuronmem, virt_start, pinsz, ticket);
+	if (err)
+		/* Most likely not our pages */
+		goto err_put_fp;
+
+	return &neuronmem->p2pmem;
+
+err_put_fp:
+	neuronmem_put_fp();
+err_free:
+	kfree(neuronmem);
+	return NULL;
+}
+
+static int neuronmem_to_page_list(struct efa_dev *dev, struct efa_p2pmem *p2pmem,
+				  u64 *page_list)
+{
+	struct neuron_p2p_page_info *pg_info;
+	struct neuron_p2p_va_info *va_info;
+	struct efa_neuronmem *neuronmem;
+	int ent_idx, pa_idx;
+	int pg_idx = 0;
+	u64 pa;
+
+	neuronmem = container_of(p2pmem, struct efa_neuronmem, p2pmem);
+	va_info = neuronmem->va_info;
+
+	for (ent_idx = 0; ent_idx < va_info->entries; ent_idx++) {
+		pg_info = va_info->page_info + ent_idx;
+		pa = pg_info->physical_address;
+		for (pa_idx = 0; pa_idx < pg_info->page_count; pa_idx++) {
+			page_list[pg_idx++] = pa;
+			pa += BIT(va_info->shift_page_size);
+		}
+	}
+
+	return 0;
+}
+
+static void neuronmem_release(struct efa_dev *dev, struct efa_p2pmem *p2pmem,
+			      bool in_cb)
+{
+	struct efa_neuronmem *neuronmem;
+
+	neuronmem = container_of(p2pmem, struct efa_neuronmem, p2pmem);
+
+	neuronmem->ops.unregister_va(neuronmem->va_info);
+	neuronmem_put_fp();
+	kfree(neuronmem);
+}
+
+struct neuronmem_provider {
+	struct efa_p2p_provider p2p;
+};
+
+static const struct neuronmem_provider prov = {
+	.p2p = {
+		.ops = {
+			.try_get = neuronmem_get,
+			.to_page_list = neuronmem_to_page_list,
+			.release = neuronmem_release,
+			.get_page_size = neuronmem_pgsz,
+		},
+		.type = EFA_P2P_PROVIDER_NEURON,
+	},
+};
+
+const struct efa_p2p_provider *neuronmem_get_provider(void)
+{
+	return &prov.p2p;
+}
diff --git a/drivers/amazon/net/efa/efa_p2p.c b/drivers/amazon/net/efa/efa_p2p.c
new file mode 100644
index 0000000000000..9daf101288f43
--- /dev/null
+++ b/drivers/amazon/net/efa/efa_p2p.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright 2019-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include "efa_p2p.h"
+
+static struct mutex p2p_list_lock;
+static struct list_head p2p_list;
+static atomic64_t next_p2p_ticket;
+
+static const struct efa_p2p_provider *prov_arr[EFA_P2P_PROVIDER_MAX];
+
+/* Register all providers here */
+static void p2p_providers_init(void)
+{
+	prov_arr[EFA_P2P_PROVIDER_NVMEM] = nvmem_get_provider();
+	prov_arr[EFA_P2P_PROVIDER_NEURON] = neuronmem_get_provider();
+}
+
+void efa_p2p_init(void)
+{
+	mutex_init(&p2p_list_lock);
+	INIT_LIST_HEAD(&p2p_list);
+	/*
+	 * Ideally, first ticket would be zero, but that would make callback
+	 * data NULL which is invalid.
+	 */
+	atomic64_set(&next_p2p_ticket, 1);
+
+	p2p_providers_init();
+}
+
+static struct efa_p2pmem *ticket_to_p2p(u64 ticket)
+{
+	struct efa_p2pmem *p2pmem;
+
+	lockdep_assert_held(&p2p_list_lock);
+	list_for_each_entry(p2pmem, &p2p_list, list) {
+		if (p2pmem->ticket == ticket)
+			return p2pmem;
+	}
+
+	return NULL;
+}
+
+int efa_p2p_put(u64 ticket, bool in_cb)
+{
+	struct efa_com_dereg_mr_params params = {};
+	struct efa_p2pmem *p2pmem;
+	struct efa_dev *dev;
+	int err;
+
+	mutex_lock(&p2p_list_lock);
+	p2pmem = ticket_to_p2p(ticket);
+	if (!p2pmem) {
+		pr_debug("Ticket %llu not found in the p2pmem list\n", ticket);
+		mutex_unlock(&p2p_list_lock);
+		return 0;
+	}
+
+	dev = p2pmem->dev;
+	if (p2pmem->needs_dereg) {
+		params.l_key = p2pmem->lkey;
+		err = efa_com_dereg_mr(&dev->edev, &params);
+		if (err) {
+			mutex_unlock(&p2p_list_lock);
+			return err;
+		}
+		p2pmem->needs_dereg = false;
+	}
+
+	list_del(&p2pmem->list);
+	mutex_unlock(&p2p_list_lock);
+	p2pmem->prov->ops.release(dev, p2pmem, in_cb);
+
+	return 0;
+}
+
+struct efa_p2pmem *efa_p2p_get(struct efa_dev *dev, struct efa_mr *mr, u64 start,
+			       u64 length)
+{
+	const struct efa_p2p_provider *prov;
+	struct efa_p2pmem *p2pmem;
+	u64 ticket;
+	int i;
+
+	ticket = atomic64_fetch_inc(&next_p2p_ticket);
+	for (i = 0; i < EFA_P2P_PROVIDER_MAX; i++) {
+		prov = prov_arr[i];
+		p2pmem = prov->ops.try_get(dev, ticket, start, length);
+		if (p2pmem)
+			break;
+	}
+	if (!p2pmem)
+		/* No provider was found, most likely cpu pages */
+		return NULL;
+
+	p2pmem->dev = dev;
+	p2pmem->ticket = ticket;
+	p2pmem->prov = prov;
+	mr->p2p_ticket = p2pmem->ticket;
+
+	mutex_lock(&p2p_list_lock);
+	list_add(&p2pmem->list, &p2p_list);
+	mutex_unlock(&p2p_list_lock);
+
+	return p2pmem;
+}
+
+int efa_p2p_to_page_list(struct efa_dev *dev, struct efa_p2pmem *p2pmem,
+			 u64 *page_list)
+{
+	return p2pmem->prov->ops.to_page_list(dev, p2pmem, page_list);
+}
+
+unsigned int efa_p2p_get_page_size(struct efa_dev *dev,
+				   struct efa_p2pmem *p2pmem)
+{
+	return p2pmem->prov->ops.get_page_size(dev, p2pmem);
+}
diff --git a/drivers/amazon/net/efa/efa_p2p.h b/drivers/amazon/net/efa/efa_p2p.h
new file mode 100644
index 0000000000000..89ee7a9935c11
--- /dev/null
+++ b/drivers/amazon/net/efa/efa_p2p.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2019-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_P2P_H_
+#define _EFA_P2P_H_
+
+#include "efa.h"
+
+struct efa_p2p_ops {
+	struct efa_p2pmem *(*try_get)(struct efa_dev *dev, u64 ticket, u64 start,
+				      u64 length);
+	int (*to_page_list)(struct efa_dev *dev, struct efa_p2pmem *p2pmem,
+			    u64 *page_list);
+	void (*release)(struct efa_dev *dev, struct efa_p2pmem *p2pmem,
+			bool in_cb);
+	unsigned int (*get_page_size)(struct efa_dev *dev,
+				      struct efa_p2pmem *p2pmem);
+};
+
+enum efa_p2p_prov {
+	EFA_P2P_PROVIDER_NVMEM,
+	EFA_P2P_PROVIDER_NEURON,
+	EFA_P2P_PROVIDER_MAX,
+};
+
+struct efa_p2p_provider {
+	const struct efa_p2p_ops ops;
+	enum efa_p2p_prov type;
+};
+
+struct efa_p2pmem {
+	struct efa_dev *dev;
+	const struct efa_p2p_provider *prov;
+	u64 ticket;
+	u32 lkey;
+	bool needs_dereg;
+	struct list_head list; /* member of efa_p2p_list */
+};
+
+void efa_p2p_init(void);
+struct efa_p2pmem *efa_p2p_get(struct efa_dev *dev, struct efa_mr *mr, u64 start,
+			       u64 length);
+unsigned int efa_p2p_get_page_size(struct efa_dev *dev,
+				   struct efa_p2pmem *p2pmem);
+int efa_p2p_to_page_list(struct efa_dev *dev, struct efa_p2pmem *p2pmem,
+			 u64 *page_list);
+int efa_p2p_put(u64 ticket, bool in_cb);
+
+/* Provider specific stuff go here */
+const struct efa_p2p_provider *nvmem_get_provider(void);
+bool nvmem_is_supported(void);
+
+const struct efa_p2p_provider *neuronmem_get_provider(void);
+
+#endif /* _EFA_P2P_H_ */
diff --git a/drivers/amazon/net/efa/efa_sysfs.c b/drivers/amazon/net/efa/efa_sysfs.c
index 98add51e4be81..8e8b2bd210db1 100644
--- a/drivers/amazon/net/efa/efa_sysfs.c
+++ b/drivers/amazon/net/efa/efa_sysfs.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
 /*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #include "efa_sysfs.h"
@@ -17,8 +17,7 @@ static int sysfs_emit(char *buf, const char *fmt, ...)
	va_list args;
	int len;
 
-	if (WARN(!buf || offset_in_page(buf),
-		 "invalid sysfs_emit: buf:%p\n", buf))
+	if (!buf)
		return 0;
 
	va_start(args, fmt);
@@ -29,8 +28,8 @@ static int sysfs_emit(char *buf, const char *fmt, ...)
 }
 #endif
 
-#ifdef HAVE_EFA_GDR
-#include "efa_gdr.h"
+#ifdef HAVE_EFA_P2P
+#include "efa_p2p.h"
 
 static ssize_t gdr_show(struct device *dev,
			struct device_attribute *attr, char *buf)
@@ -46,7 +45,7 @@ static DEVICE_ATTR_RO(gdr);
 
 int efa_sysfs_init(struct efa_dev *dev)
 {
-#ifdef HAVE_EFA_GDR
+#ifdef HAVE_EFA_P2P
	struct device *device = &dev->pdev->dev;
 
	if (device_create_file(device, &dev_attr_gdr))
@@ -57,7 +56,7 @@ int efa_sysfs_init(struct efa_dev *dev)
 
 void efa_sysfs_destroy(struct efa_dev *dev)
 {
-#ifdef HAVE_EFA_GDR
+#ifdef HAVE_EFA_P2P
	device_remove_file(&dev->pdev->dev, &dev_attr_gdr);
 #endif
 }
diff --git a/drivers/amazon/net/efa/efa_verbs.c b/drivers/amazon/net/efa/efa_verbs.c
index d1822e6bc4a7c..c9535ee90108b 100644
--- a/drivers/amazon/net/efa/efa_verbs.c
+++ b/drivers/amazon/net/efa/efa_verbs.c
@@ -1,9 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #include "kcompat.h"
+#ifdef HAVE_MR_DMABUF
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#endif
 #include <linux/vmalloc.h>
 #include <linux/log2.h>
@@ -16,9 +20,10 @@
 #endif
 
 #include "efa.h"
+#include "efa_io_defs.h"
 
-#ifdef HAVE_EFA_GDR
-#include "efa_gdr.h"
+#ifdef HAVE_EFA_P2P
+#include "efa_p2p.h"
 #endif
 
 enum {
@@ -339,6 +344,7 @@ int efa_query_device(struct ib_device *ibdev,
		resp.max_rq_wr = dev_attr->max_rq_depth;
		resp.max_rdma_size = dev_attr->max_rdma_size;
 
+		resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID;
		if (EFA_DEV_CAP(dev, RDMA_READ))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
 
@@ -1384,6 +1390,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
	struct efa_ibv_create_cq cmd = {};
	struct efa_cq *cq = to_ecq(ibcq);
	int entries = attr->cqe;
+	bool set_src_addr;
	int err;
 
	ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
@@ -1437,7 +1444,9 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		goto err_out;
	}
 
-	if (!cmd.cq_entry_size) {
+	set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID);
+	if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) &&
+	    (set_src_addr || cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) {
		ibdev_dbg(ibdev,
			  "Invalid entry size [%u]\n", cmd.cq_entry_size);
		err = -EINVAL;
		goto err_out;
@@ -1466,6 +1475,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
	params.dma_addr = cq->dma_addr;
	params.entry_size_in_bytes = cmd.cq_entry_size;
	params.num_sub_cqs = cmd.num_sub_cqs;
+	params.set_src_addr = set_src_addr;
	if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) {
		cq->eq = efa_vec2eq(dev, attr->comp_vector);
		params.eqn = cq->eq->eeq.eqn;
@@ -1908,7 +1918,7 @@ static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
 /* create a page buffer list from a mapped user memory region */
 static int pbl_create(struct efa_dev *dev,
		      struct pbl_context *pbl,
-#ifdef HAVE_EFA_GDR
+#ifdef HAVE_EFA_P2P
		      struct efa_mr *mr,
 #else
		      struct ib_umem *umem,
@@ -1925,12 +1935,12 @@ static int pbl_create(struct efa_dev *dev,
 
	if (is_vmalloc_addr(pbl->pbl_buf)) {
		pbl->physically_continuous = 0;
-#ifdef HAVE_EFA_GDR
-		if (mr->umem)
+#ifdef HAVE_EFA_P2P
+		if (mr->p2pmem)
+			err = efa_p2p_to_page_list(dev, mr->p2pmem, pbl->pbl_buf);
+		else
			err = umem_to_page_list(dev, mr->umem, pbl->pbl_buf,
						hp_cnt, hp_shift);
-		else
-			err = nvmem_to_page_list(dev, mr->nvmem, pbl->pbl_buf);
 #else
		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
@@ -1943,12 +1953,12 @@ static int pbl_create(struct efa_dev *dev,
			goto err_free;
	} else {
		pbl->physically_continuous = 1;
-#ifdef HAVE_EFA_GDR
-		if (mr->umem)
+#ifdef HAVE_EFA_P2P
+		if (mr->p2pmem)
+			err = efa_p2p_to_page_list(dev, mr->p2pmem, pbl->pbl_buf);
+		else
			err = umem_to_page_list(dev, mr->umem, pbl->pbl_buf,
						hp_cnt, hp_shift);
-		else
-			err = nvmem_to_page_list(dev, mr->nvmem, pbl->pbl_buf);
 #else
		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
@@ -1989,13 +1999,13 @@ static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
	int err;
 
	params->inline_pbl = 1;
-#ifdef HAVE_EFA_GDR
-	if (mr->umem)
+#ifdef HAVE_EFA_P2P
+	if (mr->p2pmem)
+		err = efa_p2p_to_page_list(dev, mr->p2pmem,
+					   params->pbl.inline_pbl_array);
+	else
		err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
					params->page_num, params->page_shift);
-	else
-		err = nvmem_to_page_list(dev, mr->nvmem,
-					 params->pbl.inline_pbl_array);
 #else
	err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
				params->page_num, params->page_shift);
@@ -2016,7 +2026,7 @@ static int efa_create_pbl(struct efa_dev *dev,
 {
	int err;
 
-#ifdef HAVE_EFA_GDR
+#ifdef HAVE_EFA_P2P
	err = pbl_create(dev, pbl, mr, params->page_num,
			 params->page_shift);
 #else
@@ -2096,25 +2106,17 @@ static unsigned long efa_cont_pages(struct ib_umem *umem,
 }
 #endif
 
-struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
-			 u64 virt_addr, int access_flags,
-			 struct ib_udata *udata)
+static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags,
+				   struct ib_udata *udata)
 {
	struct efa_dev *dev = to_edev(ibpd->device);
-	struct efa_com_reg_mr_params params = {};
-	struct efa_com_reg_mr_result result = {};
-	struct pbl_context pbl;
	int supp_access_flags;
-	unsigned int pg_sz;
	struct efa_mr *mr;
-	int inline_size;
-	int err;
 
 #ifndef HAVE_NO_KVERBS_DRIVERS
	if (!udata) {
		ibdev_dbg(&dev->ibdev, "udata is NULL\n");
-		err = -EOPNOTSUPP;
-		goto err_out;
+		return ERR_PTR(-EINVAL);
	}
 #endif
 
@@ -2122,8 +2124,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
	    !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
-		err = -EINVAL;
-		goto err_out;
+		return ERR_PTR(-EINVAL);
	}
 
	supp_access_flags =
@@ -2137,103 +2138,65 @@
		ibdev_dbg(&dev->ibdev,
			  "Unsupported access flags[%#x], supported[%#x]\n",
			  access_flags, supp_access_flags);
-		err = -EOPNOTSUPP;
-		goto err_out;
+		return ERR_PTR(-EOPNOTSUPP);
	}
 
	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr) {
-		err = -ENOMEM;
-		goto err_out;
-	}
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
 
-#ifdef HAVE_EFA_GDR
-	mr->nvmem = nvmem_get(dev, mr, start, length, &pg_sz);
-	if (!mr->nvmem) {
-#ifdef HAVE_IB_UMEM_GET_DEVICE_PARAM
-		mr->umem = ib_umem_get(ibpd->device, start, length,
-				       access_flags);
-#elif defined(HAVE_IB_UMEM_GET_NO_DMASYNC)
-		mr->umem = ib_umem_get(udata, start, length, access_flags);
-#elif defined(HAVE_IB_UMEM_GET_UDATA)
-		mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
-#else
-		mr->umem = ib_umem_get(ibpd->uobject->context, start, length,
-				       access_flags, 0);
-#endif
-		if (IS_ERR(mr->umem)) {
-			err = PTR_ERR(mr->umem);
-			ibdev_dbg(&dev->ibdev,
-				  "Failed to pin and map user space memory[%d]\n",
-				  err);
-			goto err_free;
-		}
+	return mr;
+}
 
-#ifdef HAVE_IB_UMEM_FIND_SINGLE_PG_SIZE
-		pg_sz = ib_umem_find_best_pgsz(mr->umem,
-					       dev->dev_attr.page_size_cap,
-					       virt_addr);
-		if (!pg_sz) {
-			err = -EOPNOTSUPP;
-			ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
-				  dev->dev_attr.page_size_cap);
-			goto err_unmap;
-		}
-#else
-		pg_sz = efa_cont_pages(mr->umem, dev->dev_attr.page_size_cap,
-				       virt_addr);
-#endif
-	}
-#else /* !defined(HAVE_EFA_GDR) */
-#ifdef HAVE_IB_UMEM_GET_DEVICE_PARAM
-	mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
-#elif defined(HAVE_IB_UMEM_GET_NO_DMASYNC)
-	mr->umem = ib_umem_get(udata, start, length, access_flags);
-#elif defined(HAVE_IB_UMEM_GET_UDATA)
-	mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
-#else
-	mr->umem = ib_umem_get(ibpd->uobject->context, start, length,
-			       access_flags, 0);
-#endif
-	if (IS_ERR(mr->umem)) {
-		err = PTR_ERR(mr->umem);
-		ibdev_dbg(&dev->ibdev,
-			  "Failed to pin and map user space memory[%d]\n", err);
-		goto err_free;
-	}
-#endif /* defined(HAVE_EFA_GDR) */
+static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
+			   u64 length, u64 virt_addr, int access_flags)
+{
+	struct efa_dev *dev = to_edev(ibpd->device);
+	struct efa_com_reg_mr_params params = {};
+	struct efa_com_reg_mr_result result = {};
+	struct pbl_context pbl;
+	unsigned int pg_sz;
+	int inline_size;
+	int err;
 
	params.pd = to_epd(ibpd)->pdn;
	params.iova = virt_addr;
	params.mr_length_in_bytes = length;
	params.permissions = access_flags;
 
-#ifndef HAVE_EFA_GDR
+#ifdef HAVE_EFA_P2P
+	if (mr->p2pmem) {
+		pg_sz = efa_p2p_get_page_size(dev, mr->p2pmem);
+		goto skip_umem_pg_sz;
+	}
+#endif
+
 #ifdef HAVE_IB_UMEM_FIND_SINGLE_PG_SIZE
	pg_sz = ib_umem_find_best_pgsz(mr->umem,
				       dev->dev_attr.page_size_cap,
				       virt_addr);
	if (!pg_sz) {
-		err = -EOPNOTSUPP;
		ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
			  dev->dev_attr.page_size_cap);
-		goto err_unmap;
+		return -EOPNOTSUPP;
	}
 #else
	pg_sz = efa_cont_pages(mr->umem, dev->dev_attr.page_size_cap,
			       virt_addr);
 #endif /* defined(HAVE_IB_UMEM_FIND_SINGLE_PG_SIZE) */
-#endif /* !defined(HAVE_EFA_GDR) */
 
+#ifdef HAVE_EFA_P2P
+skip_umem_pg_sz:
+#endif
	params.page_shift = order_base_2(pg_sz);
 #ifdef HAVE_IB_UMEM_NUM_DMA_BLOCKS
-#ifdef HAVE_EFA_GDR
-	if (mr->umem)
-		params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);
-	else
+#ifdef HAVE_EFA_P2P
+	if (mr->p2pmem)
		params.page_num = DIV_ROUND_UP(length + (virt_addr & (pg_sz - 1)),
					       pg_sz);
+	else
+		params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);
 #else
	params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);
 #endif
@@ -2250,21 +2213,21 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
	if (params.page_num <= inline_size) {
		err = efa_create_inline_pbl(dev, mr, &params);
		if (err)
-			goto err_unmap;
+			return err;
 
		err = efa_com_register_mr(&dev->edev, &params, &result);
		if (err)
-			goto err_unmap;
+			return err;
	} else {
		err = efa_create_pbl(dev, &pbl, mr, &params);
		if (err)
-			goto err_unmap;
+			return err;
 
		err = efa_com_register_mr(&dev->edev, &params, &result);
		pbl_destroy(dev, &pbl);
		if (err)
-			goto err_unmap;
+			return err;
	}
 
	mr->ibmr.lkey = result.l_key;
@@ -2272,20 +2235,116 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
 #ifdef HAVE_IB_MR_LENGTH
	mr->ibmr.length = length;
 #endif
-#ifdef HAVE_EFA_GDR
-	if (mr->nvmem) {
-		mr->nvmem->lkey = result.l_key;
-		mr->nvmem->needs_dereg = true;
+#ifdef HAVE_EFA_P2P
+	if (mr->p2pmem) {
+		mr->p2pmem->lkey = result.l_key;
+		mr->p2pmem->needs_dereg = true;
	}
 #endif
	ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
 
+	return 0;
+}
+
+#ifdef HAVE_MR_DMABUF
+struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
+				     u64 length, u64 virt_addr,
+				     int fd, int access_flags,
+				     struct ib_udata *udata)
+{
+	struct efa_dev *dev = to_edev(ibpd->device);
+	struct ib_umem_dmabuf *umem_dmabuf;
+	struct efa_mr *mr;
+	int err;
+
+	mr = efa_alloc_mr(ibpd, access_flags, udata);
+	if (IS_ERR(mr)) {
+		err = PTR_ERR(mr);
+		goto err_out;
+	}
+
+	umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd,
+						access_flags);
+	if (IS_ERR(umem_dmabuf)) {
+		err = PTR_ERR(umem_dmabuf);
+		ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
+		goto err_free;
+	}
+
+	mr->umem = &umem_dmabuf->umem;
+	err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
+	if (err)
+		goto err_release;
+
+	return &mr->ibmr;
+
+err_release:
+#ifndef HAVE_IB_UMEM_DMABUF_PINNED
+	dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+	dma_buf_unpin(umem_dmabuf->attach);
+	dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+#endif
+	ib_umem_release(mr->umem);
+err_free:
+	kfree(mr);
+err_out:
+	atomic64_inc(&dev->stats.reg_mr_err);
+	return ERR_PTR(err);
+}
+#endif
+
+struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
+			 u64 virt_addr, int access_flags,
+			 struct ib_udata *udata)
+{
+	struct efa_dev *dev = to_edev(ibpd->device);
+	struct efa_mr *mr;
+	int err;
+
+	mr = efa_alloc_mr(ibpd, access_flags, udata);
+	if (IS_ERR(mr)) {
+		err = PTR_ERR(mr);
+		goto err_out;
+	}
+
+#ifdef HAVE_IB_UMEM_GET_DEVICE_PARAM
+	mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
+#elif defined(HAVE_IB_UMEM_GET_NO_DMASYNC)
+	mr->umem = ib_umem_get(udata, start, length, access_flags);
+#elif defined(HAVE_IB_UMEM_GET_UDATA)
+	mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
+#else
+	mr->umem = ib_umem_get(ibpd->uobject->context, start, length,
+			       access_flags, 0);
+#endif
+	if (IS_ERR(mr->umem)) {
+#ifdef HAVE_EFA_P2P
+		mr->p2pmem = efa_p2p_get(dev, mr, start, length);
+		if (mr->p2pmem) {
+			/* Avoid referencing an error-pointer later on */
+			mr->umem = NULL;
+			goto reg_mr;
+		}
+#endif
+		err = PTR_ERR(mr->umem);
+		ibdev_dbg(&dev->ibdev,
+			  "Failed to pin and map user space memory[%d]\n", err);
+		goto err_free;
+	}
+
+#ifdef HAVE_EFA_P2P
+reg_mr:
+#endif
+	err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
+	if (err)
+		goto err_release;
+
	return &mr->ibmr;
 
-err_unmap:
-#ifdef HAVE_EFA_GDR
-	if (mr->nvmem)
-		nvmem_put(mr->nvmem->ticket, false);
+err_release:
+#ifdef HAVE_EFA_P2P
+	if (mr->p2pmem)
+		efa_p2p_put(mr->p2pmem->ticket, false);
	else
		ib_umem_release(mr->umem);
 #else
@@ -2311,9 +2370,9 @@ int efa_dereg_mr(struct ib_mr *ibmr)
 
	ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
 
-#ifdef HAVE_EFA_GDR
-	if (mr->nvmem){
-		err = nvmem_put(mr->nvmem_ticket, false);
+#ifdef HAVE_EFA_P2P
+	if (mr->p2pmem) {
+		err = efa_p2p_put(mr->p2p_ticket, false);
		if (err)
			return err;
 
@@ -2326,6 +2385,17 @@ int efa_dereg_mr(struct ib_mr *ibmr)
	if (err)
		return err;
 
+#if defined(HAVE_MR_DMABUF) && !defined(HAVE_IB_UMEM_DMABUF_PINNED)
+	if (mr->umem->is_dmabuf) {
+		struct ib_umem_dmabuf *umem_dmabuf;
+
+		umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+		dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+		dma_buf_unpin(umem_dmabuf->attach);
+		dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+	}
+#endif
+
	ib_umem_release(mr->umem);
	kfree(mr);
 
@@ -2741,7 +2811,7 @@ int efa_destroy_ah(struct ib_ah *ibah)
 {
	struct efa_dev *dev = to_edev(ibah->pd->device);
	struct efa_ah *ah = to_eah(ibah);
-#ifndef HAVE_AH_CORE_ALLOCATION
+#if !defined(HAVE_AH_CORE_ALLOCATION) && !defined(HAVE_AH_CORE_ALLOCATION_DESTROY_RC)
	int err;
 #endif
 
diff --git a/drivers/amazon/net/efa/kcompat.h b/drivers/amazon/net/efa/kcompat.h
index d0887952d8c92..713dcc00b394c 100644
--- a/drivers/amazon/net/efa/kcompat.h
+++ b/drivers/amazon/net/efa/kcompat.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
 /*
- * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
  */
 
 #ifndef _KCOMPAT_H_
@@ -186,4 +186,58 @@ typedef u32 port_t;
 typedef u8 port_t;
 #endif
 
+#if defined(HAVE_MR_DMABUF) && !defined(HAVE_IB_UMEM_DMABUF_PINNED)
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#include <rdma/ib_umem.h>
+
+static inline void
+ib_umem_dmabuf_unsupported_move_notify(struct dma_buf_attachment *attach)
+{
+	struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
+
+	ibdev_warn_ratelimited(umem_dmabuf->umem.ibdev,
+			       "Invalidate callback should not be called when memory is pinned\n");
+}
+
+static struct dma_buf_attach_ops ib_umem_dmabuf_attach_pinned_ops = {
+	.allow_peer2peer = true,
+	.move_notify = ib_umem_dmabuf_unsupported_move_notify,
+};
+
+static inline
+struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device,
+						 unsigned long offset,
+						 size_t size, int fd,
+						 int access)
+{
+	struct ib_umem_dmabuf *umem_dmabuf;
+	int err;
+
+	umem_dmabuf = ib_umem_dmabuf_get(device, offset, size, fd, access,
+					 &ib_umem_dmabuf_attach_pinned_ops);
+	if (IS_ERR(umem_dmabuf))
+		return umem_dmabuf;
+
+	dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+	err = dma_buf_pin(umem_dmabuf->attach);
+	if (err)
+		goto err_release;
+
+	err = ib_umem_dmabuf_map_pages(umem_dmabuf);
+	if (err)
+		goto err_unpin;
+	dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+
+	return umem_dmabuf;
+
+err_unpin:
+	dma_buf_unpin(umem_dmabuf->attach);
+err_release:
+	dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+	ib_umem_release(&umem_dmabuf->umem);
+	return ERR_PTR(err);
+}
+#endif /* !HAVE_IB_UMEM_DMABUF_PINNED */
+
 #endif /* _KCOMPAT_H_ */
diff --git a/drivers/amazon/net/efa/neuron_p2p.h b/drivers/amazon/net/efa/neuron_p2p.h
new file mode 100644
index 0000000000000..a1ce44003463f
--- /dev/null
+++ b/drivers/amazon/net/efa/neuron_p2p.h
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef __NEURON_P2P_H__
+#define __NEURON_P2P_H__
+
+struct neuron_p2p_page_info {
+	u64 physical_address; // PA's that map to the VA (page aligned as defined in va_info)
+	u32 page_count; // page count each page is shift_page_size size
+};
+
+struct neuron_p2p_va_info {
+	void *virtual_address; // Virtual address for which the PA's need to be obtained
+	u64 size; // The actual size of the memory pointed by the virtual_address
+	u32 shift_page_size; // log2 of the page size
+	u32 device_index; // Neuron Device index.
+	u32 entries; // Number of page_info entries
+	struct neuron_p2p_page_info page_info[];
+};
+
+/** Given the virtual address and length returns the physical address
+ *
+ * @param[in] virtual_address - Virtual address of device memory
+ * @param[in] length - Length of the memory
+ * @param[out] va_info - Set of physical addresses
+ * @param[in] free_callback - Callback function to be called. This will be called with a lock held.
+ * @param[in] data - Data to be used for the callback
+ *
+ * @return 0 - Success.
+ */
+int neuron_p2p_register_va(u64 virtual_address, u64 length, struct neuron_p2p_va_info **vainfo, void (*free_callback) (void *data), void *data);
+
+/** Give the pa, release the pa from being used by third-party device
+ *
+ * @param[in] va_info - Set of physical addresses
+ *
+ * @return 0 - Success.
+ */
+int neuron_p2p_unregister_va(struct neuron_p2p_va_info *vainfo);
+
+#endif
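
A note on the efa_p2p_provider interface added above: efa_gdr.c (NVIDIA
GPU memory) and efa_neuron.c (AWS Neuron device memory) are its only two
implementations in this update. What follows is a rough, hypothetical
sketch of how a third peer-memory provider would plug into that
interface; the "foomem" names are invented for illustration and are not
part of the driver:

/* Hypothetical provider sketch -- illustration only, not part of this patch. */
#include "efa_p2p.h"

struct efa_foomem {
	struct efa_p2pmem p2pmem;	/* embedded, recovered via container_of() */
	/* provider-private pinning state would live here */
};

/*
 * Called from efa_p2p_get(); returning NULL makes the core try the next
 * provider in prov_arr[].
 */
static struct efa_p2pmem *foomem_get(struct efa_dev *dev, u64 ticket,
				     u64 start, u64 length)
{
	struct efa_foomem *foomem;

	foomem = kzalloc(sizeof(*foomem), GFP_KERNEL);
	if (!foomem)
		return NULL;

	/*
	 * Pin [start, start + length) here, and hand 'ticket' to the
	 * device's free callback so that the callback can invoke
	 * efa_p2p_put(ticket, true), as efa_gdr.c and efa_neuron.c do.
	 * This sketch pins nothing, so it always declines the range.
	 */
	kfree(foomem);
	return NULL;
}

static int foomem_to_page_list(struct efa_dev *dev, struct efa_p2pmem *p2pmem,
			       u64 *page_list)
{
	/* Fill page_list[] with the pinned bus/DMA addresses. */
	return 0;
}

static void foomem_release(struct efa_dev *dev, struct efa_p2pmem *p2pmem,
			   bool in_cb)
{
	struct efa_foomem *foomem;

	foomem = container_of(p2pmem, struct efa_foomem, p2pmem);
	/* Unpin here, skipping work the peer's free callback already did. */
	kfree(foomem);
}

static unsigned int foomem_pgsz(struct efa_dev *dev, struct efa_p2pmem *p2pmem)
{
	return SZ_4K;	/* page size used by the page-list addresses */
}

static const struct efa_p2p_provider foomem_prov = {
	.ops = {
		.try_get = foomem_get,
		.to_page_list = foomem_to_page_list,
		.release = foomem_release,
		.get_page_size = foomem_pgsz,
	},
	/* .type would need a new enum efa_p2p_prov value */
};

Wiring such a provider up would additionally require a new
EFA_P2P_PROVIDER_* entry in efa_p2p.h and a prov_arr[] assignment in
p2p_providers_init() in efa_p2p.c; efa_p2p_get() then keeps the first
provider whose try_get() claims the address range and tracks the
registration by ticket.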