Skip to content

Commit

Permalink
Merge pull request openucx#9 from bureddy/gdrcopy-rcache
Browse files Browse the repository at this point in the history
gdr_copy uct registration cache
  • Loading branch information
bureddy authored Sep 13, 2017
2 parents b2a20af + ffbe868 commit 59b7a5b
Show file tree
Hide file tree
Showing 3 changed files with 220 additions and 41 deletions.
2 changes: 1 addition & 1 deletion src/uct/cuda/gdr_copy/gdr_copy_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ ucs_status_t uct_gdr_copy_ep_put_zcopy(uct_ep_h tl_ep, const uct_iov_t *iov, siz
{
uct_gdr_copy_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_gdr_copy_iface_t);
uct_gdr_copy_md_t *md = (uct_gdr_copy_md_t *)iface->super.md;
uct_gdr_copy_mem_h *mem_hndl = (uct_gdr_copy_mem_h *) rkey;
uct_gdr_copy_mem_t *mem_hndl = (uct_gdr_copy_mem_t *) rkey;
gdr_info_t gdr_info;
size_t bar_off;

Expand Down
234 changes: 196 additions & 38 deletions src/uct/cuda/gdr_copy/gdr_copy_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,28 @@
#include <cuda_runtime.h>
#include <cuda.h>
#include <stdio.h>

/*
 * Default registration cache address alignment, in bytes.
 *
 * This macro is turned into the textual default of RCACHE_ADDR_ALIGN with
 * UCS_PP_MAKE_STRING(), so it has to be a plain decimal literal: an expression
 * such as (GPU_PAGE_SIZE) would be stringified as expression text, which is
 * not a number.
 * NOTE(review): 65536 is meant to equal gdrcopy's GPU_PAGE_SIZE (64 KiB) -
 * confirm against gdrapi.h and keep the two in sync.
 */
#define UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN 65536

static ucs_config_field_t uct_gdr_copy_md_config_table[] = {
{"", "", NULL,
ucs_offsetof(uct_gdr_copy_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)},
ucs_offsetof(uct_gdr_copy_md_config_t, super), UCS_CONFIG_TYPE_TABLE(uct_md_config_table)},

{"RCACHE_ADDR_ALIGN", UCS_PP_MAKE_STRING(UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN),
"Registration cache address alignment, must be power of 2\n"
"between "UCS_PP_MAKE_STRING(UCS_PGT_ADDR_ALIGN)"and system page size",
ucs_offsetof(uct_gdr_copy_md_config_t, rcache.alignment), UCS_CONFIG_TYPE_UINT},

{"RCACHE_MEM_PRIO", "1000", "Registration cache memory event priority",
ucs_offsetof(uct_gdr_copy_md_config_t, rcache.event_prio), UCS_CONFIG_TYPE_UINT},

{"RCACHE_OVERHEAD", "90ns", "Registration cache lookup overhead",
ucs_offsetof(uct_gdr_copy_md_config_t, rcache.overhead), UCS_CONFIG_TYPE_TIME},

{"MEM_REG_OVERHEAD", "16us", "Memory registration overhead", /* TODO take default from device */
ucs_offsetof(uct_gdr_copy_md_config_t, uc_reg_cost.overhead), UCS_CONFIG_TYPE_TIME},

{"MEM_REG_GROWTH", "0.06ns", "Memory registration growth rate", /* TODO take default from device */
ucs_offsetof(uct_gdr_copy_md_config_t, uc_reg_cost.growth), UCS_CONFIG_TYPE_TIME},

{NULL}
};
Expand Down Expand Up @@ -56,53 +75,41 @@ static ucs_status_t uct_gdr_copy_rkey_release(uct_md_component_t *mdc, uct_rkey_
return UCS_OK;
}


static ucs_status_t uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t length,
unsigned flags, uct_mem_h *memh_p)
static ucs_status_t uct_gdr_copy_mem_reg_internal(uct_md_h uct_md, void *address, size_t length,
unsigned flags, uct_gdr_copy_mem_t *mem_hndl)
{
uct_gdr_copy_mem_h * mem_hndl = NULL;
uct_gdr_copy_md_t *md = ucs_derived_of(uct_md, uct_gdr_copy_md_t);
CUdeviceptr d_ptr = ((CUdeviceptr )(char *) address);
gdr_mh_t mh;
size_t reg_size;
void *bar_ptr;

CUdeviceptr d_ptr = ((CUdeviceptr )(char *) address);

mem_hndl = ucs_malloc(sizeof(uct_gdr_copy_mem_h), "gdr_copy handle");
if (NULL == mem_hndl) {
ucs_error("Failed to allocate memory for uct_gdr_copy_mem_h");
return UCS_ERR_NO_MEMORY;
}

reg_size = (length + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;

if (gdr_pin_buffer(md->gdrcpy_ctx, (d_ptr & GPU_PAGE_MASK), reg_size, 0, 0, &mh) != 0) {
ucs_error("gdr_pin_buffer Failed. length :%lu pin_size:%lu ", length, reg_size);
if (gdr_pin_buffer(md->gdrcpy_ctx, d_ptr, length, 0, 0, &mh) != 0) {
ucs_error("gdr_pin_buffer Failed. length :%lu ", length);
return UCS_ERR_IO_ERROR;

}
if (mh == 0) {
ucs_error("gdr_pin_buffer Failed. length :%lu pin_size:%lu ", length, reg_size);
ucs_error("gdr_pin_buffer Failed. length :%lu ", length);
return UCS_ERR_IO_ERROR;
}

if (gdr_map(md->gdrcpy_ctx, mh, &bar_ptr, reg_size) !=0) {
ucs_error("gdr_map failed. length :%lu pin_size:%lu ", length, reg_size);
if (gdr_map(md->gdrcpy_ctx, mh, &bar_ptr, length) !=0) {
ucs_error("gdr_map failed. length :%lu ", length);
return UCS_ERR_IO_ERROR;
}

mem_hndl->mh = mh;
mem_hndl->bar_ptr = bar_ptr;
mem_hndl->reg_size = reg_size;

*memh_p = mem_hndl;
mem_hndl->reg_size = length;

return UCS_OK;

}

static ucs_status_t uct_gdr_copy_mem_dereg(uct_md_h uct_md, uct_mem_h memh)
static ucs_status_t uct_gdr_copy_mem_dereg_internal(uct_md_h uct_md, uct_gdr_copy_mem_t *mem_hndl)
{

uct_gdr_copy_md_t *md = ucs_derived_of(uct_md, uct_gdr_copy_md_t);
uct_gdr_copy_mem_h *mem_hndl = memh;

if (gdr_unmap(md->gdrcpy_ctx, mem_hndl->mh, mem_hndl->bar_ptr, mem_hndl->reg_size) !=0) {
ucs_error("gdr_unmap Failed. unpin_size:%lu ", mem_hndl->reg_size);
Expand All @@ -112,11 +119,47 @@ static ucs_status_t uct_gdr_copy_mem_dereg(uct_md_h uct_md, uct_mem_h memh)
ucs_error("gdr_unpin_buffer failed ");
return UCS_ERR_IO_ERROR;
}
return UCS_OK;
}

free(mem_hndl);
/**
 * Register a GPU memory range without the registration cache.
 *
 * Allocates a uct_gdr_copy_mem_t handle and pins/maps the GPU pages that
 * contain [address, address + length). The start is rounded down and the end
 * is rounded up to GPU_PAGE_SIZE, so a range that does not start on a page
 * boundary is still fully covered.
 *
 * @param uct_md   Memory domain.
 * @param address  Start of the user range.
 * @param length   Length of the user range, in bytes.
 * @param flags    Registration flags, forwarded to the internal registration.
 * @param memh_p   Filled with the newly allocated handle on success; it is
 *                 released by uct_gdr_copy_mem_dereg().
 *
 * @return UCS_OK, UCS_ERR_NO_MEMORY if the handle cannot be allocated, or the
 *         error returned by uct_gdr_copy_mem_reg_internal().
 */
static ucs_status_t uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t length,
                                         unsigned flags, uct_mem_h *memh_p)
{
    uct_gdr_copy_mem_t *mem_hndl;
    uintptr_t start, end;
    ucs_status_t status;

    mem_hndl = ucs_malloc(sizeof(*mem_hndl), "gdr_copy handle");
    if (NULL == mem_hndl) {
        ucs_error("Failed to allocate memory for uct_gdr_copy_mem_t");
        return UCS_ERR_NO_MEMORY;
    }

    /* Round the end of the range, not just the length: rounding only the
     * length loses the offset of 'address' inside its page. */
    start = (uintptr_t)address & GPU_PAGE_MASK;
    end   = ((uintptr_t)address + length + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;

    status = uct_gdr_copy_mem_reg_internal(uct_md, (void*)start, end - start,
                                           flags, mem_hndl);
    if (status != UCS_OK) {
        /* Allocated with ucs_malloc(), so it must be released with ucs_free() */
        ucs_free(mem_hndl);
        return status;
    }

    *memh_p = mem_hndl;
    return UCS_OK;
}

/**
 * Deregister a handle created by uct_gdr_copy_mem_reg().
 *
 * Unmaps/unpins the GPU memory and releases the handle. The handle is freed
 * even when the unmap/unpin step reports an error.
 *
 * @param uct_md  Memory domain.
 * @param memh    Handle returned by uct_gdr_copy_mem_reg().
 *
 * @return Status of uct_gdr_copy_mem_dereg_internal().
 */
static ucs_status_t uct_gdr_copy_mem_dereg(uct_md_h uct_md, uct_mem_h memh)
{
    uct_gdr_copy_mem_t *mem_hndl = memh;
    ucs_status_t status;

    status = uct_gdr_copy_mem_dereg_internal(uct_md, mem_hndl);
    /* The handle comes from ucs_malloc(); pair it with ucs_free() */
    ucs_free(mem_hndl);
    return status;
}

static ucs_status_t uct_gdr_copy_mem_detect(uct_md_h md, void *addr, uint64_t *dn_mask)
{
int memory_type;
Expand Down Expand Up @@ -165,19 +208,105 @@ static void uct_gdr_copy_md_close(uct_md_h uct_md)
ucs_free(md);
}

static ucs_status_t uct_gdr_copy_md_open(const char *md_name, const uct_md_config_t *md_config,
uct_md_h *md_p)
/*
 * Memory domain operations without a registration cache: every mem_reg /
 * mem_dereg call pins and unpins the memory directly. uct_gdr_copy_md_open()
 * installs this table first and replaces it with md_rcache_ops only if the
 * registration cache is created successfully.
 */
static uct_md_ops_t md_ops = {
    .mem_reg    = uct_gdr_copy_mem_reg,
    .mem_dereg  = uct_gdr_copy_mem_dereg,
    .mem_detect = uct_gdr_copy_mem_detect,
    .mkey_pack  = uct_gdr_copy_mkey_pack,
    .query      = uct_gdr_copy_md_query,
    .close      = uct_gdr_copy_md_close
};

/**
 * Get the registration cache region that embeds a memory handle.
 *
 * @param memh  Memory handle; must point to the 'memh' member of a
 *              uct_gdr_copy_rcache_region_t.
 *
 * @return Pointer to the enclosing registration cache region.
 */
static inline uct_gdr_copy_rcache_region_t* uct_gdr_copy_rache_region_from_memh(uct_mem_h memh)
{
    return ucs_container_of(memh, uct_gdr_copy_rcache_region_t, memh);
}

/**
 * Register a GPU memory range through the registration cache.
 *
 * Looks up (or creates) a cache region for [address, address + length) and
 * returns the memory handle embedded in that region. The reference taken by
 * ucs_rcache_get() is released by uct_gdr_copy_mem_rcache_dereg().
 *
 * @param uct_md   Memory domain; its rcache must have been created.
 * @param address  Start of the user range.
 * @param length   Length of the user range, in bytes.
 * @param flags    Registration flags; a pointer to them is handed to
 *                 ucs_rcache_get() as the opaque argument (presumably the
 *                 'arg' seen by the mem_reg callback - confirm against the
 *                 rcache API).
 * @param memh_p   Filled with the handle of the cached region on success.
 *
 * @return UCS_OK, or the error returned by ucs_rcache_get().
 */
static ucs_status_t uct_gdr_copy_mem_rcache_reg(uct_md_h uct_md, void *address,
                                                size_t length, unsigned flags,
                                                uct_mem_h *memh_p)
{
    uct_gdr_copy_md_t *md = ucs_derived_of(uct_md, uct_gdr_copy_md_t);
    ucs_rcache_region_t *rregion;
    ucs_status_t status;
    uct_gdr_copy_mem_t *memh;

    status = ucs_rcache_get(md->rcache, address, length, PROT_READ|PROT_WRITE,
                            &flags, &rregion);
    if (status != UCS_OK) {
        return status;
    }

    ucs_assert(rregion->refcount > 0);
    memh    = &ucs_derived_of(rregion, uct_gdr_copy_rcache_region_t)->memh;
    *memh_p = memh;
    return UCS_OK;
}

/**
 * Release a handle obtained from uct_gdr_copy_mem_rcache_reg().
 *
 * Only the reference on the owning cache region is dropped here; the region
 * itself is handed back to the registration cache.
 *
 * @param uct_md  Memory domain that owns the registration cache.
 * @param memh    Handle embedded in a uct_gdr_copy_rcache_region_t.
 *
 * @return Always UCS_OK.
 */
static ucs_status_t uct_gdr_copy_mem_rcache_dereg(uct_md_h uct_md, uct_mem_h memh)
{
    uct_gdr_copy_rcache_region_t *cached = uct_gdr_copy_rache_region_from_memh(memh);
    uct_gdr_copy_md_t *gdr_md            = ucs_derived_of(uct_md, uct_gdr_copy_md_t);

    ucs_rcache_region_put(gdr_md->rcache, &cached->super);

    return UCS_OK;
}

/*
 * Memory domain operations backed by the registration cache. Identical to the
 * uncached table except for mem_reg / mem_dereg, which go through the cache.
 */
static uct_md_ops_t md_rcache_ops = {
    .mem_reg    = uct_gdr_copy_mem_rcache_reg,
    .mem_dereg  = uct_gdr_copy_mem_rcache_dereg,
    .mem_detect = uct_gdr_copy_mem_detect,
    .mkey_pack  = uct_gdr_copy_mkey_pack,
    .query      = uct_gdr_copy_md_query,
    .close      = uct_gdr_copy_md_close
};
/**
 * Registration cache callback: pin and map the memory of a cache region.
 *
 * @param context  The uct_gdr_copy_md_t that owns the cache.
 * @param rcache   Registration cache (unused).
 * @param arg      Pointer to the registration flags.
 * @param rregion  Region to register; it is the base of a
 *                 uct_gdr_copy_rcache_region_t whose 'memh' is filled here.
 *
 * @return Status of uct_gdr_copy_mem_reg_internal() for the region's
 *         [start, end) range.
 */
static ucs_status_t uct_gdr_copy_rcache_mem_reg_cb(void *context, ucs_rcache_t *rcache,
                                                   void *arg, ucs_rcache_region_t *rregion)
{
    uct_gdr_copy_md_t *gdr_md            = context;
    uct_gdr_copy_rcache_region_t *cached = ucs_derived_of(rregion, uct_gdr_copy_rcache_region_t);
    int *reg_flags                       = arg;

    return uct_gdr_copy_mem_reg_internal(&gdr_md->super,
                                         (void*)cached->super.super.start,
                                         cached->super.super.end - cached->super.super.start,
                                         *reg_flags, &cached->memh);
}

/**
 * Registration cache callback: unmap and unpin the memory of a cache region.
 *
 * The callback has no way to report an error, so the status of the internal
 * deregistration is deliberately discarded.
 *
 * @param context  The uct_gdr_copy_md_t that owns the cache.
 * @param rcache   Registration cache (unused).
 * @param rregion  Region being removed; base of a uct_gdr_copy_rcache_region_t.
 */
static void uct_gdr_copy_rcache_mem_dereg_cb(void *context, ucs_rcache_t *rcache,
                                             ucs_rcache_region_t *rregion)
{
    uct_gdr_copy_md_t *gdr_md            = context;
    uct_gdr_copy_rcache_region_t *cached = ucs_derived_of(rregion, uct_gdr_copy_rcache_region_t);

    (void)uct_gdr_copy_mem_dereg_internal(&gdr_md->super, &cached->memh);
}

/**
 * Registration cache callback: write a short description of a region.
 *
 * Always writes a NUL-terminated string (when @a max is non-zero), so the
 * caller never reads an untouched buffer.
 * NOTE(review): the rcache presumably prints @a buf in its region log
 * messages - confirm against the ucs_rcache implementation.
 *
 * @param context  The uct_gdr_copy_md_t that owns the cache (unused).
 * @param rcache   Registration cache (unused).
 * @param rregion  Region to describe; base of a uct_gdr_copy_rcache_region_t.
 * @param buf      Output buffer.
 * @param max      Size of @a buf, in bytes.
 */
static void uct_gdr_copy_rcache_dump_region_cb(void *context, ucs_rcache_t *rcache,
                                               ucs_rcache_region_t *rregion, char *buf,
                                               size_t max)
{
    uct_gdr_copy_rcache_region_t *region = ucs_derived_of(rregion, uct_gdr_copy_rcache_region_t);

    snprintf(buf, max, "bar ptr:%p reg_size:%zu", region->memh.bar_ptr,
             region->memh.reg_size);
}

/*
 * Callbacks handed to ucs_rcache_create() through rcache_params.ops:
 * region registration, region deregistration and region description.
 */
static ucs_rcache_ops_t uct_gdr_copy_rcache_ops = {
    .dump_region = uct_gdr_copy_rcache_dump_region_cb,
    .mem_dereg   = uct_gdr_copy_rcache_mem_dereg_cb,
    .mem_reg     = uct_gdr_copy_rcache_mem_reg_cb
};

static ucs_status_t uct_gdr_copy_md_open(const char *md_name, const uct_md_config_t *uct_md_config,
uct_md_h *md_p)
{
ucs_status_t status;
uct_gdr_copy_md_t *md;
const uct_gdr_copy_md_config_t *md_config = ucs_derived_of(uct_md_config, uct_gdr_copy_md_config_t);
ucs_rcache_params_t rcache_params;

md = ucs_malloc(sizeof(uct_gdr_copy_md_t), "uct_gdr_copy_md_t");
if (NULL == md) {
Expand All @@ -187,13 +316,42 @@ static ucs_status_t uct_gdr_copy_md_open(const char *md_name, const uct_md_confi

md->super.ops = &md_ops;
md->super.component = &uct_gdr_copy_md_component;
md->rcache = NULL;
md->reg_cost = md_config->uc_reg_cost;



md->gdrcpy_ctx = gdr_open();
if (md->gdrcpy_ctx == (void *)0) {
ucs_error("Failed to open gdrcopy ");
return UCS_ERR_IO_ERROR;
}

if (md_config->rcache.enable != UCS_NO) {
// UCS_STATIC_ASSERT(UCS_PGT_ADDR_ALIGN >= UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN);
rcache_params.region_struct_size = sizeof(uct_gdr_copy_rcache_region_t);
rcache_params.alignment = md_config->rcache.alignment;
rcache_params.ucm_event_priority = md_config->rcache.event_prio;
rcache_params.context = md;
rcache_params.ops = &uct_gdr_copy_rcache_ops;
status = ucs_rcache_create(&rcache_params, "gdr_copy" UCS_STATS_ARG(NULL), &md->rcache);
if (status == UCS_OK) {
md->super.ops = &md_rcache_ops;
md->reg_cost.overhead = 0;
md->reg_cost.growth = 0; /* It's close enough to 0 */
} else {
ucs_assert(md->rcache == NULL);
if (md_config->rcache.enable == UCS_YES) {
ucs_error("Failed to create registration cache: %s",
ucs_status_string(status));
return UCS_ERR_IO_ERROR;
} else {
ucs_debug("Could not create registration cache for: %s",
ucs_status_string(status));
}
}
}

*md_p = (uct_md_h) md;
return UCS_OK;
}
Expand Down
25 changes: 23 additions & 2 deletions src/uct/cuda/gdr_copy/gdr_copy_md.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define UCT_CUDA_CONTEXT_H

#include <uct/base/uct_md.h>
#include <ucs/sys/rcache.h>
#include "gdrapi.h"

#define UCT_GDR_COPY_MD_NAME "gdr_copy"
Expand All @@ -19,14 +20,27 @@ extern uct_md_component_t uct_gdr_copy_md_component;
*/
typedef struct uct_gdr_copy_md {
    struct uct_md       super;      /**< Domain info; must stay the first member */
    gdr_t               gdrcpy_ctx; /**< gdr copy context */
    ucs_rcache_t        *rcache;    /**< Registration cache (can be NULL) */
    uct_linear_growth_t reg_cost;   /**< Memory registration cost */
} uct_gdr_copy_md_t;

/**
 * gdr copy domain configuration.
 *
 * Filled by the config parser according to uct_gdr_copy_md_config_table; the
 * C type of every member has to match the parser type of its table entry.
 */
typedef struct uct_gdr_copy_md_config {
    uct_md_config_t          super;
    struct {
        ucs_ternary_value_t  enable;       /**< Enable registration cache */
        unsigned             alignment;    /**< Force address alignment; parsed as
                                                UCS_CONFIG_TYPE_UINT, like event_prio */
        unsigned             event_prio;   /**< Memory events priority */
        double               overhead;     /**< Lookup overhead estimation */
    } rcache;

    uct_linear_growth_t      uc_reg_cost;  /**< Memory registration cost estimation
                                                without using the cache */
} uct_gdr_copy_md_config_t;


Expand All @@ -37,8 +51,15 @@ typedef struct uct_gdr_copy_mem {
gdr_mh_t mh;
void *bar_ptr;
size_t reg_size;
} uct_gdr_copy_mem_h;
} uct_gdr_copy_mem_t;

/**
 * GPU memory region tracked by the registration cache.
 *
 * 'super' has to remain the first member: the rcache callbacks convert a
 * ucs_rcache_region_t pointer to this type with ucs_derived_of(), and the
 * enclosing region is recovered from a handle pointer through 'memh'.
 */
typedef struct uct_gdr_copy_rcache_region {
ucs_rcache_region_t super; /**< base registration cache region */
uct_gdr_copy_mem_t memh; /**< memory handle exposed to the user as the memh */
} uct_gdr_copy_rcache_region_t;


#endif

0 comments on commit 59b7a5b

Please sign in to comment.