Skip to content

Commit

Permalink
Merge pull request #9637 from tvegas1/mad_rte_module
Browse files Browse the repository at this point in the history
TOOLS/PERF: Implement MAD RTE as a module
  • Loading branch information
yosefe authored Feb 28, 2024
2 parents bda8750 + 5048f90 commit 1c25669
Show file tree
Hide file tree
Showing 17 changed files with 296 additions and 236 deletions.
1 change: 1 addition & 0 deletions contrib/buildrpm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ if [ $opt_binrpm -eq 1 ]; then
with_args+=" $(with_arg ugni)"
with_args+=" $(with_arg xpmem)"
with_args+=" $(with_arg fuse)"
with_args+=" $(with_arg mad)"

echo rpmbuild -bb $rpmmacros $rpmopts $rpmspec $defines $with_args | bash -eEx
fi
18 changes: 5 additions & 13 deletions src/tools/perf/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2014. ALL RIGHTS RESERVED.
# Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2024. ALL RIGHTS RESERVED.
# Copyright (C) UT-Battelle, LLC. 2015-2017. ALL RIGHTS RESERVED.
# Copyright (C) The University of Tennessee and The University
# of Tennessee Research Foundation. 2016. ALL RIGHTS RESERVED.
Expand All @@ -9,12 +9,12 @@
# See file LICENSE for terms.
#

SUBDIRS = cuda rocm lib
SUBDIRS = cuda rocm lib mad
CC = $(UCX_PERFTEST_CC)

noinst_HEADERS = \
perftest.h \
perftest_mad.h \
perftest_context.h \
api/libperf.h

bin_PROGRAMS = ucx_perftest ucx_perftest_daemon
Expand All @@ -24,17 +24,9 @@ ucx_perftest_SOURCES = \
perftest_run.c \
perftest_params.c

if HAVE_MAD
ucx_perftest_SOURCES += \
perftest_mad.c
endif

ucx_perftest_CPPFLAGS = $(BASE_CPPFLAGS)
ucx_perftest_CFLAGS = $(BASE_CFLAGS) $(OPENMP_CFLAGS) $(MAD_CFLAGS)
ucx_perftest_LDFLAGS = \
$(LDFLAGS_DYNAMIC_LIST_DATA) \
$(MAD_LDFLAGS) \
$(MAD_LIBS)
ucx_perftest_CFLAGS = $(BASE_CFLAGS) $(OPENMP_CFLAGS)
ucx_perftest_LDFLAGS = $(LDFLAGS_DYNAMIC_LIST_DATA)

ucx_perftest_LDADD = \
$(abs_top_builddir)/src/uct/libuct.la \
Expand Down
27 changes: 21 additions & 6 deletions src/tools/perf/api/libperf.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ typedef struct ucx_perf_result {

typedef void (*ucx_perf_rte_progress_cb_t)(void *arg);

typedef ucs_status_t (*ucx_perf_rte_setup_func_t)(void *arg);
typedef void (*ucx_perf_rte_cleanup_func_t)(void *arg);
typedef unsigned (*ucx_perf_rte_group_size_func_t)(void *rte_group);
typedef unsigned (*ucx_perf_rte_group_index_func_t)(void *rte_group);
typedef void (*ucx_perf_rte_barrier_func_t)(void *rte_group,
Expand All @@ -202,15 +204,18 @@ typedef void (*ucx_perf_rte_post_vec_func_t)(void *rte_group,
typedef void (*ucx_perf_rte_recv_func_t)(void *rte_group, unsigned src,
void *buffer, size_t max, void *req);
typedef void (*ucx_perf_rte_exchange_vec_func_t)(void *rte_group, void *req);
typedef void (*ucx_perf_rte_report_func_t)(void *rte_group,
const ucx_perf_result_t *result,
void *arg, const char *extra_info,
int is_final, int is_multi_thread);


/**
* RTE used to bring-up the test
*/
typedef struct ucx_perf_rte {
/* @return UCS_OK, UCS_ERR_UNSUPPORTED or actual error */
ucx_perf_rte_setup_func_t setup;

/* Clean up an RTE that was set up successfully */
ucx_perf_rte_cleanup_func_t cleanup;

/* @return Group size */
ucx_perf_rte_group_size_func_t group_size;

Expand All @@ -225,12 +230,20 @@ typedef struct ucx_perf_rte {
ucx_perf_rte_recv_func_t recv;
ucx_perf_rte_exchange_vec_func_t exchange_vec;

/* Handle results */
ucx_perf_rte_report_func_t report;
/* Link in the list of supported RTEs */
ucs_list_link_t list;

} ucx_perf_rte_t;


/**
* Common report function, stored in ucx_perf_params_t::report_func and called
* by libperf to hand a computed result to the application.
*
* @param rte_group        Opaque RTE group handle; callers pass
*                         params.rte_group.
* @param result           Result to report.
* @param arg              Custom argument; callers pass params.report_arg.
* @param extra_info       Extra information string; callers pass either
*                         perf->extra_info or an empty string.
* @param is_final         ucx_perf_run() and the aggregated thread report
*                         pass 1, ucx_perf_report() passes 0.
* @param is_multi_thread  The aggregated thread report passes 1, the other
*                         callers pass 0.
*/
typedef void (*ucx_perf_report_func_t)(void *rte_group,
const ucx_perf_result_t *result,
void *arg, const char *extra_info,
int is_final, int is_multi_thread);

/**
* Describes a performance test.
*/
Expand Down Expand Up @@ -265,6 +278,8 @@ typedef struct ucx_perf_params {

void *rte_group; /* Opaque RTE group handle */
ucx_perf_rte_t *rte; /* RTE functions used to exchange data */

ucx_perf_report_func_t report_func; /* Report function callback */
void *report_arg; /* Custom argument for report function */

struct {
Expand Down
3 changes: 2 additions & 1 deletion src/tools/perf/configure.m4
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2018. ALL RIGHTS RESERVED.
# Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2024. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#
Expand All @@ -8,6 +8,7 @@ ucx_perftest_modules=""
m4_include([src/tools/perf/lib/configure.m4])
m4_include([src/tools/perf/cuda/configure.m4])
m4_include([src/tools/perf/rocm/configure.m4])
m4_include([src/tools/perf/mad/configure.m4])
AC_DEFINE_UNQUOTED([ucx_perftest_MODULES], ["${ucx_perftest_modules}"],
[Perftest loadable modules])

Expand Down
8 changes: 5 additions & 3 deletions src/tools/perf/lib/libperf.c
Original file line number Diff line number Diff line change
Expand Up @@ -2156,8 +2156,9 @@ ucs_status_t ucx_perf_run(const ucx_perf_params_t *params,
ucx_perf_funcs[params->api].barrier(perf);
if (status == UCS_OK) {
ucx_perf_calc_result(perf, result);
rte_call(perf, report, result, perf->params.report_arg,
perf->extra_info, 1, 0);
perf->params.report_func(perf->params.rte_group, result,
perf->params.report_arg, perf->extra_info,
1, 0);
}
} else {
status = ucx_perf_thread_spawn(perf, result);
Expand Down Expand Up @@ -2199,6 +2200,7 @@ void ucx_perf_report(ucx_perf_context_t *perf)

ucx_perf_get_time(perf);
ucx_perf_calc_result(perf, &result);
rte_call(perf, report, &result, perf->params.report_arg, "", 0, 0);
perf->params.report_func(perf->params.rte_group, &result,
perf->params.report_arg, "", 0, 0);
perf->prev = perf->current;
}
3 changes: 2 additions & 1 deletion src/tools/perf/lib/libperf_thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ static void ucx_perf_thread_report_aggregated_results(ucx_perf_context_t *perf)

agg_result.latency.total_average = lat_sum_total_avegare / thread_count;

rte_call(perf, report, &agg_result, perf->params.report_arg, "", 1, 1);
perf->params.report_func(perf->params.rte_group, &agg_result,
perf->params.report_arg, "", 1, 1);
}

ucs_status_t ucx_perf_thread_spawn(ucx_perf_context_t *perf,
Expand Down
18 changes: 18 additions & 0 deletions src/tools/perf/mad/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#
# Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2024. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

# Build the MAD RTE as its own libtool module. Everything up to the matching
# endif is skipped unless the HAVE_MAD automake conditional is true.
if HAVE_MAD

module_LTLIBRARIES = libucx_perftest_mad.la
libucx_perftest_mad_la_CPPFLAGS = $(BASE_CPPFLAGS)
# MAD compile/link flags and libraries are applied to this module only.
libucx_perftest_mad_la_CFLAGS = $(BASE_CFLAGS) $(MAD_CFLAGS) $(OPENMP_CFLAGS)
libucx_perftest_mad_la_LDFLAGS = $(MAD_LDFLAGS) -version-info $(SOVERSION)
libucx_perftest_mad_la_LIBADD = $(MAD_LIBS)
libucx_perftest_mad_la_SOURCES = perftest_mad.c

# NOTE(review): presumably supplies the shared module build/install rules
# (including the module_ prefix used above) - confirm against config/module.am.
include $(top_srcdir)/config/module.am

endif
9 changes: 9 additions & 0 deletions src/tools/perf/mad/configure.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#
# Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2024. ALL RIGHTS RESERVED.
#
# See file LICENSE for terms.
#

# Append the mad entry to the colon-separated ucx_perftest_modules list, but
# only when mad_happy is set to yes.
AS_IF([test "x$mad_happy" = "xyes"], [ucx_perftest_modules="${ucx_perftest_modules}:mad"])

# Have configure generate the Makefile of the MAD module directory.
AC_CONFIG_FILES([src/tools/perf/mad/Makefile])
43 changes: 23 additions & 20 deletions src/tools/perf/perftest_mad.c → src/tools/perf/mad/perftest_mad.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2023. ALL RIGHTS RESERVED.
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2023-2024. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand All @@ -8,9 +8,6 @@
#include "config.h"
#endif

#include "perftest_mad.h"
#include "perftest.h"

#include <ucs/sys/string.h>
#include <ucs/sys/sys.h>
#include <ucs/sys/sock.h>
Expand All @@ -29,6 +26,8 @@
#include <infiniband/umad.h>
#include <infiniband/umad_types.h>

#include "../perftest_context.h"

#define PERFTEST_RTE_CLASS (IB_VENDOR_RANGE2_START_CLASS + 0x10)
#define PERFTEST_RTE_MAD_QP_NUM 1 /* Don't use MAD on QP0 to not disturb SMI */

Expand Down Expand Up @@ -375,25 +374,18 @@ rte_mad_recv(void *rte_group, unsigned src, void *buffer, size_t max, void *req)
}
}

static void rte_mad_report(void *rte_group, const ucx_perf_result_t *result,
void *arg, const char *extra_info, int is_final,
int is_multi_thread)
{
struct perftest_context *ctx = arg;

print_progress(ctx->test_names, ctx->num_batch_files, result, extra_info,
ctx->flags, is_final, ctx->server_addr == NULL,
is_multi_thread);
}
static ucs_status_t rte_mad_setup(void *arg);
static void rte_mad_cleanup(void *arg);

static ucx_perf_rte_t mad_rte = {
.setup = rte_mad_setup,
.cleanup = rte_mad_cleanup,
.group_size = rte_mad_group_size,
.group_index = rte_mad_group_index,
.barrier = rte_mad_barrier,
.post_vec = rte_mad_post_vec,
.recv = rte_mad_recv,
.exchange_vec = (ucx_perf_rte_exchange_vec_func_t)ucs_empty_function,
.report = rte_mad_report,
.exchange_vec = (ucx_perf_rte_exchange_vec_func_t)ucs_empty_function
};

static struct ibmad_port *perftest_mad_open(char *ca, int ca_port)
Expand Down Expand Up @@ -633,13 +625,18 @@ static ucs_status_t perftest_mad_parse_ca_and_port(const char *mad_port,
return UCS_OK;
}

ucs_status_t setup_mad_rte(struct perftest_context *ctx)
static ucs_status_t rte_mad_setup(void *arg)
{
struct perftest_context *ctx = arg;
perftest_mad_rte_group_t *rte_group;
ucs_status_t status;
int ca_port;
char ca[32];

if (ctx->mad_port == NULL) {
return UCS_ERR_UNSUPPORTED;
}

status = perftest_mad_parse_ca_and_port(ctx->mad_port, ca, sizeof(ca),
&ca_port);
if (status != UCS_OK) {
Expand Down Expand Up @@ -686,7 +683,6 @@ ucs_status_t setup_mad_rte(struct perftest_context *ctx)

ctx->params.super.rte_group = rte_group;
ctx->params.super.rte = &mad_rte;
ctx->params.super.report_arg = ctx;

if (rte_group->is_server) {
ctx->flags |= TEST_FLAG_PRINT_TEST;
Expand All @@ -703,15 +699,22 @@ ucs_status_t setup_mad_rte(struct perftest_context *ctx)
return UCS_ERR_NO_DEVICE;
}

ucs_status_t cleanup_mad_rte(struct perftest_context *ctx)
static void rte_mad_cleanup(void *arg)
{
struct perftest_context *ctx = arg;
perftest_mad_rte_group_t *group = ctx->params.super.rte_group;

ctx->params.super.rte_group = NULL;
if (group != NULL) {
mad_rpc_close_port(group->mad_port);
free(group);
}
}

return UCS_OK;
/*
 * Register the MAD RTE by linking mad_rte at the head of rte_list.
 * NOTE(review): UCS_STATIC_INIT presumably runs this when the module is
 * loaded - confirm against the macro definition.
 */
UCS_STATIC_INIT {
ucs_list_add_head(&rte_list, &mad_rte.list);
}

/*
 * Unregister the MAD RTE by unlinking mad_rte from the list it is on.
 * NOTE(review): UCS_STATIC_CLEANUP presumably runs this when the module is
 * unloaded - confirm against the macro definition.
 */
UCS_STATIC_CLEANUP {
ucs_list_del(&mad_rte.list);
}
Loading

0 comments on commit 1c25669

Please sign in to comment.