Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial MPI_T-like profiling support in Mercury #350

Open
wants to merge 40 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
6d26e00
Add initial MPI_T-like profiling support in Mercury
srini009 Mar 4, 2020
16c6512
Add in Mercury-style comments
srini009 Mar 11, 2020
05b796b
Add intiial change to remove use of global variables
srini009 Mar 26, 2020
a7e5988
Add atomics
srini009 Mar 31, 2020
0948b0f
Add atomics and remove all global variables
srini009 Apr 1, 2020
efa77e9
Add comments
srini009 Apr 1, 2020
237c6ee
Add note pointing to the MPI spec that this profiling interface is ba…
srini009 Apr 8, 2020
61df90a
Add PVARs for serialization/deserialization time
srini009 May 27, 2020
a784eca
Correct PVAR data gathering logic
srini009 May 27, 2020
7060882
Merge branch 'master' into mercury_profiling_interface
srini009 May 27, 2020
c8d73b9
Adding PVAR for backfill queue size
srini009 Jun 30, 2020
b7fdd9b
Merging from master
srini009 Jun 30, 2020
c174d8d
Merge branch 'master' into mercury_profiling_interface
srini009 Jul 22, 2020
6dbd7c2
Merge branch 'master' into mercury_profiling_interface
srini009 Jul 24, 2020
e168c03
Add checksum
srini009 Jul 30, 2020
ce29621
Add checksum
srini009 Jul 30, 2020
8ee302f
Merge branch 'mercury_profiling_interface' of https://github.com/srin…
srini009 Aug 15, 2020
c223544
Add more data printf
srini009 Aug 21, 2020
d607861
Add apex calls for extra payload
srini009 Aug 24, 2020
329dcba
Add PVAR for current handles posted
srini009 Aug 25, 2020
7e46221
Remove APEX instrumentation
srini009 Aug 26, 2020
5c62fd2
Add new PVAR definitions
srini009 Aug 26, 2020
7573858
Add implementation for new PVARs inside mercury.c
srini009 Aug 27, 2020
f31582b
Add more changes
srini009 Aug 27, 2020
3b9768b
Update the number of PVARs exported
srini009 Aug 27, 2020
8a11087
Remove the PVAR hash table
srini009 Aug 27, 2020
8a05135
Merge branch 'mercury_profiling_interface' of https://github.com/srin…
srini009 Aug 31, 2020
1fa40af
Integrate additional PVARs into Mercury
srini009 Aug 31, 2020
97203e3
Add new counter PVARs
srini009 Aug 31, 2020
73688a2
Remove print statement
srini009 Aug 31, 2020
9457bd6
Add the correct PVAR for internal RDMA transfer size
srini009 Sep 1, 2020
fb148c8
Add config file
srini009 Sep 1, 2020
4e39c83
Merge conflicts
srini009 Oct 4, 2020
e96b7a0
Fix issue with handle modifications
srini009 Oct 5, 2020
f69ddad
Add correction
srini009 Oct 6, 2020
d5f7c1a
Removing config file
srini009 Oct 6, 2020
ace64be
Merge branch 'mercury_profiling_interface' of https://github.com/srin…
srini009 Oct 6, 2020
c0e2321
Fix merge conflicts while reverting
srini009 Oct 12, 2020
a55bc1e
Add typo correction
srini009 Oct 12, 2020
d98a378
Merge branch 'mercury_profiling_interface' of https://github.com/srin…
srini009 Oct 13, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ set(MERCURY_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/mercury_core_header.c
${CMAKE_CURRENT_SOURCE_DIR}/mercury_header.c
${CMAKE_CURRENT_SOURCE_DIR}/mercury_proc.c
${CMAKE_CURRENT_SOURCE_DIR}/mercury_prof_interface.c
${CMAKE_CURRENT_SOURCE_DIR}/mercury_prof_pvar_impl.c
${CMAKE_CURRENT_SOURCE_DIR}/proc_extra/mercury_string_object.c
)
set(MERCURY_HL_SRCS
Expand Down Expand Up @@ -240,6 +242,8 @@ set(MERCURY_HEADERS
${CMAKE_CURRENT_BINARY_DIR}/mercury_config.h
${CMAKE_CURRENT_SOURCE_DIR}/mercury.h
${CMAKE_CURRENT_SOURCE_DIR}/mercury_bulk.h
${CMAKE_CURRENT_SOURCE_DIR}/mercury_prof_interface.h
${CMAKE_CURRENT_SOURCE_DIR}/mercury_prof_types.h
${CMAKE_CURRENT_SOURCE_DIR}/mercury_core.h
${CMAKE_CURRENT_SOURCE_DIR}/mercury_core_header.h
${CMAKE_CURRENT_SOURCE_DIR}/mercury_core_types.h
Expand Down
16 changes: 16 additions & 0 deletions src/mercury.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
#include <string.h>
#include <assert.h>

/* PVAR profiling support */
#include "mercury_prof_pvar_impl.h"

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a general comment I think it would be good to see if/how we can avoid declaring global variables.

/****************/
/* Local Macros */
/****************/
Expand Down Expand Up @@ -987,6 +990,11 @@ HG_Init_opt(const char *na_info_string, hg_bool_t na_listen,
HG_Core_set_more_data_callback(hg_class->hg_class.core_class,
hg_more_data_cb, hg_more_data_free_cb);


/* Initialize PVAR profiling data structures */
int ret = hg_prof_pvar_init();
assert(ret == HG_SUCCESS);

return (hg_class_t *) hg_class;

error:
Expand All @@ -1011,6 +1019,9 @@ HG_Finalize(hg_class_t *hg_class)
hg_thread_spin_destroy(&private_class->register_lock);
free(private_class);

/* Finalize PVAR data structures */
hg_prof_pvar_finalize();

done:
return ret;
}
Expand Down Expand Up @@ -1883,6 +1894,8 @@ HG_Forward(hg_handle_t handle, hg_cb_t callback, void *arg, void *in_struct)
hg_uint8_t flags = 0;
hg_return_t ret = HG_SUCCESS;

HG_PROF_PVAR_UINT_COUNTER(hg_pvar_hg_forward_count);

HG_CHECK_ERROR(handle == HG_HANDLE_NULL, done, ret, HG_INVALID_ARG,
"NULL HG handle");

Expand Down Expand Up @@ -1918,6 +1931,9 @@ HG_Forward(hg_handle_t handle, hg_cb_t callback, void *arg, void *in_struct)
HG_CHECK_HG_ERROR(done, ret, "Could not forward call (%s)",
HG_Error_to_string(ret));

/* PVAR profiling support: Increment the value of the PVAR with the name "hg_pvar_hg_forward_count" */
HG_PROF_PVAR_COUNTER_INC(hg_pvar_hg_forward_count, 1);

done:
return ret;
}
Expand Down
281 changes: 281 additions & 0 deletions src/mercury_prof_interface.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
/*
* Copyright (C) 2013-2020 Argonne National Laboratory, Department of Energy,
* UChicago Argonne, LLC and The HDF Group.
* All rights reserved.
*
* The full copyright notice, including terms governing use, modification,
* and redistribution, is contained in the COPYING file that can be
* found at the root of the source code distribution tree.
*/

#include "mercury.h"
#include "mercury_bulk.h"
#include "mercury_core.h"
#include "mercury_private.h"
#include "mercury_error.h"

#include "mercury_atomic.h"
#include "mercury_types.h"
#include "mercury_prof_interface.h"
#include "mercury_prof_pvar_impl.h"

#include <stdlib.h>
#include <string.h>
#include <assert.h>

/************************************/
/* Local Type and Struct Definition */
/************************************/

/* HG profiling PVAR handle object concrete definition */
struct hg_prof_pvar_handle {
hg_prof_class_t pvar_class; /* PVAR class */
hg_prof_datatype_t pvar_datatype; /* PVAR datatype */
hg_prof_bind_t pvar_bind; /* PVAR binding */
int continuous; /* is PVAR continuous or not */
void * addr; /* actual address of PVAR */
int is_started; /* if continuous, has PVAR been started? */
int count; /* number of values associated with PVAR */
char name[128]; /* PVAR name */
char description[128]; /* PVAR description */
};

/* HG profiling session object concrete definition */
struct hg_prof_pvar_session {
hg_prof_pvar_handle_t * pvar_handle_array; /* PVAR handle array */
int is_initialized; /* Is profiling initialized */
};

/* HG class */
struct hg_private_class {
struct hg_class hg_class; /* Must remain as first field */
int hg_prof_is_initialized; /* Is profiling initialized */
int num_pvars; /* No of PVARs currently supported */
hg_prof_pvar_session_t session; /* Is profiling initialized on the session */
};
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I am missing something but you should have a struct hg_prof_class here, not hg_private_class.


/*******************/
/* Local Variables */
/*******************/

/*---------------------------------------------------------------------------*/
static void
hg_prof_set_is_initialized(struct hg_private_class * hg_private_class)
{
hg_private_class->hg_prof_is_initialized = 1;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a detail but instead of using 1 or 0, there are HG_TRUE and HG_FALSE macros


/*---------------------------------------------------------------------------*/
static int
hg_prof_get_is_initialized(struct hg_private_class * hg_private_class) {
return hg_private_class->hg_prof_is_initialized;
}

/*---------------------------------------------------------------------------*/
static void
hg_prof_set_session_is_initialized(struct hg_prof_pvar_session * session)
{
session->is_initialized = 1;
}

/*---------------------------------------------------------------------------*/
static int
hg_prof_get_session_is_initialized(struct hg_prof_pvar_session * session) {
return session->is_initialized;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, you could return an hg_bool_t instead.


/*---------------------------------------------------------------------------*/
hg_return_t
HG_Prof_init(hg_class_t *hg_class) {

struct hg_private_class *hg_private_class = (struct hg_private_class *)(hg_class);

hg_prof_set_is_initialized(hg_private_class);
hg_private_class->num_pvars = NUM_PVARS;
hg_private_class->session = NULL;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you are allowed to do that here :) HG_Prof_init() should return a new hg_prof_class_t * so I would expect you to malloc a new struct hg_prof_class in that routine, fill it and return it.

return HG_SUCCESS;
}

/*---------------------------------------------------------------------------*/
hg_return_t
HG_Prof_finalize(hg_class_t *hg_class) {

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here instead it should take an hg_prof_class_t *

struct hg_private_class *hg_private_class = (struct hg_private_class *)(hg_class);

if(hg_prof_get_is_initialized(hg_private_class)) {
hg_prof_set_is_initialized(hg_private_class);
hg_private_class->session = NULL;
}

return HG_SUCCESS;
}

/*---------------------------------------------------------------------------*/
int
HG_Prof_pvar_get_num(hg_class_t *hg_class) {
struct hg_private_class *hg_private_class = (struct hg_private_class *)(hg_class);

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here and following routines

if(hg_prof_get_is_initialized(hg_private_class)) {
return hg_private_class->num_pvars;
} else {
return 0;
}
}

/*---------------------------------------------------------------------------*/
hg_return_t
HG_Prof_pvar_get_info(hg_class_t *hg_class, int pvar_index, char *name, int *name_len, hg_prof_class_t *var_class, hg_prof_datatype_t *datatype, char *desc, int *desc_len, hg_prof_bind_t *bind, int *continuous) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So that means hg_prof_class_t becomes hg_prof_var_class_t ?


struct hg_private_class *hg_private_class = (struct hg_private_class *)(hg_class);

if(!hg_prof_get_is_initialized(hg_private_class))
return HG_NA_ERROR;

assert(pvar_index < NUM_PVARS);

unsigned int key = pvar_index;
hg_prof_pvar_data_t * val;

/* Lookup the internal PVAR hash table to gather information about this PVAR */
val = hg_prof_pvar_table_lookup(key);
strcpy(name, (*val).name);
*name_len = strlen(name);
strcpy(desc, (*val).description);
*desc_len = strlen(desc);
*var_class = (*val).pvar_class;
*datatype = (*val).pvar_datatype;
*bind = (*val).pvar_bind;
*continuous = (*val).continuous;

return HG_SUCCESS;
}

/*---------------------------------------------------------------------------*/
hg_return_t
HG_Prof_pvar_session_create(hg_class_t *hg_class, hg_prof_pvar_session_t *session) {

struct hg_private_class *hg_private_class = (struct hg_private_class *)(hg_class);

if(!hg_prof_get_is_initialized(hg_private_class))
return HG_NA_ERROR;

(*session) = (hg_prof_pvar_session_t)malloc(sizeof(struct hg_prof_pvar_session));
(*session)->pvar_handle_array = (hg_prof_pvar_handle_t *)malloc(sizeof(hg_prof_pvar_handle_t)*NUM_PVARS);
hg_private_class->session = (*session);

hg_prof_set_session_is_initialized((*session));

return HG_SUCCESS;
}

/*---------------------------------------------------------------------------*/
hg_return_t
HG_Prof_pvar_session_destroy(hg_class_t *hg_class, hg_prof_pvar_session_t *session) {

struct hg_private_class *hg_private_class = (struct hg_private_class *)(hg_class);

if(!hg_prof_get_is_initialized(hg_private_class))
return HG_NA_ERROR;

free((*session)->pvar_handle_array);
free((*session));
hg_private_class->session = NULL;

return HG_SUCCESS;
}
/*---------------------------------------------------------------------------*/
hg_return_t
HG_Prof_pvar_handle_alloc(hg_prof_pvar_session_t session, int pvar_index, void *obj_handle, hg_prof_pvar_handle_t *handle, int *count) {

if(!hg_prof_get_session_is_initialized(session))
return HG_NA_ERROR;

/* Only supporting a null bind type at the moment */
assert(obj_handle == NULL);

struct hg_prof_pvar_session s = *session;
unsigned int key = pvar_index;
hg_prof_pvar_data_t * val;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep declarations at beginning of blocks


s.pvar_handle_array[pvar_index] = (hg_prof_pvar_handle_t)malloc(sizeof(struct hg_prof_pvar_handle));
val = hg_prof_pvar_table_lookup(key);

/* Copy out information from the internal PVAR hash table */
(*s.pvar_handle_array[pvar_index]).pvar_class = (*val).pvar_class;
(*s.pvar_handle_array[pvar_index]).pvar_datatype = (*val).pvar_datatype;
(*s.pvar_handle_array[pvar_index]).pvar_bind = (*val).pvar_bind;
(*s.pvar_handle_array[pvar_index]).continuous = (*val).continuous;
(*s.pvar_handle_array[pvar_index]).is_started = 0;
(*s.pvar_handle_array[pvar_index]).addr = (*val).addr;
if((*val).continuous)
(*s.pvar_handle_array[pvar_index]).is_started = 1;
strcpy((*s.pvar_handle_array[pvar_index]).name, (*val).name);
strcpy((*s.pvar_handle_array[pvar_index]).description, (*val).description);
*count = (*val).count;

/* Return handle */
*handle = s.pvar_handle_array[pvar_index];

return HG_SUCCESS;
}

/*---------------------------------------------------------------------------*/
hg_return_t
HG_Prof_pvar_handle_free(hg_prof_pvar_session_t session, int pvar_index, hg_prof_pvar_handle_t *handle) {

if(!hg_prof_get_session_is_initialized(session))
return HG_NA_ERROR;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that seems like the wrong error code :) HG_NA_ERROR means it's an NA layer error so maybe just HG_INVALID_ARG?



struct hg_prof_pvar_session s = *session;
assert(s.pvar_handle_array[pvar_index] == *handle);
free(s.pvar_handle_array[pvar_index]);
s.pvar_handle_array[pvar_index] = NULL;
*handle = NULL;

return HG_SUCCESS;
}

/*---------------------------------------------------------------------------*/
hg_return_t
HG_Prof_pvar_start(hg_prof_pvar_session_t session, hg_prof_pvar_handle_t handle) {
if(!hg_prof_get_session_is_initialized(session))
return HG_NA_ERROR;

if (!(*handle).continuous && !((*handle).is_started))
(*handle).is_started = 1;
return HG_SUCCESS;
}

/*---------------------------------------------------------------------------*/
hg_return_t
HG_Prof_pvar_stop(hg_prof_pvar_session_t session, hg_prof_pvar_handle_t handle) {
if(!hg_prof_get_session_is_initialized(session))
return HG_NA_ERROR;

if (!(*handle).continuous && ((*handle).is_started))
(*handle).is_started = 0;
return HG_SUCCESS;
}

/*---------------------------------------------------------------------------*/
hg_return_t
HG_Prof_pvar_read(hg_prof_pvar_session_t session, hg_prof_pvar_handle_t handle, void *buf) {
if(!hg_prof_get_session_is_initialized(session))
return HG_NA_ERROR;

/* Assert first that handle belongs to the session provided. NOT DOING THIS HERE FOR NOW */
struct hg_prof_pvar_handle h = (*handle);
switch(h.pvar_datatype) {
case HG_UINT:
/*for(int i = 0; i < h.count; h++)*/ /* Need to support PVAR arrays, just a placeholder that assumes PVAR count is 1 */
*((unsigned int *)buf) = *((unsigned int *)h.addr);
break;
case HG_DOUBLE:
*((double *)buf) = *((double *)h.addr);
break;
}
return HG_SUCCESS;
}
/*---------------------------------------------------------------------------*/
Loading