Skip to content

Commit

Permalink
Merge pull request #4106 from rhc54/topic/hwloc
Browse files: browse the repository at this point in the history
Add diagnostics for hwloc get_topology
  • Loading branch information
Ralph Castain authored Aug 16, 2017
2 parents cd8db53 + 41df973 commit 1f799af
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 9 deletions.
30 changes: 21 additions & 9 deletions opal/mca/hwloc/base/hwloc_base_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ int opal_hwloc_base_get_topology(void)
char *shmemfile;
#endif

OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
"hwloc:base:get_topology"));
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
"hwloc:base:get_topology");

/* see if we already have it */
if (NULL != opal_hwloc_topology) {
Expand All @@ -289,8 +289,8 @@ int opal_hwloc_base_get_topology(void)

if (NULL != opal_pmix.get) {
#if HWLOC_API_VERSION >= 0x20000
OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
"hwloc:base: looking for topology in shared memory"));
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
"hwloc:base: looking for topology in shared memory");

/* first try to get the shmem link, if available */
aptr = &addr;
Expand All @@ -304,30 +304,36 @@ int opal_hwloc_base_get_topology(void)
if (OPAL_SUCCESS == rc && OPAL_SUCCESS == rc2 && OPAL_SUCCESS == rc3) {
if (0 > (fd = open(shmemfile, O_RDONLY))) {
free(shmemfile);
return OPAL_ERROR;
OPAL_ERROR_LOG(OPAL_ERR_FILE_OPEN_FAILURE)
return OPAL_ERR_FILE_OPEN_FAILURE;
}
free(shmemfile);
if (0 != hwloc_shmem_topology_adopt(&opal_hwloc_topology, fd,
0, (void*)addr, size, 0)) {
return OPAL_ERROR;
OPAL_ERROR_LOG(OPAL_ERR_FILE_READ_FAILURE);
return OPAL_ERR_FILE_READ_FAILURE;
}
OPAL_OUTPUT_VERBOSE((2, opal_hwloc_base_framework.framework_output,
"hwloc:base: topology in shared memory"));
opal_output_verbose(2, opal_hwloc_base_framework.framework_output,
"hwloc:base: topology in shared memory");
topo_in_shmem = true;
return OPAL_SUCCESS;
}
#endif
/* if that isn't available, then try to retrieve
* the xml representation from the PMIx data store */
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
"hwloc:base instantiating topology");
"hwloc:base getting topology XML string");
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_LOCAL_TOPO,
&wildcard_rank, &val, OPAL_STRING);
} else {
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
"hwloc:base PMIx not available");
rc = OPAL_ERR_NOT_SUPPORTED;
}

if (OPAL_SUCCESS == rc && NULL != val) {
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
"hwloc:base loading topology from XML");
/* load the topology */
if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
free(val);
Expand Down Expand Up @@ -361,9 +367,12 @@ int opal_hwloc_base_get_topology(void)
return rc;
}
} else if (NULL == opal_hwloc_base_topo_file) {
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
"hwloc:base discovering topology");
if (0 != hwloc_topology_init(&opal_hwloc_topology) ||
0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology, 0, true) ||
0 != hwloc_topology_load(opal_hwloc_topology)) {
OPAL_ERROR_LOG(OPAL_ERR_NOT_SUPPORTED);
return OPAL_ERR_NOT_SUPPORTED;
}
/* filter the cpus thru any default cpu set */
Expand All @@ -372,6 +381,9 @@ int opal_hwloc_base_get_topology(void)
return rc;
}
} else {
opal_output_verbose(1, opal_hwloc_base_framework.framework_output,
"hwloc:base loading topology from file %s",
opal_hwloc_base_topo_file);
if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) {
return rc;
}
Expand Down
4 changes: 4 additions & 0 deletions orte/test/mpi/hello.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,15 @@ int main(int argc, char* argv[])
int rank, size, rc;
hwloc_cpuset_t cpus;
char *bindings = NULL;
pid_t pid;

MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
pid = getpid();

printf("[%lu] Rank %d: getting topology\n", (unsigned long)pid, rank);
fflush(stdout);
if (OPAL_SUCCESS == opal_hwloc_base_get_topology()) {
cpus = hwloc_bitmap_alloc();
rc = hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS);
Expand Down

0 comments on commit 1f799af

Please sign in to comment.