Merge pull request #466 from lattice/feature/fast-qmp-p2p-setup
QMP peer-to-peer initialization now uses MPI_Allgather
mathiaswagner committed May 4, 2016
2 parents 6b33be0 + 175ccd3 commit fa3164e
Showing 5 changed files with 40 additions and 42 deletions.
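The change itself is small: instead of emulating an all-gather with QMP reductions, the hostname table is built with a single MPI_Allgather and then handed to the peer-to-peer setup. Below is a minimal standalone sketch of that pattern, not QUDA code: the 128-byte hostname field mirrors the diff, and the commented-out comm_peer2peer_init call stands in for QUDA's.

// Sketch: gather fixed-width hostnames from every rank in one collective,
// then pass the table on to peer-to-peer initialization (as this commit does).
#include <cstdlib>
#include <mpi.h>
#include <unistd.h>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int size;
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  char hostname[128] = {0};
  gethostname(hostname, 128);

  // One 128-byte slot per rank, filled by a single all-gather.
  char *hostname_recv_buf = (char *)malloc(128 * size);
  MPI_Allgather(hostname, 128, MPI_CHAR, hostname_recv_buf, 128, MPI_CHAR, MPI_COMM_WORLD);

  // comm_peer2peer_init(hostname_recv_buf);  // QUDA would reuse the table here

  free(hostname_recv_buf);
  MPI_Finalize();
  return 0;
}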
include/comm_quda.h: 3 changes (2 additions, 1 deletion)
@@ -129,8 +129,9 @@ extern "C" {

/**
Enabled peer-to-peer communication.
@param hostname_buf Array that holds all process hostnames
*/
void comm_peer2peer_init();
void comm_peer2peer_init(const char *hostname_recv_buf);

/**
Query if peer-to-peer communication is enabled
lib/comm_mpi.cpp: 15 changes (7 additions, 8 deletions)
@@ -82,7 +82,6 @@ void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *m
gpuid++;
}
}
host_free(hostname_recv_buf);

int device_count;
cudaGetDeviceCount(&device_count);
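The loop whose tail (gpuid++) appears above assigns this rank a device index from the gathered hostname table; a reasonable reading, consistent with the error check that follows, is that it counts lower-numbered ranks reporting the same hostname so that ranks sharing a node get consecutive GPU ids. A hedged sketch of that selection idea in isolation (select_gpuid and the 128-byte slot width are illustrative, not QUDA API):

// Sketch: pick a GPU index by counting lower-numbered ranks on the same host.
// hostname_recv_buf holds one 128-byte hostname slot per rank.
#include <cstring>

int select_gpuid(const char *hostname_recv_buf, const char *hostname, int rank)
{
  int gpuid = 0;
  for (int i = 0; i < rank; i++) {
    if (strncmp(hostname, hostname_recv_buf + 128 * i, 128) == 0) gpuid++;
  }
  return gpuid;  // caller still checks gpuid against cudaGetDeviceCount()
}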
@@ -92,9 +91,13 @@ void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *m
if (gpuid >= device_count) {
errorQuda("Too few GPUs available on %s", hostname);
}

comm_peer2peer_init(hostname_recv_buf);

host_free(hostname_recv_buf);
}

void comm_peer2peer_init()
void comm_peer2peer_init(const char* hostname_recv_buf)
{
if (peer2peer_init) return;

@@ -115,12 +118,9 @@ void comm_peer2peer_init()
comm_set_neighbor_ranks();

char *hostname = comm_hostname();
char *hostname_recv_buf = (char *)safe_malloc(128*size);

int *gpuid_recv_buf = (int *)safe_malloc(sizeof(int)*size);

MPI_CHECK( MPI_Allgather(hostname, 128, MPI_CHAR, hostname_recv_buf, 128, MPI_CHAR, MPI_COMM_WORLD) );

// There are more efficient ways to do the following,
// but it doesn't really matter since this function should be
// called just once.
@@ -140,14 +140,13 @@ void comm_peer2peer_init()
if(canAccessPeer[0]*canAccessPeer[1]){
peer2peer_enabled[dir][dim] = true;
if (getVerbosity() > QUDA_SILENT)
printf("Peer-to-peer enabled for rank %d with neighbor %d dir=%d, dim=%d\n",
rank, neighbor_rank, dir, dim);
printf("Peer-to-peer enabled for rank %d gpu=%d with neighbor %d gpu=%d dir=%d, dim=%d\n",
comm_rank(), gpuid, neighbor_rank, neighbor_gpuid, dir, dim);
}
} // on the same node
} // different dimensions - x, y, z, t
} // different directions - forward/backward

host_free(hostname_recv_buf);
host_free(gpuid_recv_buf);
}

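For reference, canAccessPeer[0]*canAccessPeer[1] in the hunk above only passes when CUDA reports peer access in both directions between the two GPUs; the queries themselves sit in lines elided here. A standalone sketch of such a bidirectional capability check, followed by the usual enable calls (the enable step is not part of this hunk and is shown only for completeness):

// Sketch: query peer-to-peer capability both ways, then enable it if possible.
#include <cstdio>
#include <cuda_runtime.h>

int main()
{
  int device_count = 0;
  cudaGetDeviceCount(&device_count);
  if (device_count < 2) { printf("need at least two GPUs\n"); return 0; }

  int gpu0 = 0, gpu1 = 1;
  int can01 = 0, can10 = 0;
  cudaDeviceCanAccessPeer(&can01, gpu0, gpu1);  // can gpu0 access gpu1?
  cudaDeviceCanAccessPeer(&can10, gpu1, gpu0);  // can gpu1 access gpu0?

  if (can01 && can10) {
    cudaSetDevice(gpu0);
    cudaDeviceEnablePeerAccess(gpu1, 0);  // applies to the current device
    cudaSetDevice(gpu1);
    cudaDeviceEnablePeerAccess(gpu0, 0);
    printf("peer-to-peer enabled between GPU %d and GPU %d\n", gpu0, gpu1);
  }
  return 0;
}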
lib/comm_qmp.cpp: 58 changes (30 additions, 28 deletions)
@@ -19,52 +19,51 @@ static bool peer2peer_enabled[2][4] = { {false,false,false,false},
{false,false,false,false} };
static bool peer2peer_init = false;

// While we can emulate an all-gather using QMP reductions, this
// scales horribly as the number of nodes increases, so for
// performance we just call MPI directly
#define USE_MPI_GATHER

// this is a work around (in the absence of C++11) to do a compile
// time check that the size of float and int are the same. Since we
// are reinterpretting a float as an int, this property is required.
template <typename A, typename B>
inline void static_assert_equal_size()
{
typedef char sizeof_float_must_equal_sizeof_int[sizeof(A) == sizeof(B) ? 1 : -1];
(void) sizeof(sizeof_float_must_equal_sizeof_int);
}

#ifdef USE_MPI_GATHER
#include <mpi.h>
#endif

void get_hostnames(char *hostname_recv_buf) {
// determine which GPU this rank will use
char *hostname = comm_hostname();

#ifdef USE_MPI_GATHER
MPI_Allgather(hostname, 128, MPI_CHAR, hostname_recv_buf, 128, MPI_CHAR, MPI_COMM_WORLD);
#else
// Abuse reductions to emulate all-gather. We need to copy the
// local hostname to all other nodes
// this isn't very scalable though
for (int i=0; i<comm_size(); i++) {
int data[128];
for (int j=0; j<128; j++) {
data[j] = (i == comm_rank()) ? hostname[j] : 0;
}

static_assert_equal_size<float,int>();
QMP_sum_float_array(reinterpret_cast<float*>(&data), 128);

for (int j=0; j<128; j++) {
QMP_sum_int(data+j);
hostname_recv_buf[i*128 + j] = data[j];
}
}
#endif

}


void get_gpuid(int *gpuid_recv_buf) {

#ifdef USE_MPI_GATHER
MPI_Allgather(&gpuid, 1, MPI_INT, gpuid_recv_buf, 1, MPI_INT, MPI_COMM_WORLD);
#else
// Abuse reductions to emulate all-gather. We need to copy the
// local hostname to all other nodes
for (int i=0; i<comm_size(); i++) {
int data = (i == comm_rank()) ? gpuid : 0;

static_assert_equal_size<float,int>();
QMP_sum_float_array(reinterpret_cast<float*>(&data), 1);

QMP_sum_int(&data);
gpuid_recv_buf[i] = data;
}
#endif
}
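The QMP fallback path above emulates an all-gather with reductions: every rank zero-fills the table except its own slot, so an element-wise global sum reproduces the gathered data (and the float/int reinterpretation only works because both types have the same size, which C++11 can state directly as static_assert(sizeof(float) == sizeof(int), "...")). Here is a minimal MPI illustration of the same trick, not the QMP code itself:

// Sketch: emulate an all-gather with a sum reduction. Each rank writes its
// value only into its own slot of a zero-initialized table, so summing the
// tables element-wise across ranks yields the gathered result everywhere.
#include <cstdio>
#include <vector>
#include <mpi.h>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  int gpuid = rank % 4;          // stand-in for the locally determined device id
  std::vector<int> table(size, 0);
  table[rank] = gpuid;           // only my slot is non-zero

  MPI_Allreduce(MPI_IN_PLACE, table.data(), size, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  if (rank == 0)
    for (int i = 0; i < size; i++) printf("rank %d -> gpuid %d\n", i, table[i]);

  MPI_Finalize();
  return 0;
}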


@@ -96,19 +95,23 @@ void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *m
gpuid++;
}
}
host_free(hostname_recv_buf);

int device_count;
cudaGetDeviceCount(&device_count);
if (device_count == 0) {
errorQuda("No CUDA devices found");
}
if (gpuid >= device_count) {
errorQuda("Too few GPUs available on %s", comm_hostname());
}

gpuid = (comm_rank() % device_count);
comm_peer2peer_init(hostname_recv_buf);

host_free(hostname_recv_buf);
}


void comm_peer2peer_init()
void comm_peer2peer_init(const char* hostname_recv_buf)
{
if (peer2peer_init) return;

@@ -130,10 +133,8 @@ void comm_peer2peer_init()
comm_set_neighbor_ranks();

char *hostname = comm_hostname();
char *hostname_recv_buf = (char *)safe_malloc(128*comm_size());
int *gpuid_recv_buf = (int *)safe_malloc(sizeof(int)*comm_size());

get_hostnames(hostname_recv_buf);
get_gpuid(gpuid_recv_buf);

for(int dir=0; dir<2; ++dir){ // forward/backward directions
@@ -150,18 +151,19 @@ void comm_peer2peer_init()
if(canAccessPeer[0]*canAccessPeer[1]){
peer2peer_enabled[dir][dim] = true;
if (getVerbosity() > QUDA_SILENT)
printf("Peer-to-peer enabled for rank %d with neighbor %d dir=%d, dim=%d\n",
comm_rank(), neighbor_rank, dir, dim);
printf("Peer-to-peer enabled for rank %d gpu=%d with neighbor %d gpu=%d dir=%d, dim=%d\n",
comm_rank(), gpuid, neighbor_rank, neighbor_gpuid, dir, dim);
}
} // on the same node
} // different dimensions - x, y, z, t
} // different directions - forward/backward

host_free(hostname_recv_buf);
host_free(gpuid_recv_buf);
}

peer2peer_init = true;

checkCudaError();
return;
}

lib/comm_single.cpp: 2 changes (1 addition, 1 deletion)
@@ -11,7 +11,7 @@ void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *m
comm_set_default_topology(topo);
}

void comm_peer2peer_init() {}
void comm_peer2peer_init(const char *hostname_buf) {}

bool comm_peer2peer_enabled(int dir, int dim) { return false; }

lib/cuda_color_spinor_field.cu: 4 changes (0 additions, 4 deletions)
@@ -1205,10 +1205,6 @@ namespace quda {
if (!initComms) errorQuda("Can only be called after create comms");
if (!ghost_field) errorQuda("ghost_field appears not to be allocated");

comm_peer2peer_init();

checkCudaError();

// handles for obtained ghost pointers
cudaIpcMemHandle_t ipcRemoteGhostDestHandle[2][QUDA_MAX_DIM];

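The deleted lines drop a comm_peer2peer_init() call that is redundant now that comm_init performs the setup; the rest of this function (mostly elided) exchanges cudaIpcMemHandle_t handles so neighbors can map each other's ghost buffers. As a generic reminder of that CUDA IPC mechanism (a sketch only, not QUDA's exchange; shipping the handle bytes between processes, e.g. over MPI, is assumed):

// Sketch: export a device allocation in one process and map it in another
// via CUDA IPC. Error checking and the inter-process transport are omitted.
#include <cuda_runtime.h>

// Exporting side: produce an opaque handle for an existing device pointer.
cudaIpcMemHandle_t export_ghost(void *d_ptr)
{
  cudaIpcMemHandle_t handle;
  cudaIpcGetMemHandle(&handle, d_ptr);
  return handle;  // send these bytes to the neighboring process
}

// Importing side: map the remote allocation into this process's address space.
void *import_ghost(cudaIpcMemHandle_t handle)
{
  void *remote_ptr = nullptr;
  cudaIpcOpenMemHandle(&remote_ptr, handle, cudaIpcMemLazyEnablePeerAccess);
  return remote_ptr;  // release later with cudaIpcCloseMemHandle(remote_ptr)
}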
