Distribute fixed width data (#3240)
* Send fixed-width topology, not AdjacencyList

* Count owned size

* Improve docs: make shape explicit

* Fix typo
chrisrichardson authored Jun 2, 2024
1 parent 73238da commit bd5ba2c
Showing 3 changed files with 209 additions and 18 deletions.
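
The new overload added in cpp/dolfinx/graph/partition.h takes a flattened row-major array together with its shape instead of an AdjacencyList. A minimal calling sketch (the row data, shape, destination choice and include paths below are illustrative assumptions, not part of the commit):

#include <array>
#include <cstdint>
#include <vector>
#include <dolfinx/common/MPI.h>
#include <dolfinx/graph/AdjacencyList.h>
#include <dolfinx/graph/partition.h>

// Distribute two rows of width 3; each row is owned by (and stays on) the
// calling rank, so no ghost rows are produced.
void example_distribute(MPI_Comm comm)
{
  const int rank = dolfinx::MPI::rank(comm);

  // Flattened 2 x 3 row-major array (arbitrary values)
  std::vector<std::int64_t> list = {0, 1, 2, 3, 4, 5};
  std::array<std::size_t, 2> shape = {2, 3};

  // One destination per row; the first (here, the only) rank is the owner
  auto dest = dolfinx::graph::regular_adjacency_list(
      std::vector<std::int32_t>{rank, rank}, 1);

  // data: received rows (owned first, then ghosts), flattened row-major
  // global_indices: original global index of each received row
  // ghost_owners: owning rank of each ghost row (empty in this sketch)
  auto [data, global_indices, ghost_owners]
      = dolfinx::graph::build::distribute(comm, list, shape, dest);
}
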
167 changes: 167 additions & 0 deletions cpp/dolfinx/graph/partition.cpp
@@ -224,6 +224,173 @@ graph::build::distribute(MPI_Comm comm,
global_indices0, ghost_index_owner};
}
//-----------------------------------------------------------------------------
std::tuple<std::vector<std::int64_t>, std::vector<std::int64_t>,
std::vector<int>>
graph::build::distribute(MPI_Comm comm, std::span<const std::int64_t> list,
std::array<std::size_t, 2> shape,
const graph::AdjacencyList<std::int32_t>& destinations)
{
common::Timer timer("Distribute fixed size nodes to destination ranks");

assert(list.size() == shape[0] * shape[1]);
assert(destinations.num_nodes() == (std::int32_t)shape[0]);
int rank = dolfinx::MPI::rank(comm);
std::int64_t num_owned = destinations.num_nodes();

// Get global offset for converting local index to global index for
// nodes in 'list'
std::int64_t offset_global = 0;
MPI_Exscan(&num_owned, &offset_global, 1, MPI_INT64_T, MPI_SUM, comm);

// Buffer size per node: row width (shape[1]), plus 2 entries for the
// owning rank and the node global index
const std::size_t buffer_shape1 = shape[1] + 2;

// Build (dest, index, owning rank) list and sort
std::vector<std::array<int, 3>> dest_to_index;
dest_to_index.reserve(destinations.array().size());
for (std::int32_t i = 0; i < destinations.num_nodes(); ++i)
{
auto di = destinations.links(i);
for (auto d : di)
dest_to_index.push_back({d, i, di[0]});
}
std::sort(dest_to_index.begin(), dest_to_index.end());

// Build list of unique dest ranks and count number of rows to send to
// each dest (by neighbourhood rank)
std::vector<int> dest;
std::vector<std::int32_t> num_items_per_dest;
{
auto it = dest_to_index.begin();
while (it != dest_to_index.end())
{
// Store global rank and find iterator to next global rank
dest.push_back((*it)[0]);
auto it1
= std::find_if(it, dest_to_index.end(),
[r = dest.back()](auto& idx) { return idx[0] != r; });

// Store number of items for current rank
num_items_per_dest.push_back(std::distance(it, it1));

// Advance iterator
it = it1;
}
}

// Determine source ranks. Sort ranks to make distribution
// deterministic.
std::vector<int> src = dolfinx::MPI::compute_graph_edges_nbx(comm, dest);
std::sort(src.begin(), src.end());

// Create neighbourhood communicator
MPI_Comm neigh_comm;
MPI_Dist_graph_create_adjacent(comm, src.size(), src.data(), MPI_UNWEIGHTED,
dest.size(), dest.data(), MPI_UNWEIGHTED,
MPI_INFO_NULL, false, &neigh_comm);

// Send number of nodes to receivers
std::vector<int> num_items_recv(src.size());
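// reserve(1) is presumably to ensure that data() is not a null pointer
// when this rank has no sources or destinations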
num_items_per_dest.reserve(1);
num_items_recv.reserve(1);
MPI_Request request_size;
MPI_Ineighbor_alltoall(num_items_per_dest.data(), 1, MPI_INT,
num_items_recv.data(), 1, MPI_INT, neigh_comm,
&request_size);

// Compute send displacements
std::vector<std::int32_t> send_disp(num_items_per_dest.size() + 1, 0);
std::partial_sum(num_items_per_dest.begin(), num_items_per_dest.end(),
std::next(send_disp.begin()));

// Pack send buffer
std::vector<std::int64_t> send_buffer(buffer_shape1 * send_disp.back(), -1);
{
assert(send_disp.back() == (std::int32_t)dest_to_index.size());
for (std::size_t i = 0; i < dest_to_index.size(); ++i)
{
const std::array<int, 3>& dest_data = dest_to_index[i];
const std::size_t pos = dest_data[1];

std::span b(send_buffer.data() + i * buffer_shape1, buffer_shape1);
std::span row(list.data() + pos * shape[1], shape[1]);
std::copy(row.begin(), row.end(), b.begin());

auto info = b.last(2);
info[0] = dest_data[2]; // Owning rank
info[1] = pos + offset_global; // Original global index
}
}

// Prepare receive displacement
MPI_Wait(&request_size, MPI_STATUS_IGNORE);
std::vector<std::int32_t> recv_disp(num_items_recv.size() + 1, 0);
std::partial_sum(num_items_recv.begin(), num_items_recv.end(),
std::next(recv_disp.begin()));

// Send/receive data
MPI_Datatype compound_type;
MPI_Type_contiguous(buffer_shape1, MPI_INT64_T, &compound_type);
MPI_Type_commit(&compound_type);
std::vector<std::int64_t> recv_buffer(buffer_shape1 * recv_disp.back());
MPI_Neighbor_alltoallv(send_buffer.data(), num_items_per_dest.data(),
send_disp.data(), compound_type, recv_buffer.data(),
num_items_recv.data(), recv_disp.data(), compound_type,
neigh_comm);
MPI_Type_free(&compound_type);
MPI_Comm_free(&neigh_comm);

spdlog::debug("Received {} data on {} [{}]", recv_disp.back(), rank,
shape[1]);

// Count number of owned entries
std::int32_t num_owned_r = 0;
for (std::int32_t i = 0; i < recv_disp.back(); ++i)
{
std::span row(recv_buffer.data() + i * buffer_shape1, buffer_shape1);
auto info = row.last(2);
int owner = info[0];
if (owner == rank)
num_owned_r++;
}

// Unpack receive buffer
std::vector<std::int64_t> data(shape[1] * recv_disp.back());
std::vector<std::int64_t> global_indices(recv_disp.back());
std::vector<int> ghost_index_owner(recv_disp.back() - num_owned_r);

std::int32_t i_owned = 0;
std::int32_t i_ghost = 0;
for (std::int32_t i = 0; i < recv_disp.back(); ++i)
{
std::span row(recv_buffer.data() + i * buffer_shape1, buffer_shape1);
auto info = row.last(2);
int owner = info[0];
std::int64_t orig_global_index = info[1];
auto edges = row.first(shape[1]);
if (owner == rank)
{
std::copy(edges.begin(), edges.end(),
std::next(data.begin(), i_owned * shape[1]));
global_indices[i_owned] = orig_global_index;
++i_owned;
}
else
{
std::copy(edges.begin(), edges.end(),
std::next(data.begin(), (i_ghost + num_owned_r) * shape[1]));
global_indices[i_ghost + num_owned_r] = orig_global_index;
ghost_index_owner[i_ghost] = owner;
++i_ghost;
}
}
assert(i_owned == num_owned_r);

spdlog::debug("data.size = {}", data.size());
return {data, global_indices, ghost_index_owner};
}
//-----------------------------------------------------------------------------
std::vector<std::int64_t>
graph::build::compute_ghost_indices(MPI_Comm comm,
std::span<const std::int64_t> owned_indices,
20 changes: 20 additions & 0 deletions cpp/dolfinx/graph/partition.h
@@ -69,6 +69,26 @@ std::tuple<graph::AdjacencyList<std::int64_t>, std::vector<int>,
distribute(MPI_Comm comm, const graph::AdjacencyList<std::int64_t>& list,
const graph::AdjacencyList<std::int32_t>& destinations);

/// @brief Distribute fixed-size nodes to destination ranks.
///
/// The global index of each node is assumed to be the local index plus
/// the offset for this rank.
///
/// @param[in] comm MPI communicator.
/// @param[in] list A flattened 2D row-major array.
/// @param[in] shape The shape of the array, i.e. (number of nodes, row
/// width).
/// @param[in] destinations Destination ranks for the ith row of the
/// array. The first rank is the 'owner' of the node.
/// @return
/// 1. Received rows for this process (owned rows first, followed by
///    ghost rows), flattened row-major.
/// 2. Original global index of each received node.
/// 3. Owner rank of each ghost node.
std::tuple<std::vector<std::int64_t>, std::vector<std::int64_t>,
std::vector<int>>
distribute(MPI_Comm comm, std::span<const std::int64_t> list,
std::array<std::size_t, 2> shape,
const graph::AdjacencyList<std::int32_t>& destinations);

/// @brief Take a set of distributed input global indices, including
/// ghosts, and determine the new global indices after remapping.
///
40 changes: 22 additions & 18 deletions cpp/dolfinx/mesh/utils.h
@@ -763,7 +763,7 @@ Mesh<typename std::remove_reference_t<typename U::value_type>> create_mesh(
const fem::ElementDofLayout doflayout = element.create_dof_layout();

const int num_cell_vertices = mesh::num_cell_vertices(element.cell_shape());
const int num_cell_nodes = doflayout.num_dofs();
std::size_t num_cell_nodes = doflayout.num_dofs();

// Note: `extract_topology` extracts topology data, i.e. just the
// vertices. For P1 geometry this should just be the identity
@@ -773,12 +773,12 @@ Mesh<typename std::remove_reference_t<typename U::value_type>> create_mesh(
// `extract_topology` could be skipped for 'P1 geometry' elements

// -- Partition topology across ranks of comm
graph::AdjacencyList<std::int64_t> cells1(0);
// std::vector<std::int64_t> cells1;
std::vector<std::int64_t> cells1;
std::vector<std::int64_t> original_idx1;
std::vector<int> ghost_owners;
if (partitioner)
{
spdlog::info("Using partitioner with {} cell data", cells.size());
graph::AdjacencyList<std::int32_t> dest(0);
if (commt != MPI_COMM_NULL)
{
@@ -789,26 +789,29 @@ Mesh<typename std::remove_reference_t<typename U::value_type>> create_mesh(

// Distribute cells (topology, includes higher-order 'nodes') to
// destination rank
std::vector<int> src;
auto _cells = graph::regular_adjacency_list(
std::vector(cells.begin(), cells.end()), num_cell_nodes);
std::tie(cells1, src, original_idx1, ghost_owners)
= graph::build::distribute(comm, _cells, dest);
assert(cells.size() % num_cell_nodes == 0);
std::size_t num_cells = cells.size() / num_cell_nodes;
std::tie(cells1, original_idx1, ghost_owners) = graph::build::distribute(
comm, cells, {num_cells, num_cell_nodes}, dest);
spdlog::debug("Got {} cells from distribution", cells1.size());
}
else
{
cells1 = graph::regular_adjacency_list(
std::vector(cells.begin(), cells.end()), num_cell_nodes);
std::int64_t offset(0), num_owned(cells1.num_nodes());
cells1 = std::vector<std::int64_t>(cells.begin(), cells.end());
assert(cells1.size() % num_cell_nodes == 0);
std::int64_t offset = 0;
std::int64_t num_owned = cells1.size() / num_cell_nodes;
MPI_Exscan(&num_owned, &offset, 1, MPI_INT64_T, MPI_SUM, comm);
original_idx1.resize(cells1.num_nodes());
original_idx1.resize(num_owned);
std::iota(original_idx1.begin(), original_idx1.end(), offset);
}

// Extract cell 'topology', i.e. extract the vertices for each cell
// and discard any 'higher-order' nodes
std::vector<std::int64_t> cells1_v
= extract_topology(celltype, doflayout, cells1.array());
= extract_topology(celltype, doflayout, cells1);
spdlog::info("Extract basic topology: {}->{}", cells1.size(),
cells1_v.size());

// Build local dual graph for owned cells to (i) get list of vertices
// on the process boundary and (ii) apply re-ordering to cells for
@@ -820,6 +823,7 @@ Mesh<typename std::remove_reference_t<typename U::value_type>> create_mesh(
std::vector<std::int32_t> cell_offsets(num_owned_cells + 1, 0);
for (std::size_t i = 1; i < cell_offsets.size(); ++i)
cell_offsets[i] = cell_offsets[i - 1] + num_cell_vertices;
spdlog::info("Build local dual graph");
auto [graph, unmatched_facets, max_v, facet_attached_cells]
= build_local_dual_graph(
std::vector{celltype},
@@ -835,8 +839,8 @@ Mesh<typename std::remove_reference_t<typename U::value_type>> create_mesh(
std::next(_original_idx.begin(), num_owned_cells));
impl::reorder_list(
std::span(cells1_v.data(), remap.size() * num_cell_vertices), remap);
impl::reorder_list(
std::span(cells1.array().data(), remap.size() * num_cell_nodes), remap);
impl::reorder_list(std::span(cells1.data(), remap.size() * num_cell_nodes),
remap);
original_idx1 = _original_idx;

// Boundary vertices are marked as 'unknown'
@@ -865,15 +869,15 @@ Mesh<typename std::remove_reference_t<typename U::value_type>> create_mesh(

// Build list of unique (global) node indices from cells1 and
// distribute coordinate data
std::vector<std::int64_t> nodes1 = cells1.array();
std::vector<std::int64_t> nodes1 = cells1;
dolfinx::radix_sort(std::span(nodes1));
nodes1.erase(std::unique(nodes1.begin(), nodes1.end()), nodes1.end());
std::vector coords
= dolfinx::MPI::distribute_data(comm, nodes1, commg, x, xshape[1]);

// Create geometry object
Geometry geometry = create_geometry(topology, element, nodes1, cells1.array(),
coords, xshape[1]);
Geometry geometry
= create_geometry(topology, element, nodes1, cells1, coords, xshape[1]);

return Mesh(comm, std::make_shared<Topology>(std::move(topology)),
std::move(geometry));
