Skip to content

Commit

Permalink
fixup!
Browse files Browse the repository at this point in the history
- remove leftover sleep
- fix vector communication
  • Loading branch information
greole committed Dec 10, 2024
1 parent acca0d2 commit 05b2067
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 19 deletions.
35 changes: 20 additions & 15 deletions include/OGL/DevicePersistent/Vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,24 +51,28 @@ struct VectorInitFunctor {
// Re-populates a persistent distributed vector from the buffer `other_`.
//
// NOTE(review): this listing comes from a commit diff view, so the live
// statements and their commented-out twins appear side by side. Confirm
// against the checked-out header which set is actually in effect.
//
// Steps visible here: read the original and repartitioned sizes from the
// matrix repartitioner, log them, rebuild a vector through init(), create a
// const view of `other_` on the reference executor, compute the
// gather-to-owner communication pattern, and communicate the viewed values
// into the local values of the vector.
//
// @param persistent_vector  shared pointer to the distributed vector to
//                           refresh; note that it is taken by value.
void update(std::shared_ptr<gko::experimental::distributed::Vector<T>>
persistent_vector) const
{
// Sizes before and after repartitioning, as reported by the repartitioner.
auto repartitioner = dist_matrix_->get_repartitioner();
auto host_size = repartitioner->get_orig_size();
auto repart_size = repartitioner->get_repart_size();
word msg{"updating array " + name_ + " of host size " +
std::to_string(host_size) + " repartitioned size " +
std::to_string(repart_size)};
LOG_1(verbose_, msg)
// Build a fresh vector via init() and swap it with the parameter.
// NOTE(review): persistent_vector is a by-value shared_ptr, so this swap only
// rebinds the local copy; the caller's pointer still refers to the previous
// vector. Confirm that this is the intended effect.
auto re_init_vec = init();
persistent_vector.swap(re_init_vec);

// Const view of other_ with host_size elements on the reference executor.
auto ref_exec = exec_.get_ref_exec();
auto host_view = gko::array<T>::const_view(ref_exec, host_size, other_);

// TODO store
auto comm_pattern = compute_gather_to_owner_counts(
exec_, repartitioner->get_ranks_per_gpu(), host_size);
// NOTE(review): host_buffer is not read anywhere in the visible body.
bool host_buffer = exec_.get_gko_force_host_buffer();
//auto repartitioner = dist_matrix_->get_repartitioner();
//auto host_size = repartitioner->get_orig_size();
//auto repart_size = repartitioner->get_repart_size();
//word msg{"updating array " + name_ + " of host size " +
//         std::to_string(host_size) + " repartitioned size " +
//         std::to_string(repart_size)};
//LOG_1(verbose_, msg)

// Send the viewed values into the local part of the vector. If the swap above
// is live, the target is the freshly initialised vector rather than the one
// the caller passed in.
communicate_values(exec_, comm_pattern, host_view.get_const_data(),
persistent_vector->get_local_values());
//auto ref_exec = exec_.get_ref_exec();
//auto host_view = gko::array<T>::const_view(ref_exec, host_size, other_);

//// TODO store
//auto comm_pattern = compute_gather_to_owner_counts(
//    exec_, repartitioner->get_ranks_per_gpu(), host_size);
//bool host_buffer = exec_.get_gko_force_host_buffer();

//communicate_values(exec_, comm_pattern, host_view.get_const_data(),
//                   persistent_vector->get_local_values());
}

std::shared_ptr<gko::experimental::distributed::Vector<T>> init() const
Expand Down Expand Up @@ -171,6 +175,7 @@ class PersistentVector
void copy_back()
{
auto exec = exec_.get_device_exec();
auto rank = exec_.get_rank();
auto ref_exec = exec_.get_ref_exec();
auto comm = exec_.get_communicator();
bool host_buffer = exec_.get_gko_force_host_buffer();
Expand Down
5 changes: 3 additions & 2 deletions src/CommunicationPattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,15 +186,16 @@ void communicate_values(
target_exec, recv_buffer_size, recv_buffer);
recv_view = tmp;
}
// src_exec is not host
// src_exec is device
// copy to host first then communicate
if (src_exec != src_exec->get_master()) {
label send_size = comm_pattern.send_offsets.back();
auto send_view = gko::array<scalar>::const_view(src_exec, send_size,
send_buffer);
auto tmp = gko::array<scalar>(src_exec->get_master(), send_size);
auto tmp = gko::array<scalar>(src_exec, send_size);

tmp = send_view;
tmp.set_executor(target_exec);

comm->all_to_all_v(src_exec, tmp.get_const_data(),
comm_pattern.send_counts.data(),
Expand Down
2 changes: 0 additions & 2 deletions test/unit/CommunicationPattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ extern char **my_argv;
// Definitions of the command-line globals; the hunk context above shows a
// matching `extern char **my_argv;` declaration.
// NOTE(review): presumably filled from main()'s argc/argv so the test
// environment can use them; confirm against the rest of the file.
int my_argc;
char **my_argv;

straneoioen


class CommunicationPatternEnvironment : public testing::Environment {
public:
Expand Down

0 comments on commit 05b2067

Please sign in to comment.