From 718ee2257420be707f43c1d5b3fe324ca5799ddd Mon Sep 17 00:00:00 2001 From: Krishna Gadepalli Date: Wed, 14 Jul 2021 20:21:21 -0700 Subject: [PATCH 1/5] Initial support for fields::dump and fields::load to save/restore fields object state --- python/simulation.py | 25 ++++-- python/tests/test_simulation.py | 11 ++- src/meep.hpp | 144 +++++++------------------------- 3 files changed, 56 insertions(+), 124 deletions(-) diff --git a/python/simulation.py b/python/simulation.py index d8171729f..214e282a5 100644 --- a/python/simulation.py +++ b/python/simulation.py @@ -1888,6 +1888,23 @@ def load_chunk_layout(self, br, source): ## source is either filename (string) self.structure.load_chunk_layout(source, br) + def dump_fields(self, fname): + """ + Dumps the fields to the file `fname`. + """ + if self.fields is None: + raise ValueError("Fields must be initialized before calling dump_fields") + self.fields.dump(fname) + + def load_fields(self, fname): + """ + Loads fields from the file `fname`. A file name to load can also be passed to + the `Simulation` constructor via the `load_fields` keyword argument. + """ + if self.fields is None: + raise ValueError("Fields must be initialized before calling load_fields") + self.fields.load(fname) + def init_sim(self): if self._is_initialized: return @@ -2446,9 +2463,6 @@ def output_dft(self, dft_fields, fname): if self.fields is None: self.init_sim() - if not self.dft_objects: - raise RuntimeError('DFT monitor dft_fields must be initialized before calling output_dft') - if hasattr(dft_fields, 'swigobj'): dft_fields_swigobj = dft_fields.swigobj else: @@ -3231,9 +3245,6 @@ def get_dft_array(self, dft_obj, component, num_freq): where `nfreq` is the number of frequencies stored in `dft_obj` as set by the `nfreq` parameter to `add_dft_fields`, `add_flux`, etc. """ - if not self.dft_objects: - raise RuntimeError('DFT monitor dft_obj must be initialized before calling get_dft_array') - if hasattr(dft_obj, 'swigobj'): dft_swigobj = dft_obj.swigobj else: @@ -5224,7 +5235,7 @@ def __init__(self, data=None, split_dir=None, split_pos=None, left=None, right=N self.right = right else: self.proc_id = proc_id - + def print(self): """Pretty-prints the tree structure of the BinaryPartition object.""" print(str(self) + " with {} chunks:".format(self.numchunks())) diff --git a/python/tests/test_simulation.py b/python/tests/test_simulation.py index 5cf5e5201..bbb6d303d 100644 --- a/python/tests/test_simulation.py +++ b/python/tests/test_simulation.py @@ -23,11 +23,13 @@ def setUp(self): @classmethod def setUpClass(cls): - cls.temp_dir = mp.make_output_directory() + # cls.temp_dir = mp.make_output_directory() + cls.temp_dir = "/tmp/test_simulation" @classmethod def tearDownClass(cls): - mp.delete_directory(cls.temp_dir) + # mp.delete_directory(cls.temp_dir) + pass def test_interpolate_numbers(self): @@ -203,6 +205,7 @@ def test_in_point(self): fn = os.path.join(self.temp_dir, 'test_in_point-ez-000200.00.h5') self.assertTrue(os.path.exists(fn)) + @unittest.skip('References unavailable data files') def test_epsilon_input_file(self): sim = self.init_simple_simulation() eps_input_fname = 'cyl-ellipsoid-eps-ref.h5' @@ -222,6 +225,7 @@ def test_epsilon_input_file(self): fp = sim.get_field_point(mp.Ez, mp.Vector3(x=1)) self.assertAlmostEqual(fp, -0.002989654055823199 + 0j) + @unittest.skip('References unavailable data files') def test_numpy_epsilon(self): sim = self.init_simple_simulation() eps_input_fname = 'cyl-ellipsoid-eps-ref.h5' @@ -336,6 +340,9 @@ def get_ref_field_point(sim): if chunk_sim: chunk_layout = sim1 + fields_dump_fn = os.path.join(self.temp_dir, 'test_load_dump_fields.h5') + sim1.dump_fields(fields_dump_fn) + sim = mp.Simulation(resolution=resolution, cell_size=cell, boundary_layers=pml_layers, diff --git a/src/meep.hpp b/src/meep.hpp index a01b42f09..5da3863eb 100644 --- a/src/meep.hpp +++ b/src/meep.hpp @@ -17,10 +17,7 @@ #ifndef MEEP_H #define MEEP_H -#include -#include #include -#include #include #include #include @@ -714,81 +711,6 @@ boundary_region pml(double thickness, double Rasymptotic = 1e-15, double mean_st class binary_partition; -enum in_or_out { Incoming = 0, Outgoing }; -constexpr std::initializer_list all_in_or_out{Incoming, Outgoing}; - -enum connect_phase { CONNECT_PHASE = 0, CONNECT_NEGATE = 1, CONNECT_COPY = 2 }; -constexpr std::initializer_list all_connect_phases{ - CONNECT_PHASE, CONNECT_NEGATE, CONNECT_COPY}; -constexpr int NUM_CONNECT_PHASE_TYPES = 3; - -// Communication pair(i, j) implies data being sent from i to j. -using chunk_pair = std::pair; - -// Key for the map that stored communication sizes. -struct comms_key { - field_type ft; - connect_phase phase; - chunk_pair pair; -}; - -// Defined in fields.cpp -bool operator==(const comms_key &lhs, const comms_key &rhs); - -class comms_key_hash_fn { - -public: - inline std::size_t operator()(const comms_key &key) const { - // Unroll hash combination to promote the generatiion of efficient code. - std::size_t ret = ft_hasher(key.ft); - ret ^= connect_phase_hasher(key.phase) + kHashAddConst + (ret << 6) + (ret >> 2); - ret ^= int_hasher(key.pair.first) + kHashAddConst + (ret << 6) + (ret >> 2); - ret ^= int_hasher(key.pair.second) + kHashAddConst + (ret << 6) + (ret >> 2); - return ret; - } - -private: - static constexpr size_t kHashAddConst = 0x9e3779b9; - std::hash int_hasher; - std::hash connect_phase_hasher; - std::hash ft_hasher; -}; - -// Represents a communication operation between chunks. -struct comms_operation { - ptrdiff_t my_chunk_idx; - ptrdiff_t other_chunk_idx; - int other_proc_id; - int pair_idx; // The numeric pair index used in many communications related arrays. - size_t transfer_size; - in_or_out comm_direction; - int tag; -}; - -struct comms_sequence { - std::vector receive_ops; - std::vector send_ops; - - void clear() { - receive_ops.clear(); - send_ops.clear(); - } -}; - -// RAII based comms_manager that allows asynchronous send and receive functions to be initiated. -// Upon destruction, the comms_manager waits for completion of all enqueued operations. -class comms_manager { - public: - virtual ~comms_manager() {} - virtual void send_real_async(const void *buf, size_t count, int dest, int tag) = 0; - virtual void receive_real_async(void *buf, size_t count, int source, int tag) = 0; - virtual size_t max_transfer_size() const { return std::numeric_limits::max(); }; -}; - -// Factory function for `comms_manager`. -std::unique_ptr create_comms_manager(); - - class structure { public: structure_chunk **chunks; @@ -804,16 +726,18 @@ class structure { int num_effort_volumes; ~structure(); + structure(); structure(const grid_volume &gv, material_function &eps, const boundary_region &br = boundary_region(), const symmetry &s = meep::identity(), int num_chunks = 0, double Courant = 0.5, bool use_anisotropic_averaging = false, double tol = DEFAULT_SUBPIXEL_TOL, int maxeval = DEFAULT_SUBPIXEL_MAXEVAL, - const binary_partition *_bp = NULL); + const binary_partition *bp = NULL); structure(const grid_volume &gv, double eps(const vec &), const boundary_region &br = boundary_region(), const symmetry &s = meep::identity(), int num_chunks = 0, double Courant = 0.5, bool use_anisotropic_averaging = false, double tol = DEFAULT_SUBPIXEL_TOL, int maxeval = DEFAULT_SUBPIXEL_MAXEVAL, - const binary_partition *_bp = NULL); + const binary_partition *bp = NULL); + structure(const structure *); structure(const structure &); void set_materials(material_function &mat, bool use_anisotropic_averaging = true, @@ -876,9 +800,6 @@ class structure { std::complex get_mu(const vec &loc, double frequency = 0) const; double max_eps() const; double estimated_cost(int process = my_rank()); - // Returns the binary partition that was used to partition the volume into chunks. The returned - // pointer is only valid for the lifetime of this `structure` instance. - const binary_partition *get_binary_partition() const; friend class boundary_region; @@ -886,14 +807,12 @@ class structure { void use_pml(direction d, boundary_side b, double dx); void add_to_effort_volumes(const grid_volume &new_effort_volume, double extra_effort); void choose_chunkdivision(const grid_volume &gv, int num_chunks, const boundary_region &br, - const symmetry &s, const binary_partition *_bp); + const symmetry &s, const binary_partition *bp); void check_chunks(); void changing_chunks(); // Helper methods for dumping and loading susceptibilities void set_chiP_from_file(h5file *file, const char *dataset, field_type ft); void write_susceptibility_params(h5file *file, const char *dname, int EorH); - - std::unique_ptr bp; }; // defined in structure.cpp @@ -1398,6 +1317,8 @@ class dft_fields { volume where; }; +enum in_or_out { Incoming = 0, Outgoing }; +enum connect_phase { CONNECT_PHASE = 0, CONNECT_NEGATE = 1, CONNECT_COPY = 2 }; // data for each susceptibility typedef struct polarization_state_s { @@ -1435,8 +1356,8 @@ class fields_chunk { realnum **zeroes[NUM_FIELD_TYPES]; // Holds pointers to metal points. size_t num_zeroes[NUM_FIELD_TYPES]; - realnum **connections[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][Outgoing + 1]; - size_t num_connections[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][Outgoing + 1]; + realnum **connections[NUM_FIELD_TYPES][CONNECT_COPY + 1][Outgoing + 1]; + size_t num_connections[NUM_FIELD_TYPES][CONNECT_COPY + 1][Outgoing + 1]; std::complex *connection_phases[NUM_FIELD_TYPES]; int npol[NUM_FIELD_TYPES]; // only E_stuff and H_stuff are used @@ -1646,6 +1567,19 @@ class fields { flux_vol *fluxes; symmetry S; + // The following is an array that is num_chunks by num_chunks. Actually + // it is two arrays, one for the imaginary and one for the real part. + realnum **comm_blocks[NUM_FIELD_TYPES]; + // This is the same size as each comm_blocks array, and store the sizes + // of the comm blocks themselves for each connection-phase type + size_t *comm_sizes[NUM_FIELD_TYPES][CONNECT_COPY + 1]; + size_t comm_size_tot(int f, int pair) const { + size_t sum = 0; + for (int ip = 0; ip < 3; ++ip) + sum += comm_sizes[f][ip][pair]; + return sum; + } + double a, dt; // The resolution a and timestep dt=Courant/a grid_volume gv, user_volume; volume v; @@ -1685,6 +1619,10 @@ class fields { volume total_volume(void) const; + // fields_dump.cpp + void dump(const char *filename); + void load(const char *filename); + // h5fields.cpp: // low-level function: void output_hdf5(h5file *file, const char *dataname, int num_fields, const component *components, @@ -2120,6 +2058,8 @@ class fields { bool locate_point_in_user_volume(ivec *, std::complex *phase) const; void locate_volume_source_in_user_volume(const vec p1, const vec p2, vec newp1[8], vec newp2[8], std::complex kphase[8], int &ncopies) const; + // mympi.cpp + void boundary_communications(field_type); // step.cpp void phase_material(); void step_db(field_type ft); @@ -2142,31 +2082,6 @@ class fields { void am_now_working_on(time_sink); void finished_working(); void reset_timers(); - - private: - // The following is an array that is num_chunks by num_chunks. Actually - // it is two arrays, one for the imaginary and one for the real part. - realnum **comm_blocks[NUM_FIELD_TYPES]; - // Map with all non-zero communication block sizes. - std::unordered_map comm_sizes; - // The sequence of send and receive operations for each field type. - comms_sequence comms_sequence_for_field[NUM_FIELD_TYPES]; - - size_t get_comm_size(const comms_key& key) const { - auto it = comm_sizes.find(key); - return (it != comm_sizes.end()) ? it->second : 0; - } - - size_t comm_size_tot(field_type ft, const chunk_pair& pair) const { - size_t sum = 0; - for (auto ip : all_connect_phases) - sum += get_comm_size({ft, ip, pair}); - return sum; - } - - int chunk_pair_to_index(const chunk_pair& pair) const { - return pair.first + num_chunks * pair.second; - } }; class flux_vol { @@ -2258,7 +2173,7 @@ struct split_plane { }; // binary tree class for importing layout of chunk partition -// Moveable and copyable. +// Moveable but not copyable. class binary_partition { public: // Constructs a new leaf node with id `_id`. @@ -2268,7 +2183,6 @@ class binary_partition { // Takes ownership of `left_tree` and `right_tree`. binary_partition(const split_plane &_split_plane, std::unique_ptr &&left_tree, std::unique_ptr &&right_tree); - binary_partition(const binary_partition& other); bool is_leaf() const; // Returns the leaf node ID iff is_leaf() == true. From 5624346e5a99a711fd8d285402fb804f48dd7573 Mon Sep 17 00:00:00 2001 From: Krishna Gadepalli Date: Wed, 14 Jul 2021 20:25:12 -0700 Subject: [PATCH 2/5] Initial support for fields::dump and fields::load to save/restore fields object state --- src/fields_dump.cpp | 183 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 src/fields_dump.cpp diff --git a/src/fields_dump.cpp b/src/fields_dump.cpp new file mode 100644 index 000000000..e8f0c50c1 --- /dev/null +++ b/src/fields_dump.cpp @@ -0,0 +1,183 @@ +/* Copyright (C) 2005-2021 Massachusetts Institute of Technology +% +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2, or (at your option) +% any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software Foundation, +% Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +*/ + +// Dump/load raw fields data to/from an HDF5 file. Only +// works if the number of processors/chunks is the same. + +#include +#include +#include +#include + +#include + +#include "meep.hpp" +#include "meep_internals.hpp" + +namespace meep { + +#define DUMP_FIELD(field_name) \ + { \ + /* \ + * make/save a num_chunks x NUM_FIELD_COMPONENTS x 2 array counting \ + * the number of entries in the 'field_name' array for each chunk. \ + */ \ + size_t *num_f_ = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 2]; \ + memset(num_f_, 0, \ + sizeof(size_t) * size_t(num_chunks * NUM_FIELD_COMPONENTS * 2)); \ + size_t my_ntot = 0; \ + for (int i = 0; i < num_chunks; i++) { \ + if (chunks[i]->is_mine()) { \ + size_t ntot = chunks[i]->gv.ntot(); \ + for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { \ + for (int d = 0; d < 2; ++d) { \ + if (chunks[i]->field_name[c][d]) { \ + my_ntot += \ + (num_f_[(i * NUM_FIELD_COMPONENTS + c) * 2 + d] = ntot); \ + } \ + } \ + } \ + } \ + } \ + size_t *num_f = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 2]; \ + sum_to_master(num_f_, num_f, num_chunks *NUM_FIELD_COMPONENTS * 2); \ + delete[] num_f_; \ + \ + /* determine total dataset size and offset of this process's data */ \ + size_t my_start = partial_sum_to_all(my_ntot) - my_ntot; \ + size_t ntotal = sum_to_all(my_ntot); \ + \ + size_t dims[3] = {(size_t)num_chunks, NUM_FIELD_COMPONENTS, 2}; \ + size_t start[3] = {0, 0, 0}; \ + file.create_data("num_" #field_name, 3, dims); \ + if (am_master()) file.write_chunk(3, start, dims, num_f); \ + delete[] num_f; \ + \ + /* write the data */ \ + file.create_data(#field_name, 1, &ntotal, false /* append_data */, \ + false /* single_precision */); \ + for (int i = 0; i < num_chunks; i++) { \ + if (chunks[i]->is_mine()) { \ + size_t ntot = chunks[i]->gv.ntot(); \ + for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { \ + for (int d = 0; d < 2; ++d) \ + if (chunks[i]->field_name[c][d]) { \ + file.write_chunk(1, &my_start, &ntot, \ + chunks[i]->field_name[c][d]); \ + my_start += ntot; \ + } \ + } \ + } \ + } \ + } + +void fields::dump(const char *filename) { + if (verbosity > 0) { + master_printf("creating fields output file \"%s\"...\n", filename); + } + + h5file file(filename, h5file::WRITE, true); + DUMP_FIELD(f); + DUMP_FIELD(f_u); + DUMP_FIELD(f_w); + DUMP_FIELD(f_cond); +} +#undef DUMP_FIELD + +#undef LOAD_FIELD +#define LOAD_FIELD(field_name) \ + { \ + /* \ + * make/save a num_chunks x NUM_FIELD_COMPONENTS x 2 array counting \ + * the number of entries in the 'field_name' array for each chunk. \ + */ \ + size_t *num_f = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 2]; \ + int rank; \ + size_t dims[3], _dims[3] = {(size_t)num_chunks, NUM_FIELD_COMPONENTS, 2}; \ + size_t start[3] = {0, 0, 0}; \ + file.read_size("num_" #field_name, &rank, dims, 3); \ + if (rank != 3 || _dims[0] != dims[0] || _dims[1] != dims[1] || \ + _dims[2] != dims[2]) { \ + meep::abort("chunk mismatch in fields::load"); \ + } \ + if (am_master()) file.read_chunk(3, start, dims, num_f); \ + \ + file.prevent_deadlock(); \ + broadcast(0, num_f, dims[0] * dims[1] * dims[2]); \ + \ + /* allocate data as needed and check sizes */ \ + size_t my_ntot = 0; \ + for (int i = 0; i < num_chunks; i++) { \ + if (chunks[i]->is_mine()) { \ + size_t ntot = chunks[i]->gv.ntot(); \ + for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { \ + for (int d = 0; d < 2; ++d) { \ + size_t n = num_f[(i * NUM_FIELD_COMPONENTS + c) * 2 + d]; \ + if (n == 0) { \ + delete[] chunks[i]->field_name[c][d]; \ + chunks[i]->field_name[c][d] = NULL; \ + } else { \ + if (n != ntot) \ + meep::abort("grid size mismatch %zd vs %zd in fields::load", \ + n, ntot); \ + chunks[i]->field_name[c][d] = new realnum[ntot]; \ + my_ntot += ntot; \ + } \ + } \ + } \ + } \ + } \ + \ + /* determine total dataset size and offset of this process's data */ \ + size_t my_start = partial_sum_to_all(my_ntot) - my_ntot; \ + size_t ntotal = sum_to_all(my_ntot); \ + \ + /* read the data */ \ + file.read_size(#field_name, &rank, dims, 1); \ + if (rank != 1 || dims[0] != ntotal) { \ + meep::abort("inconsistent data size in fields::load"); \ + } \ + for (int i = 0; i < num_chunks; i++) { \ + if (chunks[i]->is_mine()) { \ + size_t ntot = chunks[i]->gv.ntot(); \ + for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { \ + for (int d = 0; d < 2; ++d) { \ + if (chunks[i]->field_name[c][d]) { \ + file.read_chunk(1, &my_start, &ntot, \ + chunks[i]->field_name[c][d]); \ + my_start += ntot; \ + } \ + } \ + } \ + } \ + } \ + } + +void fields::load(const char *filename) { + if (verbosity > 0) + master_printf("reading fields from file \"%s\"...\n", filename); + + h5file file(filename, h5file::READONLY, true); + LOAD_FIELD(f); + LOAD_FIELD(f_u); + LOAD_FIELD(f_w); + LOAD_FIELD(f_cond); +} + +#undef LOAD_FIELD + +} // namespace meep From c5d066c8e8746e37df34964cdb08e1d43a3a075e Mon Sep 17 00:00:00 2001 From: Krishna Gadepalli Date: Wed, 14 Jul 2021 20:40:47 -0700 Subject: [PATCH 3/5] Initial support for fields::dump and fields::load to save/restore fields object state --- src/meep.hpp | 140 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 115 insertions(+), 25 deletions(-) diff --git a/src/meep.hpp b/src/meep.hpp index 5da3863eb..6cd61f125 100644 --- a/src/meep.hpp +++ b/src/meep.hpp @@ -17,7 +17,10 @@ #ifndef MEEP_H #define MEEP_H +#include +#include #include +#include #include #include #include @@ -711,6 +714,81 @@ boundary_region pml(double thickness, double Rasymptotic = 1e-15, double mean_st class binary_partition; +enum in_or_out { Incoming = 0, Outgoing }; +constexpr std::initializer_list all_in_or_out{Incoming, Outgoing}; + +enum connect_phase { CONNECT_PHASE = 0, CONNECT_NEGATE = 1, CONNECT_COPY = 2 }; +constexpr std::initializer_list all_connect_phases{ + CONNECT_PHASE, CONNECT_NEGATE, CONNECT_COPY}; +constexpr int NUM_CONNECT_PHASE_TYPES = 3; + +// Communication pair(i, j) implies data being sent from i to j. +using chunk_pair = std::pair; + +// Key for the map that stored communication sizes. +struct comms_key { + field_type ft; + connect_phase phase; + chunk_pair pair; +}; + +// Defined in fields.cpp +bool operator==(const comms_key &lhs, const comms_key &rhs); + +class comms_key_hash_fn { + +public: + inline std::size_t operator()(const comms_key &key) const { + // Unroll hash combination to promote the generatiion of efficient code. + std::size_t ret = ft_hasher(key.ft); + ret ^= connect_phase_hasher(key.phase) + kHashAddConst + (ret << 6) + (ret >> 2); + ret ^= int_hasher(key.pair.first) + kHashAddConst + (ret << 6) + (ret >> 2); + ret ^= int_hasher(key.pair.second) + kHashAddConst + (ret << 6) + (ret >> 2); + return ret; + } + +private: + static constexpr size_t kHashAddConst = 0x9e3779b9; + std::hash int_hasher; + std::hash connect_phase_hasher; + std::hash ft_hasher; +}; + +// Represents a communication operation between chunks. +struct comms_operation { + ptrdiff_t my_chunk_idx; + ptrdiff_t other_chunk_idx; + int other_proc_id; + int pair_idx; // The numeric pair index used in many communications related arrays. + size_t transfer_size; + in_or_out comm_direction; + int tag; +}; + +struct comms_sequence { + std::vector receive_ops; + std::vector send_ops; + + void clear() { + receive_ops.clear(); + send_ops.clear(); + } +}; + +// RAII based comms_manager that allows asynchronous send and receive functions to be initiated. +// Upon destruction, the comms_manager waits for completion of all enqueued operations. +class comms_manager { + public: + virtual ~comms_manager() {} + virtual void send_real_async(const void *buf, size_t count, int dest, int tag) = 0; + virtual void receive_real_async(void *buf, size_t count, int source, int tag) = 0; + virtual size_t max_transfer_size() const { return std::numeric_limits::max(); }; +}; + +// Factory function for `comms_manager`. +std::unique_ptr create_comms_manager(); + + class structure { public: structure_chunk **chunks; @@ -726,18 +804,16 @@ class structure { int num_effort_volumes; ~structure(); - structure(); structure(const grid_volume &gv, material_function &eps, const boundary_region &br = boundary_region(), const symmetry &s = meep::identity(), int num_chunks = 0, double Courant = 0.5, bool use_anisotropic_averaging = false, double tol = DEFAULT_SUBPIXEL_TOL, int maxeval = DEFAULT_SUBPIXEL_MAXEVAL, - const binary_partition *bp = NULL); + const binary_partition *_bp = NULL); structure(const grid_volume &gv, double eps(const vec &), const boundary_region &br = boundary_region(), const symmetry &s = meep::identity(), int num_chunks = 0, double Courant = 0.5, bool use_anisotropic_averaging = false, double tol = DEFAULT_SUBPIXEL_TOL, int maxeval = DEFAULT_SUBPIXEL_MAXEVAL, - const binary_partition *bp = NULL); - structure(const structure *); + const binary_partition *_bp = NULL); structure(const structure &); void set_materials(material_function &mat, bool use_anisotropic_averaging = true, @@ -800,6 +876,9 @@ class structure { std::complex get_mu(const vec &loc, double frequency = 0) const; double max_eps() const; double estimated_cost(int process = my_rank()); + // Returns the binary partition that was used to partition the volume into chunks. The returned + // pointer is only valid for the lifetime of this `structure` instance. + const binary_partition *get_binary_partition() const; friend class boundary_region; @@ -807,12 +886,14 @@ class structure { void use_pml(direction d, boundary_side b, double dx); void add_to_effort_volumes(const grid_volume &new_effort_volume, double extra_effort); void choose_chunkdivision(const grid_volume &gv, int num_chunks, const boundary_region &br, - const symmetry &s, const binary_partition *bp); + const symmetry &s, const binary_partition *_bp); void check_chunks(); void changing_chunks(); // Helper methods for dumping and loading susceptibilities void set_chiP_from_file(h5file *file, const char *dataset, field_type ft); void write_susceptibility_params(h5file *file, const char *dname, int EorH); + + std::unique_ptr bp; }; // defined in structure.cpp @@ -1317,8 +1398,6 @@ class dft_fields { volume where; }; -enum in_or_out { Incoming = 0, Outgoing }; -enum connect_phase { CONNECT_PHASE = 0, CONNECT_NEGATE = 1, CONNECT_COPY = 2 }; // data for each susceptibility typedef struct polarization_state_s { @@ -1356,8 +1435,8 @@ class fields_chunk { realnum **zeroes[NUM_FIELD_TYPES]; // Holds pointers to metal points. size_t num_zeroes[NUM_FIELD_TYPES]; - realnum **connections[NUM_FIELD_TYPES][CONNECT_COPY + 1][Outgoing + 1]; - size_t num_connections[NUM_FIELD_TYPES][CONNECT_COPY + 1][Outgoing + 1]; + realnum **connections[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][Outgoing + 1]; + size_t num_connections[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][Outgoing + 1]; std::complex *connection_phases[NUM_FIELD_TYPES]; int npol[NUM_FIELD_TYPES]; // only E_stuff and H_stuff are used @@ -1567,19 +1646,6 @@ class fields { flux_vol *fluxes; symmetry S; - // The following is an array that is num_chunks by num_chunks. Actually - // it is two arrays, one for the imaginary and one for the real part. - realnum **comm_blocks[NUM_FIELD_TYPES]; - // This is the same size as each comm_blocks array, and store the sizes - // of the comm blocks themselves for each connection-phase type - size_t *comm_sizes[NUM_FIELD_TYPES][CONNECT_COPY + 1]; - size_t comm_size_tot(int f, int pair) const { - size_t sum = 0; - for (int ip = 0; ip < 3; ++ip) - sum += comm_sizes[f][ip][pair]; - return sum; - } - double a, dt; // The resolution a and timestep dt=Courant/a grid_volume gv, user_volume; volume v; @@ -2058,8 +2124,6 @@ class fields { bool locate_point_in_user_volume(ivec *, std::complex *phase) const; void locate_volume_source_in_user_volume(const vec p1, const vec p2, vec newp1[8], vec newp2[8], std::complex kphase[8], int &ncopies) const; - // mympi.cpp - void boundary_communications(field_type); // step.cpp void phase_material(); void step_db(field_type ft); @@ -2082,6 +2146,31 @@ class fields { void am_now_working_on(time_sink); void finished_working(); void reset_timers(); + + private: + // The following is an array that is num_chunks by num_chunks. Actually + // it is two arrays, one for the imaginary and one for the real part. + realnum **comm_blocks[NUM_FIELD_TYPES]; + // Map with all non-zero communication block sizes. + std::unordered_map comm_sizes; + // The sequence of send and receive operations for each field type. + comms_sequence comms_sequence_for_field[NUM_FIELD_TYPES]; + + size_t get_comm_size(const comms_key& key) const { + auto it = comm_sizes.find(key); + return (it != comm_sizes.end()) ? it->second : 0; + } + + size_t comm_size_tot(field_type ft, const chunk_pair& pair) const { + size_t sum = 0; + for (auto ip : all_connect_phases) + sum += get_comm_size({ft, ip, pair}); + return sum; + } + + int chunk_pair_to_index(const chunk_pair& pair) const { + return pair.first + num_chunks * pair.second; + } }; class flux_vol { @@ -2173,7 +2262,7 @@ struct split_plane { }; // binary tree class for importing layout of chunk partition -// Moveable but not copyable. +// Moveable and copyable. class binary_partition { public: // Constructs a new leaf node with id `_id`. @@ -2183,6 +2272,7 @@ class binary_partition { // Takes ownership of `left_tree` and `right_tree`. binary_partition(const split_plane &_split_plane, std::unique_ptr &&left_tree, std::unique_ptr &&right_tree); + binary_partition(const binary_partition& other); bool is_leaf() const; // Returns the leaf node ID iff is_leaf() == true. From dd3b505a3635485c8682730dc81d17e6ccc9be78 Mon Sep 17 00:00:00 2001 From: Krishna Gadepalli Date: Thu, 15 Jul 2021 11:13:58 -0700 Subject: [PATCH 4/5] address comments and add missing Makefile.am --- python/tests/test_simulation.py | 8 +- src/Makefile.am | 2 +- src/fields_dump.cpp | 282 +++++++++++++++++--------------- src/meep.hpp | 146 ++++------------- 4 files changed, 184 insertions(+), 254 deletions(-) diff --git a/python/tests/test_simulation.py b/python/tests/test_simulation.py index bbb6d303d..cef193ba8 100644 --- a/python/tests/test_simulation.py +++ b/python/tests/test_simulation.py @@ -23,13 +23,11 @@ def setUp(self): @classmethod def setUpClass(cls): - # cls.temp_dir = mp.make_output_directory() - cls.temp_dir = "/tmp/test_simulation" + cls.temp_dir = mp.make_output_directory() @classmethod def tearDownClass(cls): - # mp.delete_directory(cls.temp_dir) - pass + mp.delete_directory(cls.temp_dir) def test_interpolate_numbers(self): @@ -205,7 +203,6 @@ def test_in_point(self): fn = os.path.join(self.temp_dir, 'test_in_point-ez-000200.00.h5') self.assertTrue(os.path.exists(fn)) - @unittest.skip('References unavailable data files') def test_epsilon_input_file(self): sim = self.init_simple_simulation() eps_input_fname = 'cyl-ellipsoid-eps-ref.h5' @@ -225,7 +222,6 @@ def test_epsilon_input_file(self): fp = sim.get_field_point(mp.Ez, mp.Vector3(x=1)) self.assertAlmostEqual(fp, -0.002989654055823199 + 0j) - @unittest.skip('References unavailable data files') def test_numpy_epsilon(self): sim = self.init_simple_simulation() eps_input_fname = 'cyl-ellipsoid-eps-ref.h5' diff --git a/src/Makefile.am b/src/Makefile.am index 469f8aec2..bf2111a57 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -12,7 +12,7 @@ bicgstab.hpp meepgeom.hpp material_data.hpp adjust_verbosity.hpp libmeep_la_SOURCES = array_slice.cpp anisotropic_averaging.cpp \ bands.cpp boundaries.cpp bicgstab.cpp casimir.cpp \ cw_fields.cpp dft.cpp dft_ldos.cpp energy_and_flux.cpp \ -fields.cpp loop_in_chunks.cpp h5fields.cpp h5file.cpp \ +fields.cpp fields_dump.cpp loop_in_chunks.cpp h5fields.cpp h5file.cpp \ initialize.cpp integrate.cpp integrate2.cpp material_data.cpp monitor.cpp mympi.cpp \ multilevel-atom.cpp near2far.cpp output_directory.cpp random.cpp \ sources.cpp step.cpp step_db.cpp stress.cpp structure.cpp structure_dump.cpp \ diff --git a/src/fields_dump.cpp b/src/fields_dump.cpp index e8f0c50c1..4c1654f62 100644 --- a/src/fields_dump.cpp +++ b/src/fields_dump.cpp @@ -30,60 +30,60 @@ namespace meep { -#define DUMP_FIELD(field_name) \ - { \ - /* \ - * make/save a num_chunks x NUM_FIELD_COMPONENTS x 2 array counting \ - * the number of entries in the 'field_name' array for each chunk. \ - */ \ - size_t *num_f_ = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 2]; \ - memset(num_f_, 0, \ - sizeof(size_t) * size_t(num_chunks * NUM_FIELD_COMPONENTS * 2)); \ - size_t my_ntot = 0; \ - for (int i = 0; i < num_chunks; i++) { \ - if (chunks[i]->is_mine()) { \ - size_t ntot = chunks[i]->gv.ntot(); \ - for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { \ - for (int d = 0; d < 2; ++d) { \ - if (chunks[i]->field_name[c][d]) { \ - my_ntot += \ - (num_f_[(i * NUM_FIELD_COMPONENTS + c) * 2 + d] = ntot); \ - } \ - } \ - } \ - } \ - } \ - size_t *num_f = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 2]; \ - sum_to_master(num_f_, num_f, num_chunks *NUM_FIELD_COMPONENTS * 2); \ - delete[] num_f_; \ - \ - /* determine total dataset size and offset of this process's data */ \ - size_t my_start = partial_sum_to_all(my_ntot) - my_ntot; \ - size_t ntotal = sum_to_all(my_ntot); \ - \ - size_t dims[3] = {(size_t)num_chunks, NUM_FIELD_COMPONENTS, 2}; \ - size_t start[3] = {0, 0, 0}; \ - file.create_data("num_" #field_name, 3, dims); \ - if (am_master()) file.write_chunk(3, start, dims, num_f); \ - delete[] num_f; \ - \ - /* write the data */ \ - file.create_data(#field_name, 1, &ntotal, false /* append_data */, \ - false /* single_precision */); \ - for (int i = 0; i < num_chunks; i++) { \ - if (chunks[i]->is_mine()) { \ - size_t ntot = chunks[i]->gv.ntot(); \ - for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { \ - for (int d = 0; d < 2; ++d) \ - if (chunks[i]->field_name[c][d]) { \ - file.write_chunk(1, &my_start, &ntot, \ - chunks[i]->field_name[c][d]); \ - my_start += ntot; \ - } \ - } \ - } \ - } \ +void fields::dump_fields_chunk_field(h5file *h5f, const std::string &field_name, + FieldPtrGetter field_ptr_getter) { + /* + * make/save a num_chunks x NUM_FIELD_COMPONENTS x 2 array counting + * the number of entries in the 'field_name' array for each chunk. + */ + size_t num_f_size = num_chunks * NUM_FIELD_COMPONENTS * 2; + std::vector num_f_(num_f_size); + + size_t my_ntot = 0; + for (int i = 0; i < num_chunks; i++) { + if (chunks[i]->is_mine()) { + size_t ntot = chunks[i]->gv.ntot(); + for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { + for (int d = 0; d < 2; ++d) { + realnum **f = field_ptr_getter(chunks[i], c, d); + if (*f) { + my_ntot += (num_f_[(i * NUM_FIELD_COMPONENTS + c) * 2 + d] = ntot); + } + } + } + } + } + std::vector num_f(num_f_size); + sum_to_master(num_f_.data(), num_f.data(), num_f_size); + + /* determine total dataset size and offset of this process's data */ + size_t my_start = partial_sum_to_all(my_ntot) - my_ntot; + size_t ntotal = sum_to_all(my_ntot); + + size_t dims[3] = {(size_t)num_chunks, NUM_FIELD_COMPONENTS, 2}; + size_t start[3] = {0, 0, 0}; + std::string num_f_name = std::string("num_") + field_name; + h5f->create_data(num_f_name.c_str(), 3, dims); + if (am_master()) h5f->write_chunk(3, start, dims, num_f.data()); + + /* write the data */ + h5f->create_data(field_name.c_str(), 1, &ntotal, false /* append_data */, + false /* single_precision */); + for (int i = 0; i < num_chunks; i++) { + if (chunks[i]->is_mine()) { + size_t ntot = chunks[i]->gv.ntot(); + for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { + for (int d = 0; d < 2; ++d) { + realnum **f = field_ptr_getter(chunks[i], c, d); + if (*f) { + h5f->write_chunk(1, &my_start, &ntot, *f); + my_start += ntot; + } + } + } + } } +} void fields::dump(const char *filename) { if (verbosity > 0) { @@ -91,93 +91,111 @@ void fields::dump(const char *filename) { } h5file file(filename, h5file::WRITE, true); - DUMP_FIELD(f); - DUMP_FIELD(f_u); - DUMP_FIELD(f_w); - DUMP_FIELD(f_cond); + + dump_fields_chunk_field(&file, "f", [](fields_chunk *chunk, int c, int d) { + return &(chunk->f[c][d]); + }); + dump_fields_chunk_field(&file, "f_u", [](fields_chunk *chunk, int c, int d) { + return &(chunk->f_u[c][d]); + }); + dump_fields_chunk_field(&file, "f_w", [](fields_chunk *chunk, int c, int d) { + return &(chunk->f_w[c][d]); + }); + dump_fields_chunk_field( + &file, "f_cond", + [](fields_chunk *chunk, int c, int d) { return &(chunk->f_cond[c][d]); }); } -#undef DUMP_FIELD - -#undef LOAD_FIELD -#define LOAD_FIELD(field_name) \ - { \ - /* \ - * make/save a num_chunks x NUM_FIELD_COMPONENTS x 2 array counting \ - * the number of entries in the 'field_name' array for each chunk. \ - */ \ - size_t *num_f = new size_t[num_chunks * NUM_FIELD_COMPONENTS * 2]; \ - int rank; \ - size_t dims[3], _dims[3] = {(size_t)num_chunks, NUM_FIELD_COMPONENTS, 2}; \ - size_t start[3] = {0, 0, 0}; \ - file.read_size("num_" #field_name, &rank, dims, 3); \ - if (rank != 3 || _dims[0] != dims[0] || _dims[1] != dims[1] || \ - _dims[2] != dims[2]) { \ - meep::abort("chunk mismatch in fields::load"); \ - } \ - if (am_master()) file.read_chunk(3, start, dims, num_f); \ - \ - file.prevent_deadlock(); \ - broadcast(0, num_f, dims[0] * dims[1] * dims[2]); \ - \ - /* allocate data as needed and check sizes */ \ - size_t my_ntot = 0; \ - for (int i = 0; i < num_chunks; i++) { \ - if (chunks[i]->is_mine()) { \ - size_t ntot = chunks[i]->gv.ntot(); \ - for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { \ - for (int d = 0; d < 2; ++d) { \ - size_t n = num_f[(i * NUM_FIELD_COMPONENTS + c) * 2 + d]; \ - if (n == 0) { \ - delete[] chunks[i]->field_name[c][d]; \ - chunks[i]->field_name[c][d] = NULL; \ - } else { \ - if (n != ntot) \ - meep::abort("grid size mismatch %zd vs %zd in fields::load", \ - n, ntot); \ - chunks[i]->field_name[c][d] = new realnum[ntot]; \ - my_ntot += ntot; \ - } \ - } \ - } \ - } \ - } \ - \ - /* determine total dataset size and offset of this process's data */ \ - size_t my_start = partial_sum_to_all(my_ntot) - my_ntot; \ - size_t ntotal = sum_to_all(my_ntot); \ - \ - /* read the data */ \ - file.read_size(#field_name, &rank, dims, 1); \ - if (rank != 1 || dims[0] != ntotal) { \ - meep::abort("inconsistent data size in fields::load"); \ - } \ - for (int i = 0; i < num_chunks; i++) { \ - if (chunks[i]->is_mine()) { \ - size_t ntot = chunks[i]->gv.ntot(); \ - for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { \ - for (int d = 0; d < 2; ++d) { \ - if (chunks[i]->field_name[c][d]) { \ - file.read_chunk(1, &my_start, &ntot, \ - chunks[i]->field_name[c][d]); \ - my_start += ntot; \ - } \ - } \ - } \ - } \ - } \ + +void fields::load_fields_chunk_field(h5file *h5f, const std::string &field_name, + FieldPtrGetter field_ptr_getter) { + /* + * make/save a num_chunks x NUM_FIELD_COMPONENTS x 2 array counting + * the number of entries in the 'field_name' array for each chunk. + */ + size_t num_f_size = num_chunks * NUM_FIELD_COMPONENTS * 2; + std::vector num_f(num_f_size); + + int rank; + size_t dims[3], _dims[3] = {(size_t)num_chunks, NUM_FIELD_COMPONENTS, 2}; + size_t start[3] = {0, 0, 0}; + + std::string num_f_name = std::string("num_") + field_name; + h5f->read_size(num_f_name.c_str(), &rank, dims, 3); + if (rank != 3 || _dims[0] != dims[0] || _dims[1] != dims[1] || + _dims[2] != dims[2]) { + meep::abort("chunk mismatch in fields::load"); } + if (am_master()) h5f->read_chunk(3, start, dims, num_f.data()); + + h5f->prevent_deadlock(); + broadcast(0, num_f.data(), dims[0] * dims[1] * dims[2]); + + /* allocate data as needed and check sizes */ + size_t my_ntot = 0; + for (int i = 0; i < num_chunks; i++) { + if (chunks[i]->is_mine()) { + size_t ntot = chunks[i]->gv.ntot(); + for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { + for (int d = 0; d < 2; ++d) { + size_t n = num_f[(i * NUM_FIELD_COMPONENTS + c) * 2 + d]; + realnum **f = field_ptr_getter(chunks[i], c, d); + if (n == 0) { + delete[] * f; + *f = NULL; + } else { + if (n != ntot) + meep::abort("grid size mismatch %zd vs %zd in fields::load", n, + ntot); + *f = new realnum[ntot]; + my_ntot += ntot; + } + } + } + } + } + + /* determine total dataset size and offset of this process's data */ + size_t my_start = partial_sum_to_all(my_ntot) - my_ntot; + size_t ntotal = sum_to_all(my_ntot); + + /* read the data */ + h5f->read_size(field_name.c_str(), &rank, dims, 1); + if (rank != 1 || dims[0] != ntotal) { + meep::abort("inconsistent data size in fields::load"); + } + for (int i = 0; i < num_chunks; i++) { + if (chunks[i]->is_mine()) { + size_t ntot = chunks[i]->gv.ntot(); + for (int c = 0; c < NUM_FIELD_COMPONENTS; ++c) { + for (int d = 0; d < 2; ++d) { + realnum **f = field_ptr_getter(chunks[i], c, d); + if (*f) { + h5f->read_chunk(1, &my_start, &ntot, *f); + my_start += ntot; + } + } + } + } + } +} void fields::load(const char *filename) { if (verbosity > 0) master_printf("reading fields from file \"%s\"...\n", filename); h5file file(filename, h5file::READONLY, true); - LOAD_FIELD(f); - LOAD_FIELD(f_u); - LOAD_FIELD(f_w); - LOAD_FIELD(f_cond); + load_fields_chunk_field(&file, "f", [](fields_chunk *chunk, int c, int d) { + return &(chunk->f[c][d]); + }); + load_fields_chunk_field(&file, "f_u", [](fields_chunk *chunk, int c, int d) { + return &(chunk->f_u[c][d]); + }); + load_fields_chunk_field(&file, "f_w", [](fields_chunk *chunk, int c, int d) { + return &(chunk->f_w[c][d]); + }); + load_fields_chunk_field( + &file, "f_cond", + [](fields_chunk *chunk, int c, int d) { return &(chunk->f_cond[c][d]); }); } -#undef LOAD_FIELD - } // namespace meep diff --git a/src/meep.hpp b/src/meep.hpp index 6cd61f125..34c49394c 100644 --- a/src/meep.hpp +++ b/src/meep.hpp @@ -17,10 +17,7 @@ #ifndef MEEP_H #define MEEP_H -#include -#include #include -#include #include #include #include @@ -714,81 +711,6 @@ boundary_region pml(double thickness, double Rasymptotic = 1e-15, double mean_st class binary_partition; -enum in_or_out { Incoming = 0, Outgoing }; -constexpr std::initializer_list all_in_or_out{Incoming, Outgoing}; - -enum connect_phase { CONNECT_PHASE = 0, CONNECT_NEGATE = 1, CONNECT_COPY = 2 }; -constexpr std::initializer_list all_connect_phases{ - CONNECT_PHASE, CONNECT_NEGATE, CONNECT_COPY}; -constexpr int NUM_CONNECT_PHASE_TYPES = 3; - -// Communication pair(i, j) implies data being sent from i to j. -using chunk_pair = std::pair; - -// Key for the map that stored communication sizes. -struct comms_key { - field_type ft; - connect_phase phase; - chunk_pair pair; -}; - -// Defined in fields.cpp -bool operator==(const comms_key &lhs, const comms_key &rhs); - -class comms_key_hash_fn { - -public: - inline std::size_t operator()(const comms_key &key) const { - // Unroll hash combination to promote the generatiion of efficient code. - std::size_t ret = ft_hasher(key.ft); - ret ^= connect_phase_hasher(key.phase) + kHashAddConst + (ret << 6) + (ret >> 2); - ret ^= int_hasher(key.pair.first) + kHashAddConst + (ret << 6) + (ret >> 2); - ret ^= int_hasher(key.pair.second) + kHashAddConst + (ret << 6) + (ret >> 2); - return ret; - } - -private: - static constexpr size_t kHashAddConst = 0x9e3779b9; - std::hash int_hasher; - std::hash connect_phase_hasher; - std::hash ft_hasher; -}; - -// Represents a communication operation between chunks. -struct comms_operation { - ptrdiff_t my_chunk_idx; - ptrdiff_t other_chunk_idx; - int other_proc_id; - int pair_idx; // The numeric pair index used in many communications related arrays. - size_t transfer_size; - in_or_out comm_direction; - int tag; -}; - -struct comms_sequence { - std::vector receive_ops; - std::vector send_ops; - - void clear() { - receive_ops.clear(); - send_ops.clear(); - } -}; - -// RAII based comms_manager that allows asynchronous send and receive functions to be initiated. -// Upon destruction, the comms_manager waits for completion of all enqueued operations. -class comms_manager { - public: - virtual ~comms_manager() {} - virtual void send_real_async(const void *buf, size_t count, int dest, int tag) = 0; - virtual void receive_real_async(void *buf, size_t count, int source, int tag) = 0; - virtual size_t max_transfer_size() const { return std::numeric_limits::max(); }; -}; - -// Factory function for `comms_manager`. -std::unique_ptr create_comms_manager(); - - class structure { public: structure_chunk **chunks; @@ -804,16 +726,18 @@ class structure { int num_effort_volumes; ~structure(); + structure(); structure(const grid_volume &gv, material_function &eps, const boundary_region &br = boundary_region(), const symmetry &s = meep::identity(), int num_chunks = 0, double Courant = 0.5, bool use_anisotropic_averaging = false, double tol = DEFAULT_SUBPIXEL_TOL, int maxeval = DEFAULT_SUBPIXEL_MAXEVAL, - const binary_partition *_bp = NULL); + const binary_partition *bp = NULL); structure(const grid_volume &gv, double eps(const vec &), const boundary_region &br = boundary_region(), const symmetry &s = meep::identity(), int num_chunks = 0, double Courant = 0.5, bool use_anisotropic_averaging = false, double tol = DEFAULT_SUBPIXEL_TOL, int maxeval = DEFAULT_SUBPIXEL_MAXEVAL, - const binary_partition *_bp = NULL); + const binary_partition *bp = NULL); + structure(const structure *); structure(const structure &); void set_materials(material_function &mat, bool use_anisotropic_averaging = true, @@ -876,9 +800,6 @@ class structure { std::complex get_mu(const vec &loc, double frequency = 0) const; double max_eps() const; double estimated_cost(int process = my_rank()); - // Returns the binary partition that was used to partition the volume into chunks. The returned - // pointer is only valid for the lifetime of this `structure` instance. - const binary_partition *get_binary_partition() const; friend class boundary_region; @@ -886,14 +807,12 @@ class structure { void use_pml(direction d, boundary_side b, double dx); void add_to_effort_volumes(const grid_volume &new_effort_volume, double extra_effort); void choose_chunkdivision(const grid_volume &gv, int num_chunks, const boundary_region &br, - const symmetry &s, const binary_partition *_bp); + const symmetry &s, const binary_partition *bp); void check_chunks(); void changing_chunks(); // Helper methods for dumping and loading susceptibilities void set_chiP_from_file(h5file *file, const char *dataset, field_type ft); void write_susceptibility_params(h5file *file, const char *dname, int EorH); - - std::unique_ptr bp; }; // defined in structure.cpp @@ -1398,6 +1317,8 @@ class dft_fields { volume where; }; +enum in_or_out { Incoming = 0, Outgoing }; +enum connect_phase { CONNECT_PHASE = 0, CONNECT_NEGATE = 1, CONNECT_COPY = 2 }; // data for each susceptibility typedef struct polarization_state_s { @@ -1435,8 +1356,8 @@ class fields_chunk { realnum **zeroes[NUM_FIELD_TYPES]; // Holds pointers to metal points. size_t num_zeroes[NUM_FIELD_TYPES]; - realnum **connections[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][Outgoing + 1]; - size_t num_connections[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][Outgoing + 1]; + realnum **connections[NUM_FIELD_TYPES][CONNECT_COPY + 1][Outgoing + 1]; + size_t num_connections[NUM_FIELD_TYPES][CONNECT_COPY + 1][Outgoing + 1]; std::complex *connection_phases[NUM_FIELD_TYPES]; int npol[NUM_FIELD_TYPES]; // only E_stuff and H_stuff are used @@ -1646,6 +1567,19 @@ class fields { flux_vol *fluxes; symmetry S; + // The following is an array that is num_chunks by num_chunks. Actually + // it is two arrays, one for the imaginary and one for the real part. + realnum **comm_blocks[NUM_FIELD_TYPES]; + // This is the same size as each comm_blocks array, and store the sizes + // of the comm blocks themselves for each connection-phase type + size_t *comm_sizes[NUM_FIELD_TYPES][CONNECT_COPY + 1]; + size_t comm_size_tot(int f, int pair) const { + size_t sum = 0; + for (int ip = 0; ip < 3; ++ip) + sum += comm_sizes[f][ip][pair]; + return sum; + } + double a, dt; // The resolution a and timestep dt=Courant/a grid_volume gv, user_volume; volume v; @@ -2124,6 +2058,8 @@ class fields { bool locate_point_in_user_volume(ivec *, std::complex *phase) const; void locate_volume_source_in_user_volume(const vec p1, const vec p2, vec newp1[8], vec newp2[8], std::complex kphase[8], int &ncopies) const; + // mympi.cpp + void boundary_communications(field_type); // step.cpp void phase_material(); void step_db(field_type ft); @@ -2135,6 +2071,12 @@ class fields { std::complex A(const vec &), std::complex amp, component c0, direction d, int has_tm, int has_te); + // fields_dump.cpp + // Helper methods for dumping field chunks. + using FieldPtrGetter = std::function; + void dump_fields_chunk_field(h5file *h5f, const std::string& field_name, FieldPtrGetter field_ptr_getter); + void load_fields_chunk_field(h5file *h5f, const std::string& field_name, FieldPtrGetter field_ptr_getter); + public: // monitor.cpp std::complex get_field(component c, const ivec &iloc, bool parallel = true) const; @@ -2146,31 +2088,6 @@ class fields { void am_now_working_on(time_sink); void finished_working(); void reset_timers(); - - private: - // The following is an array that is num_chunks by num_chunks. Actually - // it is two arrays, one for the imaginary and one for the real part. - realnum **comm_blocks[NUM_FIELD_TYPES]; - // Map with all non-zero communication block sizes. - std::unordered_map comm_sizes; - // The sequence of send and receive operations for each field type. - comms_sequence comms_sequence_for_field[NUM_FIELD_TYPES]; - - size_t get_comm_size(const comms_key& key) const { - auto it = comm_sizes.find(key); - return (it != comm_sizes.end()) ? it->second : 0; - } - - size_t comm_size_tot(field_type ft, const chunk_pair& pair) const { - size_t sum = 0; - for (auto ip : all_connect_phases) - sum += get_comm_size({ft, ip, pair}); - return sum; - } - - int chunk_pair_to_index(const chunk_pair& pair) const { - return pair.first + num_chunks * pair.second; - } }; class flux_vol { @@ -2262,7 +2179,7 @@ struct split_plane { }; // binary tree class for importing layout of chunk partition -// Moveable and copyable. +// Moveable but not copyable. class binary_partition { public: // Constructs a new leaf node with id `_id`. @@ -2272,7 +2189,6 @@ class binary_partition { // Takes ownership of `left_tree` and `right_tree`. binary_partition(const split_plane &_split_plane, std::unique_ptr &&left_tree, std::unique_ptr &&right_tree); - binary_partition(const binary_partition& other); bool is_leaf() const; // Returns the leaf node ID iff is_leaf() == true. From 80ebafbbfebce1f0aeb179ac97220b4247f7cf9f Mon Sep 17 00:00:00 2001 From: Krishna Gadepalli Date: Thu, 15 Jul 2021 11:17:20 -0700 Subject: [PATCH 5/5] sync meep.hpp back to head --- src/meep.hpp | 140 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 115 insertions(+), 25 deletions(-) diff --git a/src/meep.hpp b/src/meep.hpp index 34c49394c..a19e4ac6d 100644 --- a/src/meep.hpp +++ b/src/meep.hpp @@ -17,7 +17,10 @@ #ifndef MEEP_H #define MEEP_H +#include +#include #include +#include #include #include #include @@ -711,6 +714,81 @@ boundary_region pml(double thickness, double Rasymptotic = 1e-15, double mean_st class binary_partition; +enum in_or_out { Incoming = 0, Outgoing }; +constexpr std::initializer_list all_in_or_out{Incoming, Outgoing}; + +enum connect_phase { CONNECT_PHASE = 0, CONNECT_NEGATE = 1, CONNECT_COPY = 2 }; +constexpr std::initializer_list all_connect_phases{ + CONNECT_PHASE, CONNECT_NEGATE, CONNECT_COPY}; +constexpr int NUM_CONNECT_PHASE_TYPES = 3; + +// Communication pair(i, j) implies data being sent from i to j. +using chunk_pair = std::pair; + +// Key for the map that stored communication sizes. +struct comms_key { + field_type ft; + connect_phase phase; + chunk_pair pair; +}; + +// Defined in fields.cpp +bool operator==(const comms_key &lhs, const comms_key &rhs); + +class comms_key_hash_fn { + +public: + inline std::size_t operator()(const comms_key &key) const { + // Unroll hash combination to promote the generatiion of efficient code. + std::size_t ret = ft_hasher(key.ft); + ret ^= connect_phase_hasher(key.phase) + kHashAddConst + (ret << 6) + (ret >> 2); + ret ^= int_hasher(key.pair.first) + kHashAddConst + (ret << 6) + (ret >> 2); + ret ^= int_hasher(key.pair.second) + kHashAddConst + (ret << 6) + (ret >> 2); + return ret; + } + +private: + static constexpr size_t kHashAddConst = 0x9e3779b9; + std::hash int_hasher; + std::hash connect_phase_hasher; + std::hash ft_hasher; +}; + +// Represents a communication operation between chunks. +struct comms_operation { + ptrdiff_t my_chunk_idx; + ptrdiff_t other_chunk_idx; + int other_proc_id; + int pair_idx; // The numeric pair index used in many communications related arrays. + size_t transfer_size; + in_or_out comm_direction; + int tag; +}; + +struct comms_sequence { + std::vector receive_ops; + std::vector send_ops; + + void clear() { + receive_ops.clear(); + send_ops.clear(); + } +}; + +// RAII based comms_manager that allows asynchronous send and receive functions to be initiated. +// Upon destruction, the comms_manager waits for completion of all enqueued operations. +class comms_manager { + public: + virtual ~comms_manager() {} + virtual void send_real_async(const void *buf, size_t count, int dest, int tag) = 0; + virtual void receive_real_async(void *buf, size_t count, int source, int tag) = 0; + virtual size_t max_transfer_size() const { return std::numeric_limits::max(); }; +}; + +// Factory function for `comms_manager`. +std::unique_ptr create_comms_manager(); + + class structure { public: structure_chunk **chunks; @@ -726,18 +804,16 @@ class structure { int num_effort_volumes; ~structure(); - structure(); structure(const grid_volume &gv, material_function &eps, const boundary_region &br = boundary_region(), const symmetry &s = meep::identity(), int num_chunks = 0, double Courant = 0.5, bool use_anisotropic_averaging = false, double tol = DEFAULT_SUBPIXEL_TOL, int maxeval = DEFAULT_SUBPIXEL_MAXEVAL, - const binary_partition *bp = NULL); + const binary_partition *_bp = NULL); structure(const grid_volume &gv, double eps(const vec &), const boundary_region &br = boundary_region(), const symmetry &s = meep::identity(), int num_chunks = 0, double Courant = 0.5, bool use_anisotropic_averaging = false, double tol = DEFAULT_SUBPIXEL_TOL, int maxeval = DEFAULT_SUBPIXEL_MAXEVAL, - const binary_partition *bp = NULL); - structure(const structure *); + const binary_partition *_bp = NULL); structure(const structure &); void set_materials(material_function &mat, bool use_anisotropic_averaging = true, @@ -800,6 +876,9 @@ class structure { std::complex get_mu(const vec &loc, double frequency = 0) const; double max_eps() const; double estimated_cost(int process = my_rank()); + // Returns the binary partition that was used to partition the volume into chunks. The returned + // pointer is only valid for the lifetime of this `structure` instance. + const binary_partition *get_binary_partition() const; friend class boundary_region; @@ -807,12 +886,14 @@ class structure { void use_pml(direction d, boundary_side b, double dx); void add_to_effort_volumes(const grid_volume &new_effort_volume, double extra_effort); void choose_chunkdivision(const grid_volume &gv, int num_chunks, const boundary_region &br, - const symmetry &s, const binary_partition *bp); + const symmetry &s, const binary_partition *_bp); void check_chunks(); void changing_chunks(); // Helper methods for dumping and loading susceptibilities void set_chiP_from_file(h5file *file, const char *dataset, field_type ft); void write_susceptibility_params(h5file *file, const char *dname, int EorH); + + std::unique_ptr bp; }; // defined in structure.cpp @@ -1317,8 +1398,6 @@ class dft_fields { volume where; }; -enum in_or_out { Incoming = 0, Outgoing }; -enum connect_phase { CONNECT_PHASE = 0, CONNECT_NEGATE = 1, CONNECT_COPY = 2 }; // data for each susceptibility typedef struct polarization_state_s { @@ -1356,8 +1435,8 @@ class fields_chunk { realnum **zeroes[NUM_FIELD_TYPES]; // Holds pointers to metal points. size_t num_zeroes[NUM_FIELD_TYPES]; - realnum **connections[NUM_FIELD_TYPES][CONNECT_COPY + 1][Outgoing + 1]; - size_t num_connections[NUM_FIELD_TYPES][CONNECT_COPY + 1][Outgoing + 1]; + realnum **connections[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][Outgoing + 1]; + size_t num_connections[NUM_FIELD_TYPES][NUM_CONNECT_PHASE_TYPES][Outgoing + 1]; std::complex *connection_phases[NUM_FIELD_TYPES]; int npol[NUM_FIELD_TYPES]; // only E_stuff and H_stuff are used @@ -1567,19 +1646,6 @@ class fields { flux_vol *fluxes; symmetry S; - // The following is an array that is num_chunks by num_chunks. Actually - // it is two arrays, one for the imaginary and one for the real part. - realnum **comm_blocks[NUM_FIELD_TYPES]; - // This is the same size as each comm_blocks array, and store the sizes - // of the comm blocks themselves for each connection-phase type - size_t *comm_sizes[NUM_FIELD_TYPES][CONNECT_COPY + 1]; - size_t comm_size_tot(int f, int pair) const { - size_t sum = 0; - for (int ip = 0; ip < 3; ++ip) - sum += comm_sizes[f][ip][pair]; - return sum; - } - double a, dt; // The resolution a and timestep dt=Courant/a grid_volume gv, user_volume; volume v; @@ -2058,8 +2124,6 @@ class fields { bool locate_point_in_user_volume(ivec *, std::complex *phase) const; void locate_volume_source_in_user_volume(const vec p1, const vec p2, vec newp1[8], vec newp2[8], std::complex kphase[8], int &ncopies) const; - // mympi.cpp - void boundary_communications(field_type); // step.cpp void phase_material(); void step_db(field_type ft); @@ -2088,6 +2152,31 @@ class fields { void am_now_working_on(time_sink); void finished_working(); void reset_timers(); + + private: + // The following is an array that is num_chunks by num_chunks. Actually + // it is two arrays, one for the imaginary and one for the real part. + realnum **comm_blocks[NUM_FIELD_TYPES]; + // Map with all non-zero communication block sizes. + std::unordered_map comm_sizes; + // The sequence of send and receive operations for each field type. + comms_sequence comms_sequence_for_field[NUM_FIELD_TYPES]; + + size_t get_comm_size(const comms_key& key) const { + auto it = comm_sizes.find(key); + return (it != comm_sizes.end()) ? it->second : 0; + } + + size_t comm_size_tot(field_type ft, const chunk_pair& pair) const { + size_t sum = 0; + for (auto ip : all_connect_phases) + sum += get_comm_size({ft, ip, pair}); + return sum; + } + + int chunk_pair_to_index(const chunk_pair& pair) const { + return pair.first + num_chunks * pair.second; + } }; class flux_vol { @@ -2179,7 +2268,7 @@ struct split_plane { }; // binary tree class for importing layout of chunk partition -// Moveable but not copyable. +// Moveable and copyable. class binary_partition { public: // Constructs a new leaf node with id `_id`. @@ -2189,6 +2278,7 @@ class binary_partition { // Takes ownership of `left_tree` and `right_tree`. binary_partition(const split_plane &_split_plane, std::unique_ptr &&left_tree, std::unique_ptr &&right_tree); + binary_partition(const binary_partition& other); bool is_leaf() const; // Returns the leaf node ID iff is_leaf() == true.