Skip to content

Commit

Permalink
Filename extensions: Allow specifying wildcards (#1584)
Browse files Browse the repository at this point in the history
* Rough, but fundamentally functioning draft

* Properly initialize

* Only read extensions from disk if the access type is read

* Avoid parameter shadowing

* Temporary attempt to defer initialization

todo:

1. temporarily initialize with dummy io handler
2. move the late initialization logic to IOHandler()

* Without Cleanup: todos from previous commit

* Cleanup for previous, to be squashed

* More fine-grained deferral of initialization

* Fixes that would theoretically allow deferring also CREATE mode

* Initialize early if possible, defer only when needed

* Access type related fixes

* CI fixes for MSVC

* Select ADIOS2 file ending more specifically

* Little fix

* Slight fixes

* Testing

* Add to examples

* Better error messages

* Little fix

* Documentation

* Cleanup

* Fix test

* ambiguous

Co-authored-by: Franz Pöschel <[email protected]>

* Include Order

* Missing Includes

Co-authored-by: Franz Pöschel <[email protected]>

* First read/write

---------

Co-authored-by: Axel Huebl <[email protected]>
  • Loading branch information
franzpoeschel and ax3l authored Feb 28, 2024
1 parent d64dbc2 commit c2e0e83
Show file tree
Hide file tree
Showing 17 changed files with 659 additions and 110 deletions.
15 changes: 13 additions & 2 deletions docs/source/usage/firstread.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ C++17
.. code-block:: cpp
auto series = io::Series(
"data%T.h5",
"data_%T.h5",
io::Access::READ_ONLY);
Expand All @@ -96,9 +96,20 @@ Python
.. code-block:: python3
series = io.Series(
"data%T.h5",
"data_%T.h5",
io.Access.read_only)
.. tip::

Replace the file ending ``.h5`` with a wildcard ``.%E`` to let openPMD autodetect the ending from the file system.
Use the wildcard ``%T`` to match filename-encoded iterations.

.. tip::

More detailed options can be passed via JSON or TOML as a further constructor parameter.
Try ``{"defer_iteration_parsing": true}`` to speed up the first access.
(Remember to explicitly open iterations via ``it.open()`` in that case.)

Iteration
---------

Expand Down
2 changes: 1 addition & 1 deletion docs/source/usage/firstwrite.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ Python
Iteration
---------

Grouping by an arbitrary, positive integer number ``<N>`` in a series:
Grouping by an arbitrary, nonnegative integer number ``<N>`` in a series:

C++17
^^^^^
Expand Down
2 changes: 2 additions & 0 deletions examples/2a_read_thetaMode_serial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ using namespace openPMD;

int main()
{
/* The file ending is given explicitly here (`data%T.h5`). Alternatively, the
 * pattern %E (`data%T.%E`) instructs the openPMD-api to determine the file
 * ending automatically. */
Series series =
Series("../samples/git-sample/thetaMode/data%T.h5", Access::READ_ONLY);

Expand Down
2 changes: 2 additions & 0 deletions examples/2a_read_thetaMode_serial.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import openpmd_api as io

if __name__ == "__main__":
# The file ending is given explicitly here (`data%T.h5`). Alternatively, the
# pattern %E (`data%T.%E`) instructs the openPMD-api to determine the file
# ending automatically.
series = io.Series("../samples/git-sample/thetaMode/data%T.h5",
io.Access.read_only)

Expand Down
12 changes: 9 additions & 3 deletions include/openPMD/IO/AbstractIOHandler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,12 @@ class AbstractIOHandler
{}
virtual ~AbstractIOHandler() = default;

/*
 * Copy and move operations are explicitly defaulted: the user-declared
 * (virtual) destructor above suppresses the implicitly generated move
 * operations, so they have to be requested explicitly.
 * The assignment operators require the data members of this class to be
 * non-const.
 * NOTE(review): this is a polymorphic base class, so copying through a
 * base reference slices the derived part -- confirm that callers only
 * copy/assign handlers of the same dynamic type.
 */
AbstractIOHandler(AbstractIOHandler const &) = default;
AbstractIOHandler(AbstractIOHandler &&) = default;

AbstractIOHandler &operator=(AbstractIOHandler const &) = default;
AbstractIOHandler &operator=(AbstractIOHandler &&) = default;

/** Add provided task to queue according to FIFO.
*
* @param iotask Task to be executed after all previously enqueued
Expand Down Expand Up @@ -245,7 +251,7 @@ class AbstractIOHandler
/** The currently used backend */
virtual std::string backendName() const = 0;

std::string const directory;
std::string directory;
/*
* Originally, the reason for distinguishing these two was that during
* parsing in reading access modes, the access type would be temporarily
Expand All @@ -261,8 +267,8 @@ class AbstractIOHandler
* which is entirely implemented by the frontend, which internally uses
* the backend in CREATE mode.
*/
Access const m_backendAccess;
Access const m_frontendAccess;
Access m_backendAccess;
Access m_frontendAccess;
internal::SeriesStatus m_seriesStatus = internal::SeriesStatus::Default;
std::queue<IOTask> m_work;
/**
Expand Down
1 change: 1 addition & 0 deletions include/openPMD/IO/DummyIOHandler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,6 @@ class DummyIOHandler : public AbstractIOHandler
* without IO.
*/
std::future<void> flush(internal::ParsedFlushParams &) override;
/** Name of this backend, overriding AbstractIOHandler::backendName().
 *
 * @return The string "Dummy".
 */
std::string backendName() const override;
}; // DummyIOHandler
} // namespace openPMD
1 change: 1 addition & 0 deletions include/openPMD/IO/Format.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ enum class Format
ADIOS2_SSC,
JSON,
TOML,
GENERIC,
DUMMY
};

Expand Down
17 changes: 4 additions & 13 deletions include/openPMD/IO/IOTask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -689,19 +689,10 @@ class OPENPMDAPI_EXPORT IOTask
, parameter{std::move(p).to_heap()}
{}

explicit IOTask(IOTask const &other)
: writable{other.writable}
, operation{other.operation}
, parameter{other.parameter}
{}

IOTask &operator=(IOTask const &other)
{
writable = other.writable;
operation = other.operation;
parameter = other.parameter;
return *this;
}
/*
 * Copy and move special members. They are only declared here; the
 * definitions (defaulted) live out-of-line in the implementation file.
 * The move operations are declared noexcept.
 */
IOTask(IOTask const &other);
IOTask(IOTask &&other) noexcept;
IOTask &operator=(IOTask const &other);
IOTask &operator=(IOTask &&other) noexcept;

Writable *writable;
Operation operation;
Expand Down
92 changes: 84 additions & 8 deletions include/openPMD/Series.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
*/
#pragma once

#include "openPMD/Error.hpp"
#include "openPMD/IO/AbstractIOHandler.hpp"
#include "openPMD/IO/Access.hpp"
#include "openPMD/IO/Format.hpp"
Expand All @@ -40,10 +41,14 @@

#include <cstdint> // uint64_t
#include <deque>
#include <functional>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>
#include <tuple>

// expose private and protected members for invasive testing
#ifndef OPENPMD_private
Expand Down Expand Up @@ -192,6 +197,9 @@ namespace internal
*/
std::optional<ParsePreference> m_parsePreference;

/**
 * Optional callback for deferred initialization of the Series.
 *
 * Empty (std::nullopt) by default. If set, the stored function takes the
 * Series and returns a (non-owning, raw) pointer to an AbstractIOHandler.
 * NOTE(review): presumably invoked by Series::runDeferredInitialization()
 * when the IO handler cannot be created at construction time (e.g. when
 * the filename extension must first be resolved) -- confirm against the
 * implementation.
 */
std::optional<std::function<AbstractIOHandler *(Series &)>>
m_deferred_initialization = std::nullopt;

void close();
}; // SeriesData

Expand Down Expand Up @@ -221,6 +229,18 @@ class Series : public Attributable
explicit Series();

#if openPMD_HAVE_MPI
/**
* @brief Construct a new Series
*
* For further details, refer to the documentation of the non-MPI overload.
*
* @param filepath The file path.
* @param at Access mode.
* @param comm The MPI communicator.
* @param options Advanced backend configuration via JSON/TOML.
* May be specified as a JSON/TOML-formatted string directly, or as a
* path to a JSON/TOML textfile, prepended by an at sign '@'.
*/
Series(
std::string const &filepath,
Access at,
Expand All @@ -229,13 +249,50 @@ class Series : public Attributable
#endif

/**
* @brief Construct a new Series
*
* @param filepath The backend will be determined by the filepath extension.
* @brief Construct a new Series.
*
* For details on access modes, JSON/TOML configuration and iteration
* encoding, refer to:
*
* * https://openpmd-api.readthedocs.io/en/latest/usage/workflow.html#access-modes
* * https://openpmd-api.readthedocs.io/en/latest/details/backendconfig.html
* * https://openpmd-api.readthedocs.io/en/latest/usage/concepts.html#iteration-and-series
*
* In case of file-based iteration encoding, the file names for each
* iteration are determined by an expansion pattern that must be specified.
* It takes one of two possible forms:
*
* 1. Simple form: %T is replaced with the iteration index, e.g.
* `simData_%T.bp` becomes `simData_50.bp`.
* 2. Padded form: e.g. %06T is replaced with the iteration index padded to
* at least six digits. `simData_%06T.bp` becomes `simData_000050.bp`.
*
* The backend is determined:
*
* 1. Explicitly via the JSON/TOML parameter `backend`, e.g. `{"backend":
* "adios2"}`.
* 2. Otherwise implicitly from the filename extension, e.g.
* `simData_%T.h5`.
*
* The filename extension can be replaced with a globbing pattern %E.
* It will be replaced with an automatically determined file name extension:
*
* 1. In CREATE mode: The extension is set to a backend-specific default
* extension. This requires that the backend is specified via JSON/TOML.
* 2. In READ_ONLY, READ_WRITE and READ_LINEAR modes: These modes require
* that files already exist on disk. The disk will be scanned for files
* that match the pattern and the resulting file extension will be used.
* If the result is ambiguous or no such file is found, an error is
* raised.
* 3. In APPEND mode: Like (2.), except if no matching file is found. In
* that case, the procedure of (1.) is used, owing to the fact that
* APPEND mode can be used to create new datasets.
*
* @param filepath The file path.
* @param at Access mode.
* @param options Advanced backend configuration via JSON/TOML.
* May be specified as a JSON-formatted string directly, or as a path
* to a JSON textfile, prepended by an at sign '@'.
* May be specified as a JSON/TOML-formatted string directly, or as a
* path to a JSON/TOML textfile, prepended by an at sign '@'.
*/
Series(
std::string const &filepath,
Expand Down Expand Up @@ -502,6 +559,7 @@ class Series : public Attributable
* @return String of a pattern for data backend.
*/
std::string backend() const;
/** Non-const overload of backend().
 *
 * NOTE(review): presumably exists so that a pending deferred
 * initialization can be run before the backend is queried -- confirm
 * against the implementation.
 *
 * @return String of a pattern for data backend.
 */
std::string backend();

/** Execute all required remaining IO operations to write or read data.
*
Expand Down Expand Up @@ -636,7 +694,20 @@ OPENPMD_private
void parseJsonOptions(TracingJSON &options, ParsedInput &);
bool hasExpansionPattern(std::string filenameWithExtension);
bool reparseExpansionPattern(std::string filenameWithExtension);
void init(std::unique_ptr<AbstractIOHandler>, std::unique_ptr<ParsedInput>);
/**
 * Initialize this Series from a file path, an access mode and an options
 * string.
 *
 * The trailing parameter pack is forwarded as-is.
 * NOTE(review): presumably it is either empty (serial) or holds a single
 * MPI communicator (parallel), so that both constructor flavors share
 * this routine -- confirm against the implementation.
 *
 * @param filepath The file path.
 * @param at Access mode.
 * @param options Backend configuration string.
 */
template <typename... MPI_Communicator>
void init(
std::string const &filepath,
Access at,
std::string const &options,
MPI_Communicator &&...);
/**
 * Parse the input and options for setting up the IO handler.
 *
 * NOTE(review): the template parameter is deliberately(?) named like the
 * TracingJSON type used elsewhere in this class; presumably this allows
 * returning that type by value without requiring its definition in this
 * header -- confirm.
 *
 * @param filepath The file path.
 * @param options Backend configuration string.
 * @param at Access mode.
 * @param resolve_generic_extension NOTE(review): presumably selects
 *        whether a generic `%E` filename extension is resolved to a
 *        concrete one at this point -- confirm.
 * @return The parsed input together with the (template-typed) options.
 */
template <typename TracingJSON>
std::tuple<std::unique_ptr<ParsedInput>, TracingJSON> initIOHandler(
std::string const &filepath,
std::string const &options,
Access at,
bool resolve_generic_extension);
/**
 * Initialize the Series with an IO handler and the parsed input.
 *
 * Both arguments are passed as std::unique_ptr by value, i.e. ownership
 * is transferred to this function.
 */
void initSeries(
std::unique_ptr<AbstractIOHandler>, std::unique_ptr<ParsedInput>);
void initDefaults(IterationEncoding, bool initAll = false);
/**
* @brief Internal call for flushing a Series.
Expand Down Expand Up @@ -688,7 +759,7 @@ OPENPMD_private
* ReadIterations since those methods should be aware when the current step
* is broken).
*/
std::optional<std::deque<IterationIndex_t> > readGorVBased(
std::optional<std::deque<IterationIndex_t>> readGorVBased(
bool do_always_throw_errors,
bool init,
std::set<IterationIndex_t> const &ignoreIterations = {});
Expand Down Expand Up @@ -758,7 +829,12 @@ OPENPMD_private
* Returns the current content of the /data/snapshot attribute.
* (We could also add this to the public API some time)
*/
std::optional<std::vector<IterationIndex_t> > currentSnapshot() const;
std::optional<std::vector<IterationIndex_t>> currentSnapshot() const;

/**
 * Run the deferred initialization of this Series.
 *
 * NOTE(review): presumably invokes the callback stored in
 * SeriesData::m_deferred_initialization and returns the IO handler it
 * yields -- confirm against the implementation.
 *
 * @return Non-owning pointer to an IO handler.
 */
AbstractIOHandler *runDeferredInitialization();

/**
 * Access the IO handler of this Series (non-owning pointer).
 *
 * NOTE(review): the non-const overload presumably triggers a pending
 * deferred initialization before returning, which the const overload
 * cannot do -- confirm against the implementation.
 */
AbstractIOHandler *IOHandler();
AbstractIOHandler const *IOHandler() const;
}; // Series
} // namespace openPMD

Expand Down
4 changes: 4 additions & 0 deletions src/Format.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Format determineFormat(std::string const &filename)
return Format::JSON;
if (auxiliary::ends_with(filename, ".toml"))
return Format::TOML;
if (auxiliary::ends_with(filename, ".%E"))
return Format::GENERIC;

// Format might still be specified via JSON
return Format::DUMMY;
Expand All @@ -70,6 +72,8 @@ std::string suffix(Format f)
return ".json";
case Format::TOML:
return ".toml";
case Format::GENERIC:
return ".%E";
default:
return "";
}
Expand Down
35 changes: 25 additions & 10 deletions src/IO/ADIOS/ADIOS2IOHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,17 +354,32 @@ std::string ADIOS2IOHandlerImpl::fileSuffix(bool verbose) const
{
// SST engine adds its suffix unconditionally
// so we don't add it
// File ending that the generic `.%E` extension maps to for engines that
// are not tied to one specific BP version (the "file" and "filestream"
// entries below): `.bp5` if ADIOS2_HAVE_BP5 is defined and
// openPMD_HAS_ADIOS_2_9 is set, `.bp4` otherwise.
#if defined(ADIOS2_HAVE_BP5) && openPMD_HAS_ADIOS_2_9
constexpr char const *const default_file_ending = ".bp5";
#else
constexpr char const *const default_file_ending = ".bp4";
#endif

static std::map<std::string, AcceptedEndingsForEngine> const endings{
{"sst", {{"", ""}, {".sst", ""}}},
{"staging", {{"", ""}, {".sst", ""}}},
{"filestream", {{".bp", ".bp"}, {".bp4", ".bp4"}, {".bp5", ".bp5"}}},
{"bp4", {{".bp4", ".bp4"}, {".bp", ".bp"}}},
{"bp5", {{".bp5", ".bp5"}, {".bp", ".bp"}}},
{"bp3", {{".bp", ".bp"}}},
{"file", {{".bp", ".bp"}, {".bp4", ".bp4"}, {".bp5", ".bp5"}}},
{"hdf5", {{".h5", ".h5"}}},
{"nullcore", {{".nullcore", ".nullcore"}, {".bp", ".bp"}}},
{"ssc", {{".ssc", ".ssc"}}}};
{"sst", {{"", ""}, {".sst", ""}, {".%E", ""}}},
{"staging", {{"", ""}, {".sst", ""}, {".%E", ""}}},
{"filestream",
{{".bp", ".bp"},
{".bp4", ".bp4"},
{".bp5", ".bp5"},
{".%E", default_file_ending}}},
{"bp4", {{".bp4", ".bp4"}, {".bp", ".bp"}, {".%E", ".bp4"}}},
{"bp5", {{".bp5", ".bp5"}, {".bp", ".bp"}, {".%E", ".bp5"}}},
{"bp3", {{".bp", ".bp"}, {".%E", ".bp"}}},
{"file",
{{".bp", ".bp"},
{".bp4", ".bp4"},
{".bp5", ".bp5"},
{".%E", default_file_ending}}},
{"hdf5", {{".h5", ".h5"}, {".%E", ".h5"}}},
{"nullcore",
{{".nullcore", ".nullcore"}, {".bp", ".bp"}, {".%E", ".nullcore"}}},
{"ssc", {{".ssc", ".ssc"}, {".%E", ".ssc"}}}};

if (auto engine = endings.find(m_engineType); engine != endings.end())
{
Expand Down
5 changes: 5 additions & 0 deletions src/IO/DummyIOHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,9 @@ std::future<void> DummyIOHandler::flush(internal::ParsedFlushParams &)
{
return std::future<void>();
}

/** Report the name of this backend: the dummy handler is always "Dummy". */
std::string DummyIOHandler::backendName() const
{
    return std::string{"Dummy"};
}
} // namespace openPMD
6 changes: 6 additions & 0 deletions src/IO/IOTask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,10 @@ namespace internal
}
}
} // namespace internal

// Out-of-line definitions of the copy/move special members of IOTask.
// All of them are compiler-generated (defaulted); the move operations
// are noexcept.
IOTask::IOTask(IOTask const &) = default;
IOTask::IOTask(IOTask &&) noexcept = default;

IOTask &IOTask::operator=(IOTask const &) = default;
IOTask &IOTask::operator=(IOTask &&) noexcept = default;
} // namespace openPMD
Loading

0 comments on commit c2e0e83

Please sign in to comment.