From 0d1f803baef8f70c2f43b0f4cdcdf2574759f7c4 Mon Sep 17 00:00:00 2001 From: Alex Dewar Date: Wed, 9 Aug 2023 12:16:53 +0100 Subject: [PATCH] Move some parsing functions into configuration_parsing.cpp --- src/HealthGPS.Console/CMakeLists.txt | 2 + src/HealthGPS.Console/configuration.cpp | 315 +--------------- .../configuration_parsing.cpp | 337 ++++++++++++++++++ src/HealthGPS.Console/configuration_parsing.h | 37 ++ 4 files changed, 387 insertions(+), 304 deletions(-) create mode 100644 src/HealthGPS.Console/configuration_parsing.cpp create mode 100644 src/HealthGPS.Console/configuration_parsing.h diff --git a/src/HealthGPS.Console/CMakeLists.txt b/src/HealthGPS.Console/CMakeLists.txt index e48f055ec..ffe2027e8 100644 --- a/src/HealthGPS.Console/CMakeLists.txt +++ b/src/HealthGPS.Console/CMakeLists.txt @@ -17,6 +17,8 @@ target_sources(HealthGPS.Console "command_options.h" "configuration.cpp" "configuration.h" + "configuration_parsing.cpp" + "configuration_parsing.h" "csvparser.cpp" "csvparser.h" "event_monitor.cpp" diff --git a/src/HealthGPS.Console/configuration.cpp b/src/HealthGPS.Console/configuration.cpp index 802b31e71..b918e57c3 100644 --- a/src/HealthGPS.Console/configuration.cpp +++ b/src/HealthGPS.Console/configuration.cpp @@ -1,4 +1,5 @@ #include "configuration.h" +#include "configuration_parsing.h" #include "jsonparser.h" #include "version.h" @@ -35,304 +36,6 @@ using json = nlohmann::json; ConfigurationError::ConfigurationError(const std::string &msg) : std::runtime_error{msg} {} -auto get_key(const json &j, const std::string &key) { - try { - return j.at(key); - } catch (const std::out_of_range &) { - fmt::print(fg(fmt::color::red), "Missing key \"{}\"\n", key); - throw ConfigurationError{fmt::format("Missing key \"{}\"", key)}; - } -} - -template bool get_to(const json &j, const std::string &key, T &out) { - try { - j.at(key).get_to(out); - return true; - } catch (const std::out_of_range &) { - fmt::print(fg(fmt::color::red), "Missing key \"{}\"\n", key); - return false; - } catch (const json::type_error &) { - fmt::print(fg(fmt::color::red), "Key \"{}\" is of wrong type\n", key); - return false; - } -} - -template bool get_to(const json &j, const std::string &key, T &out, bool &success) { - const bool ret = get_to(j, key, out); - if (!ret) { - success = false; - } - return ret; -} - -void rebase_path(std::filesystem::path &path, const std::filesystem::path &base_dir) { - if (path.is_relative()) { - path = std::filesystem::weakly_canonical(base_dir / path); - } - - if (!std::filesystem::exists(path)) { - throw ConfigurationError{fmt::format("Path does not exist: {}", path.string())}; - } -} - -/// @brief Get a path, based on base_dir, and check if it exists -/// @param j Input JSON -/// @param base_dir Base folder -/// @return An absolute path, assuming that base_dir is the base if relative -/// @throw json::type_error: Invalid JSON types -/// @throw ConfigurationError: Path does not exist -std::filesystem::path get_valid_path(const json &j, const std::filesystem::path &base_dir) { - auto path = j.get(); - rebase_path(path, base_dir); - return path; -} - -bool get_valid_path_to(const json &j, const std::string &key, const std::filesystem::path &base_dir, - std::filesystem::path &out) { - if (!get_to(j, key, out)) { - return false; - } - - try { - rebase_path(out, base_dir); - } catch (const ConfigurationError &) { - fmt::print(fg(fmt::color::red), "Could not find file {}", out.string()); - return false; - } - - return true; -} - -void get_valid_path_to(const json &j, const std::string &key, const std::filesystem::path &base_dir, - std::filesystem::path &out, bool &success) { - if (!get_valid_path_to(j, key, base_dir, out)) { - success = false; - } -} - -/// @brief Load FileInfo from JSON -/// @param j Input JSON -/// @param base_dir Base folder -/// @return FileInfo -/// @throw ConfigurationError: Invalid config file format -auto get_file_info(const json &j, const std::filesystem::path &base_dir) { - const auto dataset = get_key(j, "dataset"); - - bool success = true; - poco::FileInfo info; - get_valid_path_to(dataset, "name", base_dir, info.name, success); - get_to(dataset, "format", info.format, success); - get_to(dataset, "delimiter", info.delimiter, success); - get_to(dataset, "columns", info.columns, success); - if (!success) { - throw ConfigurationError{"Could not load input file info"}; - } - - return info; -} - -/// @brief Load BaselineInfo from JSON -/// @param j Input JSON -/// @param base_dir Base folder -/// @return BaselineInfo -/// @throw json::type_error: Invalid JSON types -/// @throw ConfigurationError: One or more files could not be found -auto get_baseline_info(const json &j, const std::filesystem::path &base_dir) { - const auto &adj = get_key(j, "baseline_adjustments"); - - bool success = true; - poco::BaselineInfo info; - get_to(adj, "format", info.format, success); - get_to(adj, "delimiter", info.delimiter, success); - get_to(adj, "encoding", info.encoding, success); - if (get_to(adj, "file_names", info.file_names, success)) { - // Rebase paths and check for errors - for (auto &[name, path] : info.file_names) { - try { - rebase_path(path, base_dir); - fmt::print("{:<14}, file: {}\n", name, path.string()); - } catch (const ConfigurationError &) { - fmt::print(fg(fmt::color::red), "Could not find file: {}\n", path.string()); - success = false; - } - } - } - - if (!success) { - throw ConfigurationError{"Could not get baseline adjustments"}; - } - - return info; -} - -/// @brief Load ModellingInfo from JSON -/// @param j Input JSON -/// @param base_dir Base folder -/// @throw json::type_error: Invalid JSON types -/// @throw ConfigurationError: Could not load modelling info -void load_modelling_info(const json &j, const std::filesystem::path &base_dir, - Configuration &config) { - bool success = true; - const auto modelling = get_key(j, "modelling"); - - auto &info = config.modelling; - get_to(modelling, "risk_factors", info.risk_factors, success); - - // Rebase paths and check for errors - if (get_to(modelling, "risk_factor_models", info.risk_factor_models, success)) { - for (auto &[type, path] : info.risk_factor_models) { - try { - rebase_path(path, base_dir); - fmt::print("{:<14}, file: {}\n", type, path.string()); - } catch (const ConfigurationError &) { - success = false; - fmt::print(fg(fmt::color::red), "Adjustment type: {}, file: {} not found.\n", type, - path.string()); - } - } - } - - try { - info.baseline_adjustment = get_baseline_info(modelling, base_dir); - } catch (const std::exception &e) { - success = false; - fmt::print(fmt::fg(fmt::color::red), "Could not load baseline adjustment: {}\n", e.what()); - } - - try { - // SES mapping - // TODO: Maybe this needs its own helper function - config.ses = get_key(modelling, "ses_model").get(); - } catch (const std::exception &e) { - success = false; - fmt::print(fmt::fg(fmt::color::red), "Could not load SES mappings"); - } - - if (!success) { - throw ConfigurationError("Could not load modelling info"); - } -} - -void load_interventions(const json &j, Configuration &config) { - const auto interventions = get_key(j, "interventions"); - - try { - // If the type of intervention is null, then there's nothing else to do - if (interventions.at("active_type_id").is_null()) { - return; - } - } catch (const std::out_of_range &) { - throw ConfigurationError{"Interventions section missing key \"active_type_id\""}; - } - - core::Identifier active_type_id; - try { - active_type_id = interventions["active_type_id"].get(); - } catch (const json::type_error &) { - throw ConfigurationError{"active_type_id key must be of type string"}; - } - - /* - * NB: This loads all of the policy scenario info from the JSON file, which is - * strictly speaking unnecessary, but it does mean that we can verify the data - * format is correct. - */ - std::unordered_map policy_types; - if (!get_to(interventions, "types", policy_types)) { - throw ConfigurationError{"Could not load policy types from interventions section"}; - } - - try { - config.intervention = policy_types.at(active_type_id); - config.intervention.identifier = active_type_id.to_string(); - config.has_active_intervention = true; - } catch (const std::out_of_range &) { - throw ConfigurationError{fmt::format("Unknown active intervention type identifier: {}", - active_type_id.to_string())}; - } -} - -void load_running_info(const json &j, Configuration &config) { - const auto running = get_key(j, "running"); - - bool success = true; - get_to(running, "start_time", config.start_time, success); - get_to(running, "stop_time", config.stop_time, success); - get_to(running, "trial_runs", config.trial_runs, success); - get_to(running, "sync_timeout_ms", config.sync_timeout_ms, success); - get_to(running, "diseases", config.diseases, success); - - // I copied this logic from the old code, but it seems strange to me. Why do we - // store multiple seeds but only use the first? -- Alex - std::vector seeds; - if (get_to(running, "seed", seeds, success) && !seeds.empty()) { - config.custom_seed = seeds[0]; - } - - // Intervention Policy - try { - load_interventions(running, config); - } catch (const ConfigurationError &e) { - success = false; - fmt::print(fmt::fg(fmt::color::red), "Could not load interventions: {}", e.what()); - } - - if (!success) { - throw ConfigurationError{"Could not load running info"}; - } -} - -bool check_version(const json &j) { - int version; - if (!get_to(j, "version", version)) { - fmt::print(fg(fmt::color::red), "Invalid definition, file must have a schema version"); - return false; - } - - if (version != 2) { - fmt::print(fg(fmt::color::red), "Configuration schema version: {} mismatch, supported: 2", - version); - return false; - } - - return true; -} - -auto get_settings(const json &j) { - if (!j.contains("settings")) { - fmt::print(fg(fmt::color::red), "\"settings\" key missing"); - throw ConfigurationError{"\"settings\" key missing"}; - } - - return j["settings"].get(); -} - -void load_inputs(const json &j, const std::filesystem::path &config_dir, Configuration &config) { - const auto inputs = get_key(j, "inputs"); - bool success = true; - - // Input dataset file - try { - config.file = get_file_info(inputs, config_dir); - fmt::print("Input dataset file: {}\n", config.file.name.string()); - } catch (const std::exception &e) { - success = false; - fmt::print(fg(fmt::color::red), "Could not load dataset file: {}\n", e.what()); - } - - // Settings - try { - config.settings = get_settings(inputs); - } catch (const std::exception &e) { - success = false; - fmt::print(fg(fmt::color::red), "Could not load settings info"); - } - - if (!success) { - throw ConfigurationError{"Could not load settings info"}; - } -} - Configuration get_configuration(CommandOptions &options) { MEASURE_FUNCTION(); namespace fs = std::filesystem; @@ -363,7 +66,10 @@ Configuration get_configuration(CommandOptions &options) { } }(); - if (!check_version(opt)) { + // Check the file format version + try { + check_version(opt); + } catch (const ConfigurationError &) { success = false; } @@ -372,7 +78,7 @@ Configuration get_configuration(CommandOptions &options) { // input dataset file try { - load_inputs(opt, config_dir, config); + load_input_info(opt, config, config_dir); fmt::print("Input dataset file: {}\n", config.file.name.string()); } catch (const std::exception &e) { success = false; @@ -381,7 +87,7 @@ Configuration get_configuration(CommandOptions &options) { // Modelling information try { - load_modelling_info(opt, config_dir, config); + load_modelling_info(opt, config, config_dir); } catch (const std::exception &e) { success = false; fmt::print(fg(fmt::color::red), "Could not load modelling info: {}\n", e.what()); @@ -395,9 +101,10 @@ Configuration get_configuration(CommandOptions &options) { fmt::print(fg(fmt::color::red), "Could not load running info: {}\n", e.what()); } - if (get_to(opt, "output", config.output, success)) { - config.output.folder = expand_environment_variables(config.output.folder); - } else { + try { + load_output_info(opt, config); + } catch (const ConfigurationError &) { + success = false; fmt::print(fg(fmt::color::red), "Could not load output info"); } diff --git a/src/HealthGPS.Console/configuration_parsing.cpp b/src/HealthGPS.Console/configuration_parsing.cpp new file mode 100644 index 000000000..1a36ff947 --- /dev/null +++ b/src/HealthGPS.Console/configuration_parsing.cpp @@ -0,0 +1,337 @@ +#include "configuration_parsing.h" +#include "jsonparser.h" + +#include +#include +#include + +namespace host { +using json = nlohmann::json; + +/// @brief Load value from JSON, printing an error message if it fails +/// @param j JSON object +/// @param key Key to value +/// @throw ConfigurationError: Key not found +/// @return Key value +auto get(const json &j, const std::string &key) { + try { + return j.at(key); + } catch (const std::out_of_range &) { + fmt::print(fg(fmt::color::red), "Missing key \"{}\"\n", key); + throw ConfigurationError{fmt::format("Missing key \"{}\"", key)}; + } +} + +/// @brief Get value from JSON object and store in out +/// @tparam T Type of output object +/// @param j JSON object +/// @param key Key to value +/// @param out Output object +/// @return True if value was retrieved successfully, false otherwise +template bool get_to(const json &j, const std::string &key, T &out) { + try { + j.at(key).get_to(out); + return true; + } catch (const std::out_of_range &) { + fmt::print(fg(fmt::color::red), "Missing key \"{}\"\n", key); + return false; + } catch (const json::type_error &) { + fmt::print(fg(fmt::color::red), "Key \"{}\" is of wrong type\n", key); + return false; + } +} + +/// @brief Get value from JSON object and store in out, setting success flag +/// @tparam T Type of output object +/// @param j JSON object +/// @param key Key to value +/// @param out Output object +/// @param success Success flag, set to false in case of failure +/// @return True if value was retrieved successfully, false otherwise +template bool get_to(const json &j, const std::string &key, T &out, bool &success) { + const bool ret = get_to(j, key, out); + if (!ret) { + success = false; + } + return ret; +} + +/// @brief Rebase path on base_dir +/// @param path Initial path (relative or absolute) +/// @param base_dir New base directory for relative path +/// @throw ConfigurationError: If path does not exist +void rebase_valid_path(std::filesystem::path &path, const std::filesystem::path &base_dir) { + if (path.is_relative()) { + path = std::filesystem::weakly_canonical(base_dir / path); + } + + if (!std::filesystem::exists(path)) { + throw ConfigurationError{fmt::format("Path does not exist: {}", path.string())}; + } +} + +/// @brief Get a valid path from a JSON object +/// @param j JSON object +/// @param key Key to value +/// @param base_dir Base directory for relative path +/// @param out Output variable +/// @return True if value was retrieved successfully and is valid path, false otherwise +bool get_valid_path_to(const json &j, const std::string &key, const std::filesystem::path &base_dir, + std::filesystem::path &out) { + if (!get_to(j, key, out)) { + return false; + } + + try { + rebase_valid_path(out, base_dir); + } catch (const ConfigurationError &) { + fmt::print(fg(fmt::color::red), "Could not find file {}", out.string()); + return false; + } + + return true; +} + +/// @brief Get a valid path from a JSON object +/// @param j JSON object +/// @param key Key to value +/// @param base_dir Base directory for relative path +/// @param out Output variable +/// @param success Success flag, set to false in case of failure +void get_valid_path_to(const json &j, const std::string &key, const std::filesystem::path &base_dir, + std::filesystem::path &out, bool &success) { + if (!get_valid_path_to(j, key, base_dir, out)) { + success = false; + } +} + +/// @brief Load FileInfo from JSON +/// @param j Input JSON +/// @param base_dir Base folder +/// @return FileInfo +/// @throw ConfigurationError: Invalid config file format +auto get_file_info(const json &j, const std::filesystem::path &base_dir) { + const auto dataset = get(j, "dataset"); + + bool success = true; + poco::FileInfo info; + get_valid_path_to(dataset, "name", base_dir, info.name, success); + get_to(dataset, "format", info.format, success); + get_to(dataset, "delimiter", info.delimiter, success); + get_to(dataset, "columns", info.columns, success); + if (!success) { + throw ConfigurationError{"Could not load input file info"}; + } + + return info; +} + +auto get_settings(const json &j) { + poco::SettingsInfo info; + if (!get_to(j, "settings", info)) { + throw ConfigurationError{"Could not load settings info"}; + } + + return info; +} + +/// @brief Load BaselineInfo from JSON +/// @param j Input JSON +/// @param base_dir Base folder +/// @return BaselineInfo +/// @throw ConfigurationError: One or more files could not be found +auto get_baseline_info(const json &j, const std::filesystem::path &base_dir) { + const auto &adj = get(j, "baseline_adjustments"); + + bool success = true; + poco::BaselineInfo info; + get_to(adj, "format", info.format, success); + get_to(adj, "delimiter", info.delimiter, success); + get_to(adj, "encoding", info.encoding, success); + if (get_to(adj, "file_names", info.file_names, success)) { + // Rebase paths and check for errors + for (auto &[name, path] : info.file_names) { + try { + rebase_valid_path(path, base_dir); + fmt::print("{:<14}, file: {}\n", name, path.string()); + } catch (const ConfigurationError &) { + fmt::print(fg(fmt::color::red), "Could not find file: {}\n", path.string()); + success = false; + } + } + } + + if (!success) { + throw ConfigurationError{"Could not get baseline adjustments"}; + } + + return info; +} + +/// @brief Load interventions from running section +/// @param running Running section of JSON object +/// @param config Config object to update +/// @throw ConfigurationError: Could not load interventions +void load_interventions(const json &running, Configuration &config) { + const auto interventions = get(running, "interventions"); + + try { + // If the type of intervention is null, then there's nothing else to do + if (interventions.at("active_type_id").is_null()) { + return; + } + } catch (const std::out_of_range &) { + throw ConfigurationError{"Interventions section missing key \"active_type_id\""}; + } + + const auto active_type_id = [&interventions]() { + try { + return interventions["active_type_id"].get(); + } catch (const json::type_error &) { + throw ConfigurationError{"active_type_id key must be of type string"}; + } + }(); + + /* + * NB: This loads all of the policy scenario info from the JSON file, which is + * strictly speaking unnecessary, but it does mean that we can verify the data + * format is correct. + */ + std::unordered_map policy_types; + if (!get_to(interventions, "types", policy_types)) { + throw ConfigurationError{"Could not load policy types from interventions section"}; + } + + try { + config.intervention = policy_types.at(active_type_id); + config.intervention.identifier = active_type_id.to_string(); + config.has_active_intervention = true; + } catch (const std::out_of_range &) { + throw ConfigurationError{fmt::format("Unknown active intervention type identifier: {}", + active_type_id.to_string())}; + } +} + +void check_version(const json &j) { + int version; + if (!get_to(j, "version", version)) { + throw ConfigurationError{"File must have a schema version"}; + } + + if (version != 2) { + throw ConfigurationError{ + fmt::format("Configuration schema version: {} mismatch, supported: 2", version)}; + } +} + +void load_input_info(const json &j, Configuration &config, + const std::filesystem::path &config_dir) { + const auto inputs = get(j, "inputs"); + bool success = true; + + // Input dataset file + try { + config.file = get_file_info(inputs, config_dir); + fmt::print("Input dataset file: {}\n", config.file.name.string()); + } catch (const std::exception &e) { + success = false; + fmt::print(fg(fmt::color::red), "Could not load dataset file: {}\n", e.what()); + } + + // Settings + try { + config.settings = get_settings(inputs); + } catch (const std::exception &e) { + success = false; + fmt::print(fg(fmt::color::red), "Could not load settings info"); + } + + if (!success) { + throw ConfigurationError{"Could not load settings info"}; + } +} + +void load_modelling_info(const json &j, Configuration &config, + const std::filesystem::path &config_dir) { + bool success = true; + const auto modelling = get(j, "modelling"); + + auto &info = config.modelling; + get_to(modelling, "risk_factors", info.risk_factors, success); + + // Rebase paths and check for errors + if (get_to(modelling, "risk_factor_models", info.risk_factor_models, success)) { + for (auto &[type, path] : info.risk_factor_models) { + try { + rebase_valid_path(path, config_dir); + fmt::print("{:<14}, file: {}\n", type, path.string()); + } catch (const ConfigurationError &) { + success = false; + fmt::print(fg(fmt::color::red), "Adjustment type: {}, file: {} not found.\n", type, + path.string()); + } + } + } + + try { + info.baseline_adjustment = get_baseline_info(modelling, config_dir); + } catch (const std::exception &e) { + success = false; + fmt::print(fmt::fg(fmt::color::red), "Could not load baseline adjustment: {}\n", e.what()); + } + + try { + // SES mapping + // TODO: Maybe this needs its own helper function + config.ses = get(modelling, "ses_model").get(); + } catch (const std::exception &e) { + success = false; + fmt::print(fmt::fg(fmt::color::red), "Could not load SES mappings"); + } + + if (!success) { + throw ConfigurationError("Could not load modelling info"); + } +} + +void load_running_info(const json &j, Configuration &config) { + const auto running = get(j, "running"); + + bool success = true; + get_to(running, "start_time", config.start_time, success); + get_to(running, "stop_time", config.stop_time, success); + get_to(running, "trial_runs", config.trial_runs, success); + get_to(running, "sync_timeout_ms", config.sync_timeout_ms, success); + get_to(running, "diseases", config.diseases, success); + + { + // I copied this logic from the old code, but it seems strange to me. Why do we + // store multiple seeds but only use the first? -- Alex + std::vector seeds; + if (get_to(running, "seed", seeds, success) && !seeds.empty()) { + config.custom_seed = seeds[0]; + } + } + + // Intervention Policy + try { + load_interventions(running, config); + } catch (const ConfigurationError &e) { + success = false; + fmt::print(fmt::fg(fmt::color::red), "Could not load interventions: {}", e.what()); + } + + if (!success) { + throw ConfigurationError{"Could not load running info"}; + } +} + +void load_output_info(const json &j, Configuration &config) { + if (!get_to(j, "output", config.output)) { + throw ConfigurationError{"Could not load output info"}; + } + + config.output.folder = expand_environment_variables(config.output.folder); +} + +} // namespace host diff --git a/src/HealthGPS.Console/configuration_parsing.h b/src/HealthGPS.Console/configuration_parsing.h new file mode 100644 index 000000000..ff6a5984c --- /dev/null +++ b/src/HealthGPS.Console/configuration_parsing.h @@ -0,0 +1,37 @@ +#pragma once +#include "configuration.h" + +namespace host { +/// @brief Check the schema version and throw if invalid +/// @param j The root JSON object +/// @throw ConfigurationError: If version attribute is not present or invalid +void check_version(const nlohmann::json &j); + +/// @brief Load input dataset +/// @param j The root JSON object +/// @param config The config object to update +/// @param config_dir The directory of the config file +/// @throw ConfigurationError: Could not load input dataset +void load_input_info(const nlohmann::json &j, Configuration &config, + const std::filesystem::path &config_dir); + +/// @brief Load ModellingInfo from JSON +/// @param j The root JSON object +/// @param config The config object to update +/// @param config_dir The directory of the config file +/// @throw ConfigurationError: Could not load modelling info +void load_modelling_info(const nlohmann::json &j, Configuration &config, + const std::filesystem::path &config_dir); + +/// @brief Load running section of JSON object +/// @param j The root JSON object +/// @param config The config object to update +/// @throw ConfigurationError: Could not load running section +void load_running_info(const nlohmann::json &j, Configuration &config); + +/// @brief Load output section of JSON object +/// @param j The root JSON object +/// @param config The config object to update +/// @throw ConfigurationError: Could not load output info +void load_output_info(const nlohmann::json &j, Configuration &config); +} // namespace host \ No newline at end of file