Skip to content

Commit

Permalink
Allow for using zipped data folders
Browse files Browse the repository at this point in the history
Closes #363.
  • Loading branch information
alexdewar committed May 30, 2024
1 parent 31e46b1 commit a3ae4f5
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 13 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore

# ------------ Custom User Options ----------------
*.zip

# CMake presets, build directories and binary files
build/
Expand Down
15 changes: 12 additions & 3 deletions src/HealthGPS.Datastore/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
find_package(fmt CONFIG REQUIRED)
find_package(jsoncons CONFIG REQUIRED)
find_package(libzippp CONFIG REQUIRED)

add_library(HealthGPS.Datastore STATIC "")
target_compile_features(HealthGPS.Datastore PUBLIC cxx_std_${CMAKE_CXX_STANDARD})

target_sources(HealthGPS.Datastore PRIVATE "api.h" "datamanager.cpp" "datamanager.h" "schema.cpp"
"schema.h")
target_sources(
HealthGPS.Datastore
PRIVATE "api.h"
"datamanager.cpp"
"datamanager.h"
"schema.cpp"
"schema.h"
"zip_file.cpp"
"zip_file.h")

target_link_libraries(HealthGPS.Datastore PRIVATE HealthGPS.Core fmt::fmt jsoncons)
target_link_libraries(HealthGPS.Datastore PRIVATE HealthGPS.Core fmt::fmt jsoncons
libzippp::libzippp)

set(ROOT_NAMESPACE hgps::data)
39 changes: 29 additions & 10 deletions src/HealthGPS.Datastore/datamanager.cpp
Original file line number Diff line number Diff line change
@@ -1,41 +1,60 @@
#include "datamanager.h"
#include "schema.h"
#include "zip_file.h"

#include "HealthGPS.Core/math_util.h"
#include "HealthGPS.Core/string_util.h"
#include "HealthGPS/program_path.h"
#include "schema.h"

#include <fmt/color.h>
#include <rapidcsv.h>

#include <fstream>
#include <utility>

namespace hgps::data {
DataManager::DataManager(std::filesystem::path root_directory, VerboseMode verbosity)
: root_{std::move(root_directory)}, verbosity_{verbosity} {
auto full_filename = root_ / "index.json";
namespace {
nlohmann::json read_input_files_from_directory(const std::filesystem::path &root_directory) {
auto full_filename = root_directory / "index.json";
auto ifs = std::ifstream{full_filename};
if (!ifs) {
throw std::invalid_argument(
fmt::format("File-based store, index file: '{}' not found.", full_filename.string()));
}

index_ = nlohmann::json::parse(ifs);
auto index = nlohmann::json::parse(ifs);

if (!index_.contains("version")) {
if (!index.contains("version")) {
throw std::runtime_error("File-based store, invalid definition missing schema version");
}

auto version = index_["version"].get<int>();
auto version = index["version"].get<int>();
if (version != 2) {
throw std::runtime_error(fmt::format(
"File-based store, index schema version: {} mismatch, supported: 2", version));
}

// Validate against schema
ifs.seekg(0);
const auto schema_directory = get_program_directory() / "schemas" / "v1";
validate_index(schema_directory, ifs);
const auto schema_directory = hgps::get_program_directory() / "schemas" / "v1";
hgps::data::validate_index(schema_directory, ifs);

return index;
}
} // anonymous namespace

namespace hgps::data {
/// @brief Initialise the data manager from a data folder or a zipped data folder
/// @details If @p path refers to a regular file it is assumed to be a zip archive, which is
/// extracted into a newly created temporary directory that then becomes the store's root.
/// Otherwise @p path itself is used as the root folder. The index file is then loaded from
/// the root. The temporary directory is not removed by this constructor on success, because
/// the extracted files must remain available to the store.
/// @param path Path to the data folder, or to a zip file containing it
/// @param verbosity The verbosity mode stored by the manager
/// @throws Any exception raised while extracting the archive or reading the index; when the
/// input was an archive, the temporary directory is removed before the exception propagates
DataManager::DataManager(std::filesystem::path path, VerboseMode verbosity)
    : verbosity_{verbosity} {
    // If it's a file, assume it's a zip file; otherwise it's a folder
    const bool is_archive = std::filesystem::is_regular_file(path);
    if (is_archive) {
        root_ = create_temporary_directory();
    } else {
        root_ = std::move(path);
    }

    try {
        if (is_archive) {
            extract_zip_file(path, root_);
        }

        index_ = read_input_files_from_directory(root_);
    } catch (...) {
        if (is_archive) {
            // Don't leave a partially extracted copy behind. Use the non-throwing overload so
            // that a clean-up failure cannot mask the original error.
            std::error_code ignored;
            std::filesystem::remove_all(root_, ignored);
        }
        throw;
    }
}

std::vector<Country> DataManager::get_countries() const {
Expand Down
55 changes: 55 additions & 0 deletions src/HealthGPS.Datastore/zip_file.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#include "zip_file.h"

#include <fmt/format.h>
#include <libzippp.h>

#include <fstream>
#include <random>
#include <sstream>
#include <stdexcept>
#include <string>

namespace hgps::data {
/// Create a fresh directory with a random hexadecimal name inside the system temporary
/// directory and return its path. Name collisions are handled by drawing a new name.
std::filesystem::path create_temporary_directory() {
    const auto base_dir = std::filesystem::temp_directory_path();

    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::uniform_int_distribution<unsigned> pick_number;

    for (;;) {
        std::stringstream name;
        name << std::hex << pick_number(engine);

        auto candidate = base_dir / name.str();

        // create_directory() yields false when the directory already exists, in which case
        // we simply try again with another random name.
        if (!std::filesystem::create_directory(candidate)) {
            continue;
        }

        return candidate;
    }
}

/// @brief Extract every entry of a zip archive beneath the given directory
/// @param file_path The path to the zip file
/// @param output_directory The folder into which the entries are written
/// @throws std::runtime_error if the archive cannot be opened, if an entry's name would place
/// it outside @p output_directory, or if an output file cannot be created or written.
/// Failures to create directories are reported as std::filesystem::filesystem_error (which
/// is itself derived from std::runtime_error).
void extract_zip_file(const std::filesystem::path &file_path,
                      const std::filesystem::path &output_directory) {
    using namespace libzippp;

    // ZipArchive takes a narrow string path; convert explicitly rather than relying on the
    // implicit conversion to path::string_type, which is a wide string on some platforms.
    const std::string archive_name = file_path.string();
    ZipArchive zf(archive_name);
    if (!zf.open()) {
        throw std::runtime_error{fmt::format("Failed to open zip file: {}", archive_name)};
    }

    for (const auto &entry : zf.getEntries()) {
        const std::string entry_name = entry.getName();
        const auto relative_path = std::filesystem::path{entry_name}.lexically_normal();

        // Archive contents are untrusted: refuse rooted names and names that climb out of
        // the output directory via a leading "..".
        const bool escapes_output =
            relative_path.has_root_path() ||
            (!relative_path.empty() && *relative_path.begin() == "..");
        if (escapes_output) {
            throw std::runtime_error{
                fmt::format("Zip entry would be extracted outside the output directory: {}",
                            entry_name)};
        }

        const auto out_path = output_directory / relative_path;

        if (entry.isDirectory()) {
            // A false return only means the directory already existed, which is fine;
            // genuine failures are reported by an exception.
            std::filesystem::create_directories(out_path);
            continue;
        }

        // Archives are not required to contain explicit entries for directories, so make
        // sure the file's parent exists before trying to create the file.
        if (out_path.has_parent_path()) {
            std::filesystem::create_directories(out_path.parent_path());
        }

        // Binary mode: the bytes must be written exactly as stored in the archive.
        std::ofstream ofs{out_path, std::ios::binary};
        if (!ofs) {
            throw std::runtime_error{
                fmt::format("Failed to create file: {}", out_path.string())};
        }

        ofs << entry.readAsText();
        if (!ofs) {
            throw std::runtime_error{
                fmt::format("Failed to write file: {}", out_path.string())};
        }
    }
}
} // namespace hgps::data
14 changes: 14 additions & 0 deletions src/HealthGPS.Datastore/zip_file.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#pragma once

#include <filesystem>

namespace hgps::data {
/// @brief Create a temporary directory with a unique path
/// @details A new directory with a randomly generated hexadecimal name is created inside the
/// system temporary directory. This function does not remove the directory afterwards.
/// @return The path to the newly created directory
std::filesystem::path create_temporary_directory();

/// @brief Extract a zip file to the specified location
/// @details Each entry of the archive is written beneath @p output_directory, using the
/// entry's name as its relative path.
/// @param file_path The path to the zip file
/// @param output_directory The path to the output folder
/// @throws std::runtime_error if a directory or file cannot be created
void extract_zip_file(const std::filesystem::path &file_path,
const std::filesystem::path &output_directory);
} // namespace hgps::data

0 comments on commit a3ae4f5

Please sign in to comment.