diff --git a/src/hdf5_hl.d.ts b/src/hdf5_hl.d.ts
index a8771cb..0315314 100644
--- a/src/hdf5_hl.d.ts
+++ b/src/hdf5_hl.d.ts
@@ -105,7 +105,7 @@ export declare class Group extends HasAttrs {
         obj_path: string;
     };
     get(obj_name: string): BrokenSoftLink | ExternalLink | Datatype | Group | Dataset | null;
-    create_group(name: string): Group;
+    create_group(name: string, track_order?: boolean): Group;
     create_dataset(args: {
         name: string;
         data: GuessableDataTypes;
@@ -115,6 +115,7 @@ export declare class Group extends HasAttrs {
         chunks?: number[] | null;
         compression?: (number | 'gzip');
         compression_opts?: number | number[];
+        track_order?: boolean;
     }): Dataset;
     create_soft_link(target: string, name: string): number;
     create_hard_link(target: string, name: string): number;
@@ -125,7 +126,7 @@ export declare class Group extends HasAttrs {
 export declare class File extends Group {
     filename: string;
     mode: ACCESS_MODESTRING;
-    constructor(filename: string, mode?: ACCESS_MODESTRING);
+    constructor(filename: string, mode?: ACCESS_MODESTRING, track_order?: boolean);
     flush(): void;
     close(): Status;
 }
diff --git a/src/hdf5_hl.ts b/src/hdf5_hl.ts
index 9e1c4df..f34f717 100644
--- a/src/hdf5_hl.ts
+++ b/src/hdf5_hl.ts
@@ -751,8 +751,8 @@ export class Group extends HasAttrs {
         return null
     }
 
-    create_group(name: string): Group {
-        Module.create_group(this.file_id, this.path + "/" + name);
+    create_group(name: string, track_order: boolean = false): Group {
+        Module.create_group(this.file_id, this.path + "/" + name, track_order);
         return this.get(name) as Group;
     }
 
@@ -764,9 +764,10 @@ export class Group extends HasAttrs {
         maxshape?: (number | null)[] | null,
         chunks?: number[] | null,
         compression?: (number | 'gzip'),
-        compression_opts?: number | number[]
+        compression_opts?: number | number[],
+        track_order?: boolean,
     }): Dataset {
-        const { name, data, shape, dtype, maxshape, chunks, compression, compression_opts } = args;
+        const { name, data, shape, dtype, maxshape, chunks, compression, compression_opts, track_order } = args;
         const final_dtype = dtype ?? guess_dtype(data);
         let metadata = dtype_to_metadata(final_dtype);
         if (compression && !chunks) {
@@ -821,7 +822,8 @@ export class Group extends HasAttrs {
                 metadata.type,
                 metadata.size,
                 metadata.signed,
-                metadata.vlen
+                metadata.vlen,
+                track_order ?? false,
             );
         }
         else {
@@ -840,7 +842,8 @@ export class Group extends HasAttrs {
                 metadata.signed,
                 metadata.vlen,
                 compression_id,
-                compression_opts_out
+                compression_opts_out,
+                track_order ?? false
             );
         } finally {
             Module._free(data_ptr);
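Reviewer note: a minimal usage sketch of the high-level API as extended above; the file name and data are illustrative only, not part of the patch.

```ts
import h5wasm from "h5wasm/node";

await h5wasm.ready;
// third constructor argument enables creation-order tracking file-wide:
const f = new h5wasm.File("ordered.h5", "w", true);
// the per-dataset flag tracks the creation order of the dataset's attributes:
const dset = f.create_dataset({
    name: "data",
    data: new Float32Array([1, 2, 3]),
    track_order: true,
});
dset.create_attribute("z", "written first");
dset.create_attribute("a", "written second");
f.close();
```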
@@ -880,20 +883,13 @@ export class Group extends HasAttrs {
 
 export class File extends Group {
     filename: string;
     mode: ACCESS_MODESTRING;
-    constructor(filename: string, mode: ACCESS_MODESTRING = "r") {
-        let file_id: bigint;
-        let access_mode = ACCESS_MODES[mode];
-        let h5_mode = Module[access_mode];
-        if (['H5F_ACC_TRUNC', 'H5F_ACC_EXCL'].includes(access_mode)) {
-            file_id = Module.ccall("H5Fcreate", "bigint", ["string", "number", "bigint", "bigint"], [filename, h5_mode, 0n, 0n]);
-        }
-        else {
-            // then it is an existing file...
-            file_id = Module.ccall("H5Fopen", "bigint", ["string", "number", "bigint"], [filename, h5_mode, 0n]);
-        }
+    constructor(filename: string, mode: ACCESS_MODESTRING = "r", track_order: boolean = false) {
+        const access_mode = ACCESS_MODES[mode];
+        const h5_mode = Module[access_mode];
+        const file_id = Module.open(filename, h5_mode, track_order);
         super(file_id, "/");
         this.filename = filename;
         this.mode = mode;
     }
 
diff --git a/src/hdf5_util.cc b/src/hdf5_util.cc
index 751e04f..4bc79ad 100644
--- a/src/hdf5_util.cc
+++ b/src/hdf5_util.cc
@@ -27,6 +27,31 @@ EM_JS(void, throw_error, (const char *string_error), {
 //
 //     pass
 // }
 
+int64_t open(const std::string& filename_string, unsigned int h5_mode = H5F_ACC_RDONLY, bool track_order = false)
+{
+    const char *filename = filename_string.c_str();
+    hid_t file_id;
+    hid_t fcpl_id = H5Pcreate(H5P_FILE_CREATE);
+
+    if (track_order)
+    {
+        H5Pset_link_creation_order(fcpl_id, H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED);
+        H5Pset_attr_creation_order(fcpl_id, H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED);
+    }
+
+    if (h5_mode == H5F_ACC_TRUNC || h5_mode == H5F_ACC_EXCL)
+    {
+        file_id = H5Fcreate(filename, h5_mode, fcpl_id, H5P_DEFAULT);
+    }
+    else
+    {
+        // then it is an existing file...
+        file_id = H5Fopen(filename, h5_mode, H5P_DEFAULT);
+    }
+    herr_t status = H5Pclose(fcpl_id);
+    return (int64_t)file_id;
+}
+
 herr_t link_name_callback(hid_t loc_id, const char *name, const H5L_info_t *linfo, void *opdata)
 {
     std::vector<std::string> *namelist = reinterpret_cast<std::vector<std::string> *>(opdata);
@@ -691,14 +716,21 @@ int get_attribute_data(hid_t loc_id, const std::string& group_name_string, cons
     return (int)status;
 }
 
-int create_group(hid_t loc_id, std::string grp_name_string)
+int create_group(hid_t loc_id, std::string grp_name_string, const bool track_order=false)
 {
-    hid_t grp_id = H5Gcreate2(loc_id, grp_name_string.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-    herr_t status = H5Gclose(grp_id);
+    hid_t gcpl_id = H5Pcreate(H5P_GROUP_CREATE);
+    if (track_order)
+    {
+        H5Pset_link_creation_order(gcpl_id, H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED);
+        H5Pset_attr_creation_order(gcpl_id, H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED);
+    }
+    hid_t grp_id = H5Gcreate2(loc_id, grp_name_string.c_str(), H5P_DEFAULT, gcpl_id, H5P_DEFAULT);
+    herr_t status = H5Pclose(gcpl_id);
+    status = H5Gclose(grp_id);
     return (int)status;
 }
 
-herr_t setup_dataset(val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr, int compression, val compression_opts, hid_t *filetype, hid_t *space, hid_t *dcpl)
+herr_t setup_dataset(val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr, int compression, val compression_opts, bool track_order, hid_t *filetype, hid_t *space, hid_t *dcpl)
 {
     herr_t status;
 
@@ -719,15 +751,12 @@ herr_t setup_dataset(val dims_in, val maxdims_in, val chunks_in, int dtype, int
      * Create dataset creation property list (dcpl),
      * defining chunking if chunks_in is not null
      */
+    *dcpl = H5Pcreate(H5P_DATASET_CREATE);
 
-    if (chunks_in == val::null()) {
-        *dcpl = H5P_DEFAULT;
-    }
-    else {
+    if (chunks_in != val::null()) {
         std::vector<hsize_t> chunks_vec = vecFromJSArray<hsize_t>(chunks_in);
         hsize_t *chunks = chunks_vec.data();
         int nchunks = chunks_vec.size();
-        *dcpl = H5Pcreate(H5P_DATASET_CREATE);
         H5Pset_chunk(*dcpl, nchunks, chunks);
         if (compression != 0) {
             std::vector<unsigned int> compression_opts_vec = vecFromJSArray<unsigned int>(compression_opts);
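Reviewer note on the dcpl change above: the dataset creation property list used to stay `H5P_DEFAULT` for unchunked datasets, but attribute-order tracking requires a real property list, so it is now always created. A hedged sketch of the case this enables (assumes `f` is an open writable `h5wasm.File`):

```ts
// Contiguous dataset (no chunks, no compression) that still tracks
// the creation order of its attributes; this works only because the
// DCPL is no longer H5P_DEFAULT in the unchunked code path:
const d = f.create_dataset({
    name: "contiguous",
    data: new Float32Array([1, 2, 3]),
    track_order: true,
});
```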
@@ -737,6 +766,10 @@ herr_t setup_dataset(val dims_in, val maxdims_in, val chunks_in, int dtype, int
         }
     }
 
+    if (track_order) {
+        H5Pset_attr_creation_order(*dcpl, H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED);
+    }
+
     if (dtype == H5T_STRING)
     {
         size_t str_size = (is_vlstr) ? H5T_VARIABLE : dsize;
@@ -825,7 +858,7 @@ int create_attribute(hid_t loc_id, std::string obj_name_string, std::string att
     //     throw_error("data type not supported");
     // }
 
-    status = setup_dataset(dims_in, dims_in, val::null(), dtype, dsize, is_signed, is_vlstr, 0, val::null(), &filetype, &space, &dcpl);
+    status = setup_dataset(dims_in, dims_in, val::null(), dtype, dsize, is_signed, is_vlstr, 0, val::null(), false, &filetype, &space, &dcpl);
     /*
      * Create the attribute and write the data to it.
      */
@@ -878,7 +911,7 @@ int create_vlen_str_attribute(hid_t loc_id, std::string obj_name_string, std::s
     return create_attribute(loc_id, obj_name_string, attr_name_string, wdata_uint64, dims_in, dtype, dsize, is_signed, is_vlstr);
 }
 
-int create_dataset(hid_t loc_id, std::string dset_name_string, uint64_t wdata_uint64, val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr, int compression, val compression_opts)
+int create_dataset(hid_t loc_id, std::string dset_name_string, uint64_t wdata_uint64, val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr, int compression, val compression_opts, bool track_order=false)
 {
     hid_t filetype, space, dset, dcpl;
     herr_t status;
@@ -886,7 +919,7 @@ int create_dataset(hid_t loc_id, std::string dset_name_string, uint64_t wdata_u
     void *wdata = (void *)wdata_uint64;
     const char *dset_name = dset_name_string.c_str();
 
-    status = setup_dataset(dims_in, maxdims_in, chunks_in, dtype, dsize, is_signed, is_vlstr, compression, compression_opts, &filetype, &space, &dcpl);
+    status = setup_dataset(dims_in, maxdims_in, chunks_in, dtype, dsize, is_signed, is_vlstr, compression, compression_opts, track_order, &filetype, &space, &dcpl);
 
     dset = H5Dcreate2(loc_id, dset_name, filetype, space, H5P_DEFAULT, dcpl, H5P_DEFAULT);
     status = H5Dwrite(dset, filetype, space, space, H5P_DEFAULT, wdata);
@@ -897,7 +930,7 @@ int create_dataset(hid_t loc_id, std::string dset_name_string, uint64_t wdata_u
     return (int)status;
 }
 
-int create_vlen_str_dataset(hid_t loc_id, std::string dset_name_string, val data, val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr) {
+int create_vlen_str_dataset(hid_t loc_id, std::string dset_name_string, val data, val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr, bool track_order=false) {
     uint64_t wdata_uint64; // ptr as uint64_t (webassembly will be 64-bit someday)
 
     std::vector<std::string> data_string_vec = vecFromJSArray<std::string>(data);
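Reviewer note: variable-length string data takes the separate `create_vlen_str_dataset` path above, which now forwards `track_order` as well. A hedged sketch of what exercises it (again assuming an open writable file `f`):

```ts
// String arrays are written as variable-length strings and routed
// through create_vlen_str_dataset rather than create_dataset on the
// C++ side, so the flag must be plumbed through both entry points:
const labels = f.create_dataset({
    name: "labels",
    data: ["gamma", "beta", "alpha"],
    track_order: true,
});
```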
@@ -909,7 +942,7 @@ int create_vlen_str_dataset(hid_t loc_id, std::string dset_name_string, val dat
     }
     // pass the pointer as an int...
     wdata_uint64 = (uint64_t)data_char_vec.data();
-    return create_dataset(loc_id, dset_name_string, wdata_uint64, dims_in, maxdims_in, chunks_in, dtype, dsize, is_signed, is_vlstr, 0, val::null());
+    return create_dataset(loc_id, dset_name_string, wdata_uint64, dims_in, maxdims_in, chunks_in, dtype, dsize, is_signed, is_vlstr, 0, val::null(), track_order);
 }
 
 int resize_dataset(hid_t loc_id, const std::string dset_name_string, val new_size_in)
@@ -1291,6 +1324,7 @@ int deactivate_throwing_error_handler() {
 
 EMSCRIPTEN_BINDINGS(hdf5)
 {
+    function("open", &open);
     function("get_keys", &get_keys_vector);
     function("get_names", &get_child_names);
     function("get_types", &get_child_types);
diff --git a/src/hdf5_util_helpers.d.ts b/src/hdf5_util_helpers.d.ts
index 2f6e9ba..8650b93 100644
--- a/src/hdf5_util_helpers.d.ts
+++ b/src/hdf5_util_helpers.d.ts
@@ -59,7 +59,8 @@ export interface VirtualSource {
 }
 
 export interface H5Module extends EmscriptenModule {
-    create_dataset(file_id: bigint, arg1: string, arg2: bigint, shape: bigint[], maxshape: (bigint | null)[], chunks: bigint[] | null, type: number, size: number, signed: boolean, vlen: boolean, compression_id: number, compression_opts: number[]): number;
+    open(filename: string, mode: number, track_order: boolean): bigint;
+    create_dataset(file_id: bigint, arg1: string, arg2: bigint, shape: bigint[], maxshape: (bigint | null)[], chunks: bigint[] | null, type: number, size: number, signed: boolean, vlen: boolean, compression_id: number, compression_opts: number[], track_order: boolean): number;
     create_soft_link(file_id: bigint, link_target: string, link_name: string): number;
     create_hard_link(file_id: bigint, link_target: string, link_name: string): number;
     create_external_link(file_id: bigint, file_name: string, link_target: string, link_name: string): number;
@@ -91,8 +92,8 @@ export interface H5Module extends EmscriptenModule {
     H5Z_FILTER_SCALEOFFSET: 6;
     H5Z_FILTER_RESERVED: 256;
     H5Z_FILTER_MAX: 65535;
-    create_group(file_id: bigint, name: string): number;
-    create_vlen_str_dataset(file_id: bigint, dset_name: string, prepared_data: any, shape: bigint[], maxshape: (bigint | null)[], chunks: bigint[] | null, type: number, size: number, signed: boolean, vlen: boolean): number;
+    create_group(file_id: bigint, name: string, track_order: boolean): number;
+    create_vlen_str_dataset(file_id: bigint, dset_name: string, prepared_data: any, shape: bigint[], maxshape: (bigint | null)[], chunks: bigint[] | null, type: number, size: number, signed: boolean, vlen: boolean, track_order: boolean): number;
     get_dataset_data(file_id: bigint, path: string, count: bigint[] | null, offset: bigint[] | null, strides: bigint[] | null, rdata_ptr: bigint): number;
     set_dataset_data(file_id: bigint, path: string, count: bigint[] | null, offset: bigint[] | null, strides: bigint[] | null, wdata_ptr: bigint): number;
     refresh_dataset(file_id: bigint, path: string): number;
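Reviewer note: for completeness, a hedged sketch of calling the new low-level binding directly; `Module` and the `H5F_ACC_TRUNC` property are used exactly as `hdf5_hl.ts` uses them above, and most callers should prefer `h5wasm.File`:

```ts
// Module.open wraps H5Fcreate/H5Fopen and applies the FCPL when
// track_order is requested; it returns the hid_t as a bigint:
const file_id: bigint = Module.open("direct.h5", Module.H5F_ACC_TRUNC, true);
// groups take their own flag, so ordering can be enabled per-group:
Module.create_group(file_id, "/ordered_group", true);
```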
diff --git a/test/test.mjs b/test/test.mjs
index 9e69bf7..4dcf9d0 100644
--- a/test/test.mjs
+++ b/test/test.mjs
@@ -19,6 +19,7 @@ import references from './create_read_references.mjs';
 import test_throwing_error_handler from './test_throwing_error_handler.mjs';
 import test_empty from './empty_dataset_and_attrs.mjs';
 import vlen_test from './vlen_test.mjs';
+import track_order from './track_order.mjs';
 
 let tests = [];
 const add_tests = (tests_in) => { /*global*/ tests = tests.concat(tests_in)}
@@ -41,6 +42,7 @@ add_tests(references);
 add_tests(test_throwing_error_handler);
 add_tests(test_empty);
 add_tests(vlen_test);
+add_tests(track_order);
 
 let passed = true;
 async function run_test(test) {
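Reviewer note: the new test module registered above follows the same contract as the other test files; a hedged sketch of the shape the runner consumes (the interface name is illustrative, not from the codebase):

```ts
// Each test module default-exports an array of these:
interface TestCase {
    description: string;        // printed by the test runner
    test: () => Promise<void>;  // throws (e.g. via assert) on failure
}
```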
diff --git a/test/track_order.mjs b/test/track_order.mjs
new file mode 100644
index 0000000..4e9e058
--- /dev/null
+++ b/test/track_order.mjs
@@ -0,0 +1,115 @@
+#!/usr/bin/env node
+
+import { strict as assert } from 'assert';
+import { existsSync, mkdirSync, unlinkSync } from 'fs';
+import { join } from 'path';
+import h5wasm from "h5wasm/node";
+
+async function test_track_order() {
+
+    await h5wasm.ready;
+    const PATH = join(".", "test", "tmp");
+    const TRACKED_FILEPATH = join(PATH, "track_order.h5");
+    const UNTRACKED_FILEPATH = join(PATH, "untrack_order.h5");
+
+    const VALUES = [3,2,1];
+    const DATA = new Float32Array(VALUES);
+
+    if (!(existsSync(PATH))) {
+        mkdirSync(PATH);
+    }
+
+    // write with tracking on
+    {
+        const write_file = new h5wasm.File(TRACKED_FILEPATH, "w", true);
+
+        // create attributes with names in reverse alphabetical order:
+        write_file.create_attribute("c", "first attribute");
+        write_file.create_attribute("b", "second attribute");
+        write_file.create_attribute("a", "third attribute");
+
+        // create datasets with names in reverse alphabetical order:
+        write_file.create_dataset({name: "c_data", data: DATA});
+        write_file.create_dataset({name: "b_data", data: DATA});
+        const a_data = write_file.create_dataset({name: "a_data", data: DATA, track_order: true});
+
+        // create attributes with names in reverse alphabetical order:
+        a_data.create_attribute("c", "first attribute");
+        a_data.create_attribute("b", "second attribute");
+        a_data.create_attribute("a", "third attribute");
+
+        write_file.flush();
+        write_file.close();
+    }
+
+    // write with tracking off
+    {
+        const write_file = new h5wasm.File(UNTRACKED_FILEPATH, "w", false);
+
+        // create attributes with names in reverse alphabetical order:
+        write_file.create_attribute("c", "first attribute");
+        write_file.create_attribute("b", "second attribute");
+        write_file.create_attribute("a", "third attribute");
+
+        // create datasets with names in reverse alphabetical order:
+        write_file.create_dataset({name: "c_data", data: DATA});
+        write_file.create_dataset({name: "b_data", data: DATA});
+        const a_data = write_file.create_dataset({name: "a_data", data: DATA, track_order: false});
+
+        // create attributes with names in reverse alphabetical order:
+        a_data.create_attribute("c", "first attribute");
+        a_data.create_attribute("b", "second attribute");
+        a_data.create_attribute("a", "third attribute");
+
+        write_file.flush();
+        write_file.close();
+    }
+
+    // read with tracking on
+    {
+        const read_file = new h5wasm.File(TRACKED_FILEPATH, "r");
+
+        // check that attrs are in original order:
+        assert.deepEqual(Object.keys(read_file.attrs), ["c", "b", "a"]);
+
+        // check that datasets are in original order:
+        assert.deepEqual(read_file.keys(), ["c_data", "b_data", "a_data"]);
+
+        // check that attrs of dataset are in original order:
+        const dataset_attrs = read_file.get("a_data").attrs;
+        assert.deepEqual(Object.keys(dataset_attrs), ["c", "b", "a"]);
+
+        read_file.close();
+    }
+
+    // read with tracking off
+    {
+        const read_file = new h5wasm.File(UNTRACKED_FILEPATH, "r");
+
+        // check that attrs are in alphabetical (not original) order:
+        assert.deepEqual(Object.keys(read_file.attrs), ["a", "b", "c"]);
+
+        // check that datasets are in alphabetical (not original) order:
+        assert.deepEqual(read_file.keys(), ["a_data", "b_data", "c_data"]);
+
+        // check that attrs of dataset are in alphabetical (not original) order:
+        const dataset_attrs = read_file.get("a_data").attrs;
+        assert.deepEqual(Object.keys(dataset_attrs), ["a", "b", "c"]);
+
+        read_file.close();
+    }
+
+    // cleanup files when finished:
+    unlinkSync(TRACKED_FILEPATH);
+    unlinkSync(UNTRACKED_FILEPATH);
+
+}
+
+export const tests = [
+    {
+        description: "Create and read attrs and datasets with and without track_order",
+        test: test_track_order
+    },
+];
+
+export default tests;
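Reviewer note: the assertions above encode HDF5's iteration rules, which is worth keeping in mind when reading them: without order tracking, links and attributes are iterated by name (hence alphabetical), while with tracking and indexing enabled they come back in creation order. A hedged standalone sketch of the same check, runnable outside the harness (the path is illustrative):

```ts
import { strict as assert } from 'assert';
import h5wasm from "h5wasm/node";

await h5wasm.ready;
const f = new h5wasm.File("/tmp/order_check.h5", "w", true);
f.create_dataset({ name: "b", data: [1] });
f.create_dataset({ name: "a", data: [2] });
f.close();

const r = new h5wasm.File("/tmp/order_check.h5", "r");
// creation order, not name order, because track_order was set:
assert.deepEqual(r.keys(), ["b", "a"]);
r.close();
```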