Merge pull request #82 from usnistgov/rebase-track-order
Add option to track order of attributes and fields
bmaranville authored Aug 28, 2024
2 parents 7b0932d + 5a04d24 commit e8acf24
Showing 6 changed files with 185 additions and 37 deletions.
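In short: `track_order` can now be passed when creating a file, a group, or a dataset. A minimal usage sketch against the signatures added below (the file name and data are illustrative, not part of the commit):

import h5wasm from "h5wasm/node";

await h5wasm.ready;

// The third constructor argument is new: enable creation-order tracking for the file.
const file = new h5wasm.File("ordered.h5", "w", true);

// Groups and datasets accept the same flag:
const grp = file.create_group("results", true);
grp.create_dataset({ name: "z_first", data: new Float32Array([1, 2, 3]), track_order: true });
grp.create_dataset({ name: "a_second", data: new Float32Array([4, 5, 6]), track_order: true });

file.flush();
file.close();

With tracking on, keys and attribute names read back in insertion order ("z_first", "a_second") rather than HDF5's default name order, as the new test file below demonstrates.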
5 changes: 3 additions & 2 deletions src/hdf5_hl.d.ts
@@ -105,7 +105,7 @@ export declare class Group extends HasAttrs {
obj_path: string;
};
get(obj_name: string): BrokenSoftLink | ExternalLink | Datatype | Group | Dataset | null;
- create_group(name: string): Group;
+ create_group(name: string, track_order?: boolean): Group;
create_dataset(args: {
name: string;
data: GuessableDataTypes;
@@ -115,6 +115,7 @@ export declare class Group extends HasAttrs {
chunks?: number[] | null;
compression?: (number | 'gzip');
compression_opts?: number | number[];
+ track_order?: boolean;
}): Dataset;
create_soft_link(target: string, name: string): number;
create_hard_link(target: string, name: string): number;
@@ -125,7 +126,7 @@
export declare class File extends Group {
filename: string;
mode: ACCESS_MODESTRING;
- constructor(filename: string, mode?: ACCESS_MODESTRING);
+ constructor(filename: string, mode?: ACCESS_MODESTRING, track_order?: boolean);
flush(): void;
close(): Status;
}
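For completeness, a call exercising the full options object declared above; the group variable and values are hypothetical, and `compression`/`compression_opts` behave as before (only `track_order` is new):

// Hypothetical group `grp`; every option below comes from the declaration above.
grp.create_dataset({
    name: "counts",
    data: new Float32Array([3, 2, 1]),
    shape: [3],
    chunks: [3],          // compression generally requires chunked storage
    compression: "gzip",
    compression_opts: 4,  // gzip level
    track_order: true,    // new in this PR
});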
31 changes: 13 additions & 18 deletions src/hdf5_hl.ts
@@ -751,8 +751,8 @@ export class Group extends HasAttrs {
return null
}

- create_group(name: string): Group {
-     Module.create_group(this.file_id, this.path + "/" + name);
+ create_group(name: string, track_order: boolean = false): Group {
+     Module.create_group(this.file_id, this.path + "/" + name, track_order);
return this.get(name) as Group;
}

@@ -764,9 +764,10 @@
maxshape?: (number | null)[] | null,
chunks?: number[] | null,
compression?: (number | 'gzip'),
- compression_opts?: number | number[]
+ compression_opts?: number | number[],
+ track_order?: boolean,
}): Dataset {
- const { name, data, shape, dtype, maxshape, chunks, compression, compression_opts } = args;
+ const { name, data, shape, dtype, maxshape, chunks, compression, compression_opts, track_order } = args;
const final_dtype = dtype ?? guess_dtype(data);
let metadata = dtype_to_metadata(final_dtype);
if (compression && !chunks) {
@@ -821,7 +822,8 @@
metadata.type,
metadata.size,
metadata.signed,
- metadata.vlen
+ metadata.vlen,
+ track_order ?? false,
);
}
else {
@@ -840,7 +842,8 @@
metadata.signed,
metadata.vlen,
compression_id,
- compression_opts_out
+ compression_opts_out,
+ track_order ?? false
);
} finally {
Module._free(data_ptr);
@@ -880,20 +883,12 @@
export class File extends Group {
filename: string;
mode: ACCESS_MODESTRING;
- constructor(filename: string, mode: ACCESS_MODESTRING = "r") {
-     let file_id: bigint;
-     let access_mode = ACCESS_MODES[mode];
-     let h5_mode = Module[access_mode];
-     if (['H5F_ACC_TRUNC', 'H5F_ACC_EXCL'].includes(access_mode)) {
-         file_id = Module.ccall("H5Fcreate", "bigint", ["string", "number", "bigint", "bigint"], [filename, h5_mode, 0n, 0n]);
-     }
-     else {
-         // then it is an existing file...
-         file_id = Module.ccall("H5Fopen", "bigint", ["string", "number", "bigint"], [filename, h5_mode, 0n]);
-     }
+ constructor(filename: string, mode: ACCESS_MODESTRING = "r", track_order: boolean = false) {
+     const access_mode = ACCESS_MODES[mode];
+     const h5_mode = Module[access_mode];
+     const file_id = Module.open(filename, h5_mode, track_order);
super(file_id, "/");
this.filename = filename;
-
this.mode = mode;
}

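One behavioral note on the rewritten constructor: `track_order` is forwarded to the new `Module.open`, which (see hdf5_util.cc below) attaches the file-creation property list only on the `H5Fcreate` path, so the flag only has an effect in modes that create the file. A sketch of the implication:

// Assuming "w" maps to H5F_ACC_TRUNC in ACCESS_MODES, as the old constructor implies:
const created = new h5wasm.File("new_file.h5", "w", true);   // order tracking baked into the file
created.close();
const reopened = new h5wasm.File("new_file.h5", "r", true);  // no-op: H5Fopen takes no fcpl
reopened.close();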
62 changes: 48 additions & 14 deletions src/hdf5_util.cc
@@ -27,6 +27,31 @@ EM_JS(void, throw_error, (const char *string_error), {
// // pass
// }

+ int64_t open(const std::string& filename_string, unsigned int h5_mode = H5F_ACC_RDONLY, bool track_order = false)
+ {
+     const char *filename = filename_string.c_str();
+     hid_t file_id;
+     hid_t fcpl_id = H5Pcreate(H5P_FILE_CREATE);
+
+     if (track_order)
+     {
+         H5Pset_link_creation_order(fcpl_id, H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED);
+         H5Pset_attr_creation_order(fcpl_id, H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED);
+     }
+
+     if (h5_mode == H5F_ACC_TRUNC || h5_mode == H5F_ACC_EXCL)
+     {
+         file_id = H5Fcreate(filename, h5_mode, fcpl_id, H5P_DEFAULT);
+     }
+     else
+     {
+         // then it is an existing file...
+         file_id = H5Fopen(filename, h5_mode, H5P_DEFAULT);
+     }
+     herr_t status = H5Pclose(fcpl_id);
+     return (int64_t)file_id;
+ }
+
herr_t link_name_callback(hid_t loc_id, const char *name, const H5L_info_t *linfo, void *opdata)
{
std::vector<std::string> *namelist = reinterpret_cast<std::vector<std::string> *>(opdata);
@@ -691,14 +716,21 @@ int get_attribute_data(hid_t loc_id, const std::string& group_name_string, const
return (int)status;
}

- int create_group(hid_t loc_id, std::string grp_name_string)
+ int create_group(hid_t loc_id, std::string grp_name_string, const bool track_order=false)
{
- hid_t grp_id = H5Gcreate2(loc_id, grp_name_string.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
- herr_t status = H5Gclose(grp_id);
+ hid_t gcpl_id = H5Pcreate(H5P_GROUP_CREATE);
+ if (track_order)
+ {
+     H5Pset_link_creation_order(gcpl_id, H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED);
+     H5Pset_attr_creation_order(gcpl_id, H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED);
+ }
+ hid_t grp_id = H5Gcreate2(loc_id, grp_name_string.c_str(), H5P_DEFAULT, gcpl_id, H5P_DEFAULT);
+ herr_t status = H5Pclose(gcpl_id);
+ status = H5Gclose(grp_id);
return (int)status;
}

- herr_t setup_dataset(val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr, int compression, val compression_opts, hid_t *filetype, hid_t *space, hid_t *dcpl)
+ herr_t setup_dataset(val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr, int compression, val compression_opts, bool track_order, hid_t *filetype, hid_t *space, hid_t *dcpl)
{
herr_t status;

@@ -719,15 +751,12 @@ herr_t setup_dataset(val dims_in, val maxdims_in, val chunks_in, int dtype, int
* Create dataset creation property list (dcpl),
* defining chunking if chunks_in is not null
*/
+ *dcpl = H5Pcreate(H5P_DATASET_CREATE);
+
- if (chunks_in == val::null()) {
-     *dcpl = H5P_DEFAULT;
- }
- else {
+ if (chunks_in != val::null()) {
      std::vector<hsize_t> chunks_vec = vecFromJSArray<hsize_t>(chunks_in);
      hsize_t *chunks = chunks_vec.data();
      int nchunks = chunks_vec.size();
-     *dcpl = H5Pcreate(H5P_DATASET_CREATE);
      H5Pset_chunk(*dcpl, nchunks, chunks);
if (compression != 0) {
std::vector<uint> compression_opts_vec = vecFromJSArray<uint>(compression_opts);
@@ -737,6 +766,10 @@ herr_t setup_dataset(val dims_in, val maxdims_in, val chunks_in, int dtype, int
}
}

+ if (track_order) {
+     H5Pset_attr_creation_order(*dcpl, H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED);
+ }

if (dtype == H5T_STRING)
{
size_t str_size = (is_vlstr) ? H5T_VARIABLE : dsize;
@@ -825,7 +858,7 @@ int create_attribute(hid_t loc_id, std::string obj_name_string, std::string attr
// throw_error("data type not supported");
// }

- status = setup_dataset(dims_in, dims_in, val::null(), dtype, dsize, is_signed, is_vlstr, 0, val::null(), &filetype, &space, &dcpl);
+ status = setup_dataset(dims_in, dims_in, val::null(), dtype, dsize, is_signed, is_vlstr, 0, val::null(), false, &filetype, &space, &dcpl);
/*
* Create the attribute and write the data to it.
*/
@@ -878,15 +911,15 @@ int create_vlen_str_attribute(hid_t loc_id, std::string obj_name_string, std::st
return create_attribute(loc_id, obj_name_string, attr_name_string, wdata_uint64, dims_in, dtype, dsize, is_signed, is_vlstr);
}

- int create_dataset(hid_t loc_id, std::string dset_name_string, uint64_t wdata_uint64, val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr, int compression, val compression_opts)
+ int create_dataset(hid_t loc_id, std::string dset_name_string, uint64_t wdata_uint64, val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr, int compression, val compression_opts, bool track_order=false)
{
hid_t filetype, space, dset, dcpl;
herr_t status;
// data is pointer to raw bytes
void *wdata = (void *)wdata_uint64;
const char *dset_name = dset_name_string.c_str();

- status = setup_dataset(dims_in, maxdims_in, chunks_in, dtype, dsize, is_signed, is_vlstr, compression, compression_opts, &filetype, &space, &dcpl);
+ status = setup_dataset(dims_in, maxdims_in, chunks_in, dtype, dsize, is_signed, is_vlstr, compression, compression_opts, track_order, &filetype, &space, &dcpl);
dset = H5Dcreate2(loc_id, dset_name, filetype, space, H5P_DEFAULT, dcpl, H5P_DEFAULT);
status = H5Dwrite(dset, filetype, space, space, H5P_DEFAULT, wdata);

@@ -897,7 +930,7 @@ int create_dataset(hid_t loc_id, std::string dset_name_string, uint64_t wdata_ui
return (int)status;
}

- int create_vlen_str_dataset(hid_t loc_id, std::string dset_name_string, val data, val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr) {
+ int create_vlen_str_dataset(hid_t loc_id, std::string dset_name_string, val data, val dims_in, val maxdims_in, val chunks_in, int dtype, int dsize, bool is_signed, bool is_vlstr, bool track_order=false) {
uint64_t wdata_uint64; // ptr as uint64_t (webassembly will be 64-bit someday)

std::vector<std::string> data_string_vec = vecFromJSArray<std::string>(data);
Expand All @@ -909,7 +942,7 @@ int create_vlen_str_dataset(hid_t loc_id, std::string dset_name_string, val data
}
// pass the pointer as an int...
wdata_uint64 = (uint64_t)data_char_vec.data();
- return create_dataset(loc_id, dset_name_string, wdata_uint64, dims_in, maxdims_in, chunks_in, dtype, dsize, is_signed, is_vlstr, 0, val::null());
+ return create_dataset(loc_id, dset_name_string, wdata_uint64, dims_in, maxdims_in, chunks_in, dtype, dsize, is_signed, is_vlstr, 0, val::null(), track_order);
}

int resize_dataset(hid_t loc_id, const std::string dset_name_string, val new_size_in)
@@ -1291,6 +1324,7 @@ int deactivate_throwing_error_handler() {

EMSCRIPTEN_BINDINGS(hdf5)
{
function("open", &open);
function("get_keys", &get_keys_vector);
function("get_names", &get_child_names);
function("get_types", &get_child_types);
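The binding registered above exposes the raw call on the Emscripten module object. A low-level sketch equivalent to `new h5wasm.File("ordered.h5", "w", true)`, using the signatures declared in hdf5_util_helpers.d.ts below (note that `Module` is internal to the wrapper, and error handling and file closing are omitted):

// Module.H5F_ACC_TRUNC is looked up the same way the File constructor does it.
const file_id = Module.open("ordered.h5", Module["H5F_ACC_TRUNC"], true);
Module.create_group(file_id, "/grp", true);  // returns an int status, not a Group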
7 changes: 4 additions & 3 deletions src/hdf5_util_helpers.d.ts
@@ -59,7 +59,8 @@ export interface VirtualSource {
}

export interface H5Module extends EmscriptenModule {
- create_dataset(file_id: bigint, arg1: string, arg2: bigint, shape: bigint[], maxshape: (bigint | null)[], chunks: bigint[] | null, type: number, size: number, signed: boolean, vlen: boolean, compression_id: number, compression_opts: number[]): number;
+ open(filename: string, mode: number, track_order: boolean): bigint;
+ create_dataset(file_id: bigint, arg1: string, arg2: bigint, shape: bigint[], maxshape: (bigint | null)[], chunks: bigint[] | null, type: number, size: number, signed: boolean, vlen: boolean, compression_id: number, compression_opts: number[], track_order: boolean): number;
create_soft_link(file_id: bigint, link_target: string, link_name: string): number;
create_hard_link(file_id: bigint, link_target: string, link_name: string): number;
create_external_link(file_id: bigint, file_name: string, link_target: string, link_name: string): number;
@@ -91,8 +92,8 @@ export interface H5Module extends EmscriptenModule {
H5Z_FILTER_SCALEOFFSET: 6;
H5Z_FILTER_RESERVED: 256;
H5Z_FILTER_MAX: 65535;
- create_group(file_id: bigint, name: string): number;
- create_vlen_str_dataset(file_id: bigint, dset_name: string, prepared_data: any, shape: bigint[], maxshape: (bigint | null)[], chunks: bigint[] | null, type: number, size: number, signed: boolean, vlen: boolean): number;
+ create_group(file_id: bigint, name: string, track_order: boolean): number;
+ create_vlen_str_dataset(file_id: bigint, dset_name: string, prepared_data: any, shape: bigint[], maxshape: (bigint | null)[], chunks: bigint[] | null, type: number, size: number, signed: boolean, vlen: boolean, track_order: boolean): number;
get_dataset_data(file_id: bigint, path: string, count: bigint[] | null, offset: bigint[] | null, strides: bigint[] | null, rdata_ptr: bigint): number;
set_dataset_data(file_id: bigint, path: string, count: bigint[] | null, offset: bigint[] | null, strides: bigint[] | null, wdata_ptr: bigint): number;
refresh_dataset(file_id: bigint, path: string): number;
2 changes: 2 additions & 0 deletions test/test.mjs
@@ -19,6 +19,7 @@ import references from './create_read_references.mjs';
import test_throwing_error_handler from './test_throwing_error_handler.mjs';
import test_empty from './empty_dataset_and_attrs.mjs';
import vlen_test from './vlen_test.mjs';
+ import track_order from './track_order.mjs';

let tests = [];
const add_tests = (tests_in) => { /*global*/ tests = tests.concat(tests_in)}
@@ -41,6 +42,7 @@ add_tests(references);
add_tests(test_throwing_error_handler);
add_tests(test_empty);
add_tests(vlen_test);
+ add_tests(track_order);

let passed = true;
async function run_test(test) {
115 changes: 115 additions & 0 deletions test/track_order.mjs
@@ -0,0 +1,115 @@
#!/usr/bin/env node

import { strict as assert } from 'assert';
import { existsSync, mkdirSync, unlinkSync } from 'fs';
import { join } from 'path';
import h5wasm from "h5wasm/node";

async function test_track_order() {

await h5wasm.ready;
const PATH = join(".", "test", "tmp");
const TRACKED_FILEPATH = join(PATH, "track_order.h5");
const UNTRACKED_FILEPATH = join(PATH, "untrack_order.h5");

const VALUES = [3,2,1];
const DATA = new Float32Array(VALUES);

if (!(existsSync(PATH))) {
mkdirSync(PATH);
}

// write with tracking on
{
const write_file = new h5wasm.File(TRACKED_FILEPATH, "w", true);

// create attributes with names in reverse alphabetical order:
write_file.create_attribute("c", "first attribute");
write_file.create_attribute("b", "second attribute");
write_file.create_attribute("a", "third attribute");

// create datasets with names in reverse alphabetical order:
write_file.create_dataset({name: "c_data", data: DATA});
write_file.create_dataset({name: "b_data", data: DATA});
const a_data = write_file.create_dataset({name: "a_data", data: DATA, track_order: true});

// create attributes with names in reverse alphabetical order:
a_data.create_attribute("c", "first attribute");
a_data.create_attribute("b", "second attribute");
a_data.create_attribute("a", "third attribute");

write_file.flush();
write_file.close();
}

// write with tracking off
{
const write_file = new h5wasm.File(UNTRACKED_FILEPATH, "w", false);

// create attributes with names in reverse alphabetical order:
write_file.create_attribute("c", "first attribute");
write_file.create_attribute("b", "second attribute");
write_file.create_attribute("a", "third attribute");

// create datasets with names in reverse alphabetical order:
write_file.create_dataset({name: "c_data", data: DATA});
write_file.create_dataset({name: "b_data", data: DATA});
const a_data = write_file.create_dataset({name: "a_data", data: DATA, track_order: false});

// create attributes with names in reverse alphabetical order:
a_data.create_attribute("c", "first attribute");
a_data.create_attribute("b", "second attribute");
a_data.create_attribute("a", "third attribute");

write_file.flush();
write_file.close();
}

// read with tracking on
{
const read_file = new h5wasm.File(TRACKED_FILEPATH, "r");

// check that attrs are in original order:
assert.deepEqual(Object.keys(read_file.attrs), ["c", "b", "a"]);

// check that datasets are in original order:
assert.deepEqual(read_file.keys(), ["c_data", "b_data", "a_data"]);

// check that attrs of dataset are in original order:
const dataset_attrs = read_file.get("a_data").attrs;
assert.deepEqual(Object.keys(dataset_attrs), ["c", "b", "a"]);

read_file.close()
}

// read with tracking off
{
const read_file = new h5wasm.File(UNTRACKED_FILEPATH, "r");

// check that attrs are in alphabetical (not original) order:
assert.deepEqual(Object.keys(read_file.attrs), ["a", "b", "c"]);

// check that datasets are in alphabetical (not original) order:
assert.deepEqual(read_file.keys(), ["a_data", "b_data", "c_data"]);

// check that attrs of dataset are in alphabetical (not original) order:
const dataset_attrs = read_file.get("a_data").attrs;
assert.deepEqual(Object.keys(dataset_attrs), ["a", "b", "c"]);

read_file.close()
}

// cleanup file when finished:
unlinkSync(TRACKED_FILEPATH);
unlinkSync(UNTRACKED_FILEPATH);

}

export const tests = [
{
description: "Create and read attrs and datasets with and without track_order",
test: test_track_order
},
];

export default tests;
