From eba0ed781eb6a850b77c559a2277118d7c342766 Mon Sep 17 00:00:00 2001 From: Allen Byrne Date: Thu, 17 Oct 2024 09:33:08 -0500 Subject: [PATCH 1/3] Incorporate remaining parts of the filters.html file --- doxygen/dox/ExamplesAPI.dox | 10 ---- src/H5Dmodule.h | 3 +- src/H5PLmodule.h | 95 +++++++++++++++++++++++++++++++++---- src/H5Zmodule.h | 38 +++++++++++++++ 4 files changed, 125 insertions(+), 21 deletions(-) diff --git a/doxygen/dox/ExamplesAPI.dox b/doxygen/dox/ExamplesAPI.dox index e2ee26ee28e..f3903cec1d2 100644 --- a/doxygen/dox/ExamplesAPI.dox +++ b/doxygen/dox/ExamplesAPI.dox @@ -800,16 +800,6 @@ FORTRAN h5ex_d_lzf.ddl -Read / Write Dataset using MAFISC Compression - -C - FORTRAN Java JavaObj MATLAB PyHigh PyLow - -h5ex_d_mafisc.h5 -h5ex_d_mafisc.tst -h5ex_d_mafisc.ddl - - Read / Write Dataset using ZFP Compression C diff --git a/src/H5Dmodule.h b/src/H5Dmodule.h index 96c5b1a704e..695c5b6b3d0 100644 --- a/src/H5Dmodule.h +++ b/src/H5Dmodule.h @@ -1612,7 +1612,8 @@ allocated if necessary. * care must be taken to assure that all the external files are accessible in the new location. * * \subsection subsec_dataset_filters Using HDF5 Filters - * This section describes in detail how to use the n-bit, scale-offset filters and szip filters. + * This section describes in detail how to use the n-bit, scale-offset filters and szip filters. For + * details on the how filters are used in the read / write of data, see #subsubsec_dataset_transfer_pipe. * * \subsubsection subsubsec_dataset_filters_nbit Using the N‐bit Filter * N-bit data has n significant bits, where n may not correspond to a precise number of bytes. On diff --git a/src/H5PLmodule.h b/src/H5PLmodule.h index 1aedc2783fe..e7606fa9b54 100644 --- a/src/H5PLmodule.h +++ b/src/H5PLmodule.h @@ -48,16 +48,23 @@ * available to the application. 
For example, if the application intends to apply the HDF5 bzip2 compression * filter that was registered with The HDF Group and has an identification number 307 * (Registered - * Filters) then the application would follow the steps as outlined below: \code dcpl = H5Pcreate - * (H5P_DATASET_CREATE); status = H5Pset_filter (dcpl, (H5Z_filter_t)307, H5Z_FLAG_MANDATORY, (size_t)6, - * cd_values); dset = H5Dcreate (file, DATASET, H5T_STD_I32LE, space, H5P_DEFAULT, dcpl, status = H5Dwrite - * (dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, wdata[0]); \endcode + * Filters) then the application would follow the steps as outlined below: + * \code + * dcpl = H5Pcreate (H5P_DATASET_CREATE); + * status = H5Pset_filter (dcpl, (H5Z_filter_t)307, H5Z_FLAG_MANDATORY, (size_t)6, cd_values); + * dset = H5Dcreate (file, DATASET, H5T_STD_I32LE, space, H5P_DEFAULT, dcpl, H5P_DEFAULT); + * status = H5Dwrite (dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, wdata[0]); + * \endcode * * \subsubsection subsubsec_filter_plugins_model_read Reading Data with an Applied Third-party Filter * An application does not need to do anything special to read the data with a third-party filter applied. For * example, if one wants to read data written in the previous example, the following regular steps should be - * taken: \code file = H5Fopen (FILE, H5F_ACC_RDONLY, H5P_DEFAULT); dset = H5Dopen (file, DATASET, - * H5P_DEFAULT); H5Dread (dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata[0]); \endcode + * taken: + * \code + * file = H5Fopen (FILE, H5F_ACC_RDONLY, H5P_DEFAULT); + * dset = H5Dopen (file, DATASET, H5P_DEFAULT); + * H5Dread (dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata[0]); + * \endcode * * The command-line utility h5dump, for example, will read and display the data as shown: * \code @@ -161,10 +168,78 @@ * plugin on the system. 
* * \subsubsection subsubsec_filter_plugins_prog_write Writing a Filter Function - * The HDF5 filter function for the dynamically loaded filter feature should be written as any custom filter - * described in Custom Filters. See the - * “Example” section, section 5, of that document to get an idea of the simple filter function, and see the - * example of the more sophisticated HDF5 bzip2 filter function in the “Building an HDF5 bzip2 Plugin Example” + * The HDF5 filter function for the dynamically loaded filter feature should be written as a custom filter. + * This example shows how to define and register a simple filter + * that adds a checksum capability to the data stream. + * + * The function that acts as the filter always returns zero (failure) if the md5() function was not detected at + * configuration time (left as an exercise for the reader). Otherwise the function is broken down to an input and output + * half. The output half calculates a checksum, increases the size of the output buffer if necessary, and appends the checksum to + * the end of the buffer. The input half calculates the checksum on the first part of the buffer and compares it to the checksum + * already stored at the end of the buffer. If the two differ then zero (failure) is returned, otherwise the buffer size is reduced + * to exclude the checksum. 
+ * /code + * size_t md5_filter(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], size_t nbytes, size_t *buf_size, void **buf) + * { + * #ifdef HAVE_MD5 + * unsigned char cksum[16]; + * + * if (flags & H5Z_REVERSE) { + * // Input + * assert(nbytes >= 16); + * md5(nbytes-16, *buf, cksum); + * // Compare + * if (memcmp(cksum, (char*)(*buf)+ nbytes- 16, 16)) { + * return 0; // fail + * } + * // Strip off checksum + * return nbytes - 16; + * } + * else { + * // Output + * md5(nbytes, *buf, cksum); + * // Increase buffer size if necessary + * if (nbytes + 16 > *buf_size) { + * *buf_size = nbytes + 16; + * *buf = realloc(*buf, *buf_size); + * } + * // Append checksum + * memcpy((char*)(*buf)+nbytes, cksum, 16); + * return nbytes+16; + * } + * #else + * return 0; // fail + * #endif + * } + * /endcode + * + * Once the filter function is defined it must be registered so + * the HDF5 library knows about it. Since we're testing this + * filter we choose one of the #H5Z_filter_t numbers + * from the reserved range. We'll randomly choose 305. + * + * /code + * #define FILTER_MD5 305 + * herr_t status = H5Zregister(FILTER_MD5, "md5 checksum", md5_filter); + * /endcode + * + * Now we can use the filter in a pipeline. We could have added + * the filter to the pipeline before defining or registering the + * filter as long as the filter was defined and registered by the time + * we tried to use it (if the filter is marked as optional then we + * could have used it without defining it and the library would + * have automatically removed it from the pipeline for each chunk + * written before the filter was defined and registered). 
+ * + * \code + * hid_t dcpl = H5Pcreate(H5P_DATASET_CREATE); + * hsize_t chunk_size[3] = {10,10,10}; + * H5Pset_chunk(dcpl, 3, chunk_size); + * H5Pset_filter(dcpl, FILTER_MD5, 0, 0, NULL); + * hid_t dset = H5Dcreate(file, "dset", H5T_NATIVE_DOUBLE, space, H5P_DEFAULT, dcpl, H5P_DEFAULT); + * \endcode + * + * See the example of a more sophisticated HDF5 bzip2 filter function in the \ref subsec_filter_plugins_build * section. The HDF5 bzip2 filter function is also available for download from Filter Plugin Repository. * diff --git a/src/H5Zmodule.h b/src/H5Zmodule.h index d1087b8bcd5..e02be5c2677 100644 --- a/src/H5Zmodule.h +++ b/src/H5Zmodule.h @@ -71,6 +71,7 @@ * shuffling algorithm) and error checking (Fletcher32 checksum). For further * flexibility, the library allows a user application to extend the pipeline * through the creation and registration of customized filters. + * See \ref sec_filter_plugins. * * The flexibility of the filter pipeline implementation enables the definition * of additional filters by a user application. A filter @@ -83,6 +84,43 @@ * the difficulty of implementing random access for partial I/O. Compact dataset * filters are not supported because they would not produce significant results. * + * HDF5 allows chunked data to pass through user-defined filters + * on the way to or from disk. The filters operate on chunks of an + * #H5D_CHUNKED dataset and can be arranged in a pipeline + * so output of one filter becomes the input of the next filter. + * + * Each filter has a two-byte identification number (type + * #H5Z_filter_t) allocated by The HDF Group and can also be + * passed application-defined integer resources to control its + * behavior. Each filter also has an optional ASCII comment + * string. + *
+ * <table>
+ * <tr>
+ * <th>Values for #H5Z_filter_t</th><th>Description</th>
+ * </tr>
+ * <tr>
+ * <td>0-255</td>
+ * <td>These values are reserved for filters predefined and
+ * registered by the HDF5 library and of use to the general
+ * public.</td>
+ * </tr>
+ * <tr>
+ * <td>256-511</td>
+ * <td>Filter numbers in this range are used for testing only
+ * and can be used temporarily by any organization. No
+ * attempt is made to resolve numbering conflicts since all
+ * definitions are by nature temporary.</td>
+ * </tr>
+ * <tr>
+ * <td>512-65535</td>
+ * <td>Reserved for future assignment. Please contact the
+ * HDF5 development team
+ * to reserve a value or range of values for
+ * use by your filters.</td>
+ * </tr>
+ * </table>
+ * * Filter identifiers for the filters distributed with the HDF5 * Library are as follows: * From e7d602819240544f0af4b371d1e24036078c4a71 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 14:35:13 +0000 Subject: [PATCH 2/3] Committing clang-format changes --- src/H5PLmodule.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/H5PLmodule.h b/src/H5PLmodule.h index e7606fa9b54..adbfd302a99 100644 --- a/src/H5PLmodule.h +++ b/src/H5PLmodule.h @@ -172,14 +172,14 @@ * This example shows how to define and register a simple filter * that adds a checksum capability to the data stream. * - * The function that acts as the filter always returns zero (failure) if the md5() function was not detected at - * configuration time (left as an exercise for the reader). Otherwise the function is broken down to an input and output - * half. The output half calculates a checksum, increases the size of the output buffer if necessary, and appends the checksum to - * the end of the buffer. The input half calculates the checksum on the first part of the buffer and compares it to the checksum - * already stored at the end of the buffer. If the two differ then zero (failure) is returned, otherwise the buffer size is reduced - * to exclude the checksum. - * /code - * size_t md5_filter(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], size_t nbytes, size_t *buf_size, void **buf) + * The function that acts as the filter always returns zero (failure) if the md5() function was + * not detected at configuration time (left as an exercise for the reader). Otherwise the function is broken + * down to an input and output half. The output half calculates a checksum, increases the size of the output + * buffer if necessary, and appends the checksum to the end of the buffer. 
The input half calculates the + * checksum on the first part of the buffer and compares it to the checksum already stored at the end of the + * buffer. If the two differ then zero (failure) is returned, otherwise the buffer size is reduced to exclude + * the checksum. /code size_t md5_filter(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], + * size_t nbytes, size_t *buf_size, void **buf) * { * #ifdef HAVE_MD5 * unsigned char cksum[16]; From 403b4a5c60bd2817dbcb123c9261f5fc8ff38001 Mon Sep 17 00:00:00 2001 From: Allen Byrne Date: Thu, 17 Oct 2024 10:10:22 -0500 Subject: [PATCH 3/3] Correct doxygen usage --- src/H5Dmodule.h | 2 +- src/H5PLmodule.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/H5Dmodule.h b/src/H5Dmodule.h index 695c5b6b3d0..5829e80fc9a 100644 --- a/src/H5Dmodule.h +++ b/src/H5Dmodule.h @@ -1613,7 +1613,7 @@ allocated if necessary. * * \subsection subsec_dataset_filters Using HDF5 Filters * This section describes in detail how to use the n-bit, scale-offset filters and szip filters. For - * details on the how filters are used in the read / write of data, see #subsubsec_dataset_transfer_pipe. + * details on how filters are used in the read / write of data, see \ref subsubsec_dataset_transfer_pipe. * * \subsubsection subsubsec_dataset_filters_nbit Using the N‐bit Filter * N-bit data has n significant bits, where n may not correspond to a precise number of bytes. On diff --git a/src/H5PLmodule.h b/src/H5PLmodule.h index adbfd302a99..9f867ee5fff 100644 --- a/src/H5PLmodule.h +++ b/src/H5PLmodule.h @@ -181,7 +181,7 @@ * the checksum. 
/code size_t md5_filter(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], * size_t nbytes, size_t *buf_size, void **buf) * { - * #ifdef HAVE_MD5 + * \c \#ifdef HAVE_MD5 * unsigned char cksum[16]; * * if (flags & H5Z_REVERSE) { @@ -207,9 +207,9 @@ * memcpy((char*)(*buf)+nbytes, cksum, 16); * return nbytes+16; * } - * #else + * \c \#else * return 0; // fail - * #endif + * \c \#endif * } * /endcode * @@ -219,7 +219,7 @@ * from the reserved range. We'll randomly choose 305. * * /code - * #define FILTER_MD5 305 + * \c \#define FILTER_MD5 305 * herr_t status = H5Zregister(FILTER_MD5, "md5 checksum", md5_filter); * /endcode *