Skip to content

Latest commit

 

History

History
271 lines (222 loc) · 7.32 KB

fsm.org

File metadata and controls

271 lines (222 loc) · 7.32 KB

Tracking HDF5 Free Space

Introduction

The Story

Without FSM

#include "literate-hdf5.h"

/* Demo driver, free-space tracking NOT enabled: create two datasets, unlink
   one, close the file, then re-open it and add a third dataset.  The
   <<...>> placeholders are noweb references to the building blocks defined
   later in this document; each one is used as a callable expression. */
int main(int argc, char** argv)
{
  /* Create no_fsm.h5 through the make-file building block. */
  hid_t file = (*
                <<make-file>> ) ("no_fsm.h5"); // (ref:no-fsm-blk0)

  /* Create two 1024 x 1024 datasets; each handle is closed immediately. */
  H5Dclose((*
            <<create-1Mi-dataset>> ) (file, "1Mi-double", H5T_IEEE_F64LE));
  H5Dclose((*
            <<create-1Mi-dataset>> ) (file, "1Mi-float", H5T_IEEE_F32LE));

  /* Unlink the double dataset, then close the file. */
  H5Ldelete(file, "1Mi-double", H5P_DEFAULT);
  H5Fclose(file);

  /* Re-open the file read-write and add a third dataset. */
  file = H5Fopen("no_fsm.h5", H5F_ACC_RDWR, H5P_DEFAULT);
  H5Dclose((*
            <<create-1Mi-dataset>> ) (file, "1Mi-uint64", H5T_STD_U64LE));
  H5Fclose(file);
}
h5dump -BH no_fsm.h5
HDF5 "no_fsm.h5" {
SUPER_BLOCK {
   SUPERBLOCK_VERSION 3
   FREELIST_VERSION 0
   SYMBOLTABLE_VERSION 0
   OBJECTHEADER_VERSION 0
   OFFSET_SIZE 8
   LENGTH_SIZE 8
   BTREE_RANK 16
   BTREE_LEAF 4
   ISTORE_K 32
   FILE_SPACE_STRATEGY H5F_FSPACE_STRATEGY_FSM_AGGR
   FREE_SPACE_PERSIST FALSE
   FREE_SPACE_SECTION_THRESHOLD 1
   FILE_SPACE_PAGE_SIZE 4096
   USER_BLOCK {
      USERBLOCK_SIZE 0
   }
}
GROUP "/" {
   DATASET "1Mi-float" {
      DATATYPE  H5T_IEEE_F32LE
      DATASPACE  SIMPLE { ( 1024, 1024 ) / ( 1024, 1024 ) }
   }
   DATASET "1Mi-uint64" {
      DATATYPE  H5T_STD_U64LE
      DATASPACE  SIMPLE { ( 1024, 1024 ) / ( 1024, 1024 ) }
   }
}
}

With FSM

#include "literate-hdf5.h"

/* Demo driver, free-space tracking enabled: the same create / unlink /
   re-open / create sequence as the no_fsm.h5 program, but the file comes
   from the make-fsm-file building block.  The <<...>> placeholders are
   noweb references to building blocks defined later in this document. */
int main(int argc, char** argv)
{
  /* Create fsm.h5 with a 1048576-byte threshold and a 4096-byte page size. */
  hid_t file = (*
                <<make-fsm-file>> ) ("fsm.h5", 1048576, 4096); // (ref:fsm-blk0)
  /* Create two 1024 x 1024 datasets; each handle is closed immediately. */
  H5Dclose((*
             <<create-1Mi-dataset>> ) (file, "1Mi-double", H5T_IEEE_F64LE));
  H5Dclose((*
             <<create-1Mi-dataset>> ) (file, "1Mi-float", H5T_IEEE_F32LE));

  /* Unlink the double dataset, then close the file. */
  H5Ldelete(file, "1Mi-double", H5P_DEFAULT);
  H5Fclose(file);

  /* Re-open the file read-write and add a third dataset. */
  file = H5Fopen("fsm.h5", H5F_ACC_RDWR, H5P_DEFAULT);
  H5Dclose((*
            <<create-1Mi-dataset>> ) (file, "1Mi-uint64", H5T_STD_U64LE));
  H5Fclose(file);
}
h5dump -BH fsm.h5
HDF5 "fsm.h5" {
SUPER_BLOCK {
   SUPERBLOCK_VERSION 3
   FREELIST_VERSION 0
   SYMBOLTABLE_VERSION 0
   OBJECTHEADER_VERSION 0
   OFFSET_SIZE 8
   LENGTH_SIZE 8
   BTREE_RANK 16
   BTREE_LEAF 4
   ISTORE_K 32
   FILE_SPACE_STRATEGY H5F_FSPACE_STRATEGY_PAGE
   FREE_SPACE_PERSIST TRUE
   FREE_SPACE_SECTION_THRESHOLD 1048576
   FILE_SPACE_PAGE_SIZE 4096
   USER_BLOCK {
      USERBLOCK_SIZE 0
   }
}
GROUP "/" {
   DATASET "1Mi-float" {
      DATATYPE  H5T_IEEE_F32LE
      DATASPACE  SIMPLE { ( 1024, 1024 ) / ( 1024, 1024 ) }
   }
   DATASET "1Mi-uint64" {
      DATATYPE  H5T_STD_U64LE
      DATASPACE  SIMPLE { ( 1024, 1024 ) / ( 1024, 1024 ) }
   }
}
}

The difference

ls -lh *fsm.h5
-rw------- 1 gerdheber gerdheber 13M Jan  8 15:54 fsm.h5
-rw------- 1 gerdheber gerdheber 21M Jan  8 15:55 no_fsm.h5
import difflib
import sys

# out1 and out2 are not defined in this snippet; presumably they are the two
# h5dump listings handed in by the surrounding document -- confirm.
# Both texts are split on whitespace, so the comparison is token-by-token and
# the tokens carry no line terminators when they are written back out.
tokens_before = out1.split()
tokens_after = out2.split()
for piece in difflib.context_diff(tokens_before, tokens_after):
    sys.stdout.write(piece)
***
---
***************
*** 1,5 ****
  HDF5! "no_fsm.h5"  {  SUPER_BLOCK  {--- 1,5 ----
  HDF5! "fsm.h5"  {  SUPER_BLOCK  {***************
*** 22,32 ****
  ISTORE_K  32  FILE_SPACE_STRATEGY! H5F_FSPACE_STRATEGY_FSM_AGGR  FREE_SPACE_PERSIST! FALSE  FREE_SPACE_SECTION_THRESHOLD! 1  FILE_SPACE_PAGE_SIZE  4096  USER_BLOCK--- 22,32 ----
  ISTORE_K  32  FILE_SPACE_STRATEGY! H5F_FSPACE_STRATEGY_PAGE  FREE_SPACE_PERSIST! TRUE  FREE_SPACE_SECTION_THRESHOLD! 1048576  FILE_SPACE_PAGE_SIZE  4096  USER_BLOCK

Reclaiming the gap with h5repack

h5repack -v no_fsm.h5 no_fsm_repacked.h5
0
ls -lh no_fsm*.h5
-rw------- 1 gerdheber gerdheber 21M Jan  8 15:55 no_fsm.h5
-rw------- 1 gerdheber gerdheber 13M Jan  8 15:59 no_fsm_repacked.h5

Building Blocks

File creation without free-space tracking

The tracking of free space in HDF5 files beyond H5Fclose is available only in HDF5 1.10+. Although we do not enable free-space tracking in this building block, we configure a file access property list on line (make-file-blk0) to create a “vanilla” HDF5 file compatible with the 1.10 series of HDF5 libraries.

/* Create (truncating any existing file) an HDF5 file with default creation
   properties, i.e. without configuring any file space strategy.

   name  path of the file to create

   Returns the identifier produced by H5Fcreate; the caller closes it with
   H5Fclose.  The access property list created here is closed before
   returning. */
lambda(hid_t, (const char* name),
       {
         hid_t access_props = H5Pcreate(H5P_FILE_ACCESS);
         hid_t new_file;

         /* Library version bounds: low = V110, high = LATEST. */
         H5Pset_libver_bounds(access_props, H5F_LIBVER_V110, H5F_LIBVER_LATEST); // (ref:make-file-blk0)
         new_file = H5Fcreate(name, H5F_ACC_TRUNC, H5P_DEFAULT, access_props);

         H5Pclose(access_props);
         return new_file;
       })

File creation with free-space tracking enabled

The tracking of free space in HDF5 files beyond H5Fclose is available only in HDF5 1.10+. We configure a file access property list on line (make-fsm-blk1), accordingly.

We enable free-space tracking by setting

  1. An appropriate file space strategy (see line (make-fsm-blk0))
  2. A threshold (in bytes) for free-space sections to be tracked (passed in the same call, see line (make-fsm-blk0))
/* Create (truncating any existing file) an HDF5 file that uses the paged
   file space strategy with persistent free-space tracking.

   name       path of the file to create
   threshold  smallest free-space section size, in bytes, that is tracked
   page_size  file space page size, in bytes

   Returns the identifier produced by H5Fcreate; the caller closes it with
   H5Fclose.  Both property lists created here are closed before returning.

   The paged strategy is compiled in only for HDF5 >= 1.10.2: the
   H5Pset_file_space_strategy / H5Pset_file_space_page_size calls date from
   1.10.1 and the H5F_LIBVER_V110 bound from 1.10.2, so a looser 1.10.0
   check would fail to compile against 1.10.0 and 1.10.1.  With an older
   library the file is created WITHOUT free-space tracking and threshold
   and page_size are ignored.

   NOTE(review): the #if/#elif/#endif lines sit inside the argument list of
   lambda, which is presumably a function-like macro.  ISO C leaves
   directives in macro arguments undefined; GNU cpp documents that it
   processes them normally -- confirm if another preprocessor is used. */
lambda(hid_t, (const char* name, hsize_t threshold, hsize_t page_size),
       {
         hid_t result;
         hid_t fcpl = H5Pcreate(H5P_FILE_CREATE);
         hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
#if H5_VERSION_GE(1, 10, 2) // (ref:vers-chk0)
         H5Pset_file_space_page_size (fcpl, page_size); // (ref:page-size)
         /* Third argument 1 = persist free-space information in the file. */
         H5Pset_file_space_strategy(fcpl, H5F_FSPACE_STRATEGY_PAGE, 1, // (ref:make-fsm-blk0)
                                    threshold);
         H5Pset_libver_bounds(fapl, H5F_LIBVER_V110, H5F_LIBVER_LATEST); // (ref:make-fsm-blk1)
#elif H5_VERSION_GE(1, 8, 0) // (ref:vers-chk1)
         /* No paged free-space management available: the tuning parameters
            are deliberately unused in this configuration. */
         (void) threshold;
         (void) page_size;
         H5Pset_libver_bounds(fapl, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST);
#endif
         result = H5Fcreate(name, H5F_ACC_TRUNC, fcpl, fapl);
         H5Pclose(fapl);
         H5Pclose(fcpl);

         return result;
       })

Create a 1 million element dataset

/* Create a two-dimensional 1024 x 1024 dataset of element type dtype and
   link it under name in file.

   file   identifier of the file (location) that receives the dataset
   name   link name of the new dataset
   dtype  element datatype identifier

   Returns the identifier produced by H5Dcreate; the caller closes it with
   H5Dclose.  The dataspace and both property lists created here are closed
   before returning. */
lambda(hid_t, (hid_t file, const char* name, hid_t dtype),
       {
         hsize_t dims[2] = {1024, 1024};
         hid_t dset;
         hid_t extent = H5Screate_simple(2, dims, NULL);
         hid_t link_props = H5Pcreate(H5P_LINK_CREATE);
         hid_t dset_props = H5Pcreate(H5P_DATASET_CREATE);

         /* Let missing intermediate groups in name be created on the fly. */
         H5Pset_create_intermediate_group(link_props, 1);
         /* Request early allocation time for the dataset's storage. */
         H5Pset_alloc_time(dset_props, H5D_ALLOC_TIME_EARLY);

         dset = H5Dcreate(file, name, dtype, extent, link_props, dset_props,
                          H5P_DEFAULT);

         H5Pclose(dset_props);
         H5Pclose(link_props);
         H5Sclose(extent);

         return dset;
       })