diff --git a/docs/HTML/DateTime.html b/docs/HTML/DateTime.html index 0ffffee3..0cc9a795 100644 --- a/docs/HTML/DateTime.html +++ b/docs/HTML/DateTime.html @@ -91,6 +91,7 @@

Summary

Since DataFrame is a statistical library, it often deals with time-series data. So, it needs to keep track of time.
The most efficient way of indexing DataFrame by time is to use an index type of time_t for second precision or double or long long integer for more precision. DateTime class provides a more elaborate handling of time. Also, it is a general handy DateTime object. DateTime is a cool and handy object to manipulate date/time with nanosecond precision and multi timezone capability. It has a very simple and intuitive interface that allows you to break date/time to their components, reassemble date/time from their components, advance or pullback date/time with different granularities, and more.

+DateTime is one of the types that the DataFrame library can read from and write to files and can serialize/deserialize.


diff --git a/docs/HTML/VirtualString.html b/docs/HTML/VirtualString.html index b76573c9..99b5001a 100644 --- a/docs/HTML/VirtualString.html +++ b/docs/HTML/VirtualString.html @@ -61,8 +61,8 @@ FixedSizeString is a fixed-size and null-terminated string. Since the size is a template parameter, each different size is a different object type. But since FixedSizeString is derived from VirtualString (which is not a templated object), different size instances can be interchanged through references to VirtualString. VirtualString implements almost all of std::string functionalities.
-There is no dynamic memory allocation and deallocation involved here.
-FixedSizeString is the type of column names in DataFrame and could be used by users for their other purposes. +FixedSizeString does not do any dynamic memory allocation/deallocation.
+The FixedSizeString "Convenient typedefs" are among the types that the DataFrame library can read from and write to files and can serialize/deserialize.
diff --git a/docs/HTML/read.html b/docs/HTML/read.html index bc15983d..ac634d31 100644 --- a/docs/HTML/read.html +++ b/docs/HTML/read.html @@ -120,10 +120,16 @@ string -- std::string string -- const char * string -- char * +vstr32 -- Fixed-size string of 31 char length +vstr64 -- Fixed-size string of 63 char length +vstr128 -- Fixed-size string of 127 char length +vstr512 -- Fixed-size string of 511 char length +vstr1K -- Fixed-size string of 1023 char length +vstr2K -- Fixed-size string of 2047 char length bool -- bool DateTime -- DateTime data in format of - <Epoch seconds>.<nanoseconds> - (1516179600.874123908) + <Epoch seconds>.<nanoseconds> + (1516179600.874123908) In case of io_format::csv2, io_format::csv, and io_format::binary the following additional types are also supported:
diff --git a/docs/HTML/write.html b/docs/HTML/write.html
index 72b2d242..ab74b6bb 100644
--- a/docs/HTML/write.html
+++ b/docs/HTML/write.html
@@ -118,10 +118,16 @@
 string     -- std::string
 string     -- const char *
 string     -- char *
+vstr32     -- Fixed-size string of 31 char length
+vstr64     -- Fixed-size string of 63 char length
+vstr128    -- Fixed-size string of 127 char length
+vstr512    -- Fixed-size string of 511 char length
+vstr1K     -- Fixed-size string of 1023 char length
+vstr2K     -- Fixed-size string of 2047 char length
 bool       -- bool
 DateTime   -- DateTime data in format of
-    <Epoch seconds>.<nanoseconds>
-    (1516179600.874123908)
+              <Epoch seconds>.<nanoseconds>
+              (1516179600.874123908)
         
In case of io_format::csv2, io_format::csv, and io_format::binary the following additional types are also supported:
diff --git a/include/DataFrame/Internals/DataFrame_misc.tcc b/include/DataFrame/Internals/DataFrame_misc.tcc
index fd26c843..0167d199 100644
--- a/include/DataFrame/Internals/DataFrame_misc.tcc
+++ b/include/DataFrame/Internals/DataFrame_misc.tcc
@@ -244,7 +244,13 @@ DataFrame::print_binary_functor_::operator() (const T &vec)  {
     const long  local_start_row = std::min (long(vec.size()), start_row);
     const long  local_end_row = std::min (long(vec.size()), end_row);
 
-    if constexpr (std::is_same_v)
+    if constexpr (std::is_same_v ||
+                  std::is_same_v ||
+                  std::is_same_v ||
+                  std::is_same_v ||
+                  std::is_same_v ||
+                  std::is_same_v ||
+                  std::is_same_v)
         _write_binary_string_(os, vec, local_start_row, local_end_row);
     else if constexpr (std::is_same_v)
         _write_binary_datetime_(os, vec, local_start_row, local_end_row);
diff --git a/include/DataFrame/Internals/DataFrame_private_decl.h b/include/DataFrame/Internals/DataFrame_private_decl.h
index 773174d4..fc2994af 100644
--- a/include/DataFrame/Internals/DataFrame_private_decl.h
+++ b/include/DataFrame/Internals/DataFrame_private_decl.h
@@ -1373,8 +1373,9 @@ struct  ColVectorPushBack_, Dummy>  {
 
 // ----------------------------------------------------------------------------
 
+template
 inline static void
-json_str_col_vector_push_back_(StlVecType &vec,
+json_str_col_vector_push_back_(StlVecType &vec,
                                std::istream &file)  {
 
     char    value[2048];
diff --git a/include/DataFrame/Internals/DataFrame_read.tcc b/include/DataFrame/Internals/DataFrame_read.tcc
index 3f5231ed..2348648d 100644
--- a/include/DataFrame/Internals/DataFrame_read.tcc
+++ b/include/DataFrame/Internals/DataFrame_read.tcc
@@ -301,6 +301,48 @@ void DataFrame::read_json_(std::istream &stream, bool columns_only)  {
                 vec.reserve(col_size);
                 json_str_col_vector_push_back_(vec, stream);
             }
+            else if (col_type == "vstr32")  {
+                StlVecType    &vec =
+                    create_column(col_name.c_str(), false);
+
+                vec.reserve(col_size);
+                json_str_col_vector_push_back_(vec, stream);
+            }
+            else if (col_type == "vstr64")  {
+                StlVecType    &vec =
+                    create_column(col_name.c_str(), false);
+
+                vec.reserve(col_size);
+                json_str_col_vector_push_back_(vec, stream);
+            }
+            else if (col_type == "vstr128")  {
+                StlVecType   &vec =
+                    create_column(col_name.c_str(), false);
+
+                vec.reserve(col_size);
+                json_str_col_vector_push_back_(vec, stream);
+            }
+            else if (col_type == "vstr512")  {
+                StlVecType   &vec =
+                    create_column(col_name.c_str(), false);
+
+                vec.reserve(col_size);
+                json_str_col_vector_push_back_(vec, stream);
+            }
+            else if (col_type == "vstr1K")  {
+                StlVecType    &vec =
+                    create_column(col_name.c_str(), false);
+
+                vec.reserve(col_size);
+                json_str_col_vector_push_back_(vec, stream);
+            }
+            else if (col_type == "vstr2K")  {
+                StlVecType    &vec =
+                    create_column(col_name.c_str(), false);
+
+                vec.reserve(col_size);
+                json_str_col_vector_push_back_(vec, stream);
+            }
             else if (col_type == "DateTime")  {
                 StlVecType    &vec =
                     create_column(col_name.c_str(), false);
@@ -500,9 +542,9 @@ void DataFrame::read_csv_(std::istream &stream, bool columns_only)  {
                 col_vector_push_back_func_(vec, stream, &::strtoull);
             }
             else if (type_str == "string")  {
-                StlVecType                     &vec =
+                StlVecType &vec =
                     create_column(col_name.c_str(), false);
-                auto                                        converter =
+                auto                    converter =
                     [](const char *s, char **)-> const char * { return s; };
                 const ColVectorPushBack_
                     > slug;
@@ -510,6 +552,72 @@ void DataFrame::read_csv_(std::istream &stream, bool columns_only)  {
                 vec.reserve(::atoi(value.c_str()));
                 slug(vec, stream, converter);
             }
+            else if (type_str == "vstr32")  {
+                StlVecType    &vec =
+                    create_column(col_name.c_str(), false);
+                auto                    converter =
+                    [](const char *s, char **)-> const char * { return s; };
+                const ColVectorPushBack_
+                    > slug;
+
+                vec.reserve(::atoi(value.c_str()));
+                slug(vec, stream, converter);
+            }
+            else if (type_str == "vstr64")  {
+                StlVecType    &vec =
+                    create_column(col_name.c_str(), false);
+                auto                    converter =
+                    [](const char *s, char **)-> const char * { return s; };
+                const ColVectorPushBack_
+                    > slug;
+
+                vec.reserve(::atoi(value.c_str()));
+                slug(vec, stream, converter);
+            }
+            else if (type_str == "vstr128")  {
+                StlVecType   &vec =
+                    create_column(col_name.c_str(), false);
+                auto                    converter =
+                    [](const char *s, char **)-> const char * { return s; };
+                const ColVectorPushBack_
+                    > slug;
+
+                vec.reserve(::atoi(value.c_str()));
+                slug(vec, stream, converter);
+            }
+            else if (type_str == "vstr512")  {
+                StlVecType   &vec =
+                    create_column(col_name.c_str(), false);
+                auto                    converter =
+                    [](const char *s, char **)-> const char * { return s; };
+                const ColVectorPushBack_
+                    > slug;
+
+                vec.reserve(::atoi(value.c_str()));
+                slug(vec, stream, converter);
+            }
+            else if (type_str == "vstr1K")  {
+                StlVecType    &vec =
+                    create_column(col_name.c_str(), false);
+                auto                    converter =
+                    [](const char *s, char **)-> const char * { return s; };
+                const ColVectorPushBack_
+                    > slug;
+
+                vec.reserve(::atoi(value.c_str()));
+                slug(vec, stream, converter);
+            }
+            else if (type_str == "vstr2K")  {
+                StlVecType    &vec =
+                    create_column(col_name.c_str(), false);
+                auto                    converter =
+                    [](const char *s, char **)-> const char * { return s; };
+                const ColVectorPushBack_
+                    > slug;
+
+                vec.reserve(::atoi(value.c_str()));
+                slug(vec, stream, converter);
+            }
             else if (type_str == "DateTime")  {
                 StlVecType    &vec =
                     create_column(col_name.c_str(), false);
@@ -776,6 +884,37 @@ read_csv2_(std::FILE *stream,
                                           type_str.c_str(),
                                           col_name.c_str(),
                                           nrows);
+                else if (type_str == "vstr32")
+                    spec_vec.emplace_back(StlVecType(),
+                                          type_str.c_str(),
+                                          col_name.c_str(),
+                                          nrows);
+                else if (type_str == "vstr64")
+                    spec_vec.emplace_back(StlVecType(),
+                                          type_str.c_str(),
+                                          col_name.c_str(),
+                                          nrows);
+                else if (type_str == "vstr128")
+                    spec_vec.emplace_back(StlVecType(),
+                                          type_str.c_str(),
+                                          col_name.c_str(),
+                                          nrows);
+                else if (type_str == "vstr512")
+                    spec_vec.emplace_back(StlVecType(),
+                                          type_str.c_str(),
+                                          col_name.c_str(),
+                                          nrows);
+                else if (type_str == "vstr1K")
+                    spec_vec.emplace_back(StlVecType(),
+                                          type_str.c_str(),
+                                          col_name.c_str(),
+                                          nrows);
+                else if (type_str == "vstr2K")
+                    spec_vec.emplace_back(StlVecType(),
+                                          type_str.c_str(),
+                                          col_name.c_str(),
+                                          nrows);
+
                 // This includes DateTime, DateTimeAME, DateTimeEUR,
                 // DateTimeISO
                 //
@@ -1017,6 +1156,30 @@ read_csv2_(std::FILE *stream,
                     std::any_cast &>
                         (col_spec.col_vec).emplace_back(value);
                 }
+                else if (col_spec.type_spec == "vstr32")  {
+                    std::any_cast &>
+                        (col_spec.col_vec).emplace_back(value.c_str());
+                }
+                else if (col_spec.type_spec == "vstr64")  {
+                    std::any_cast &>
+                        (col_spec.col_vec).emplace_back(value.c_str());
+                }
+                else if (col_spec.type_spec == "vstr128")  {
+                    std::any_cast &>
+                        (col_spec.col_vec).emplace_back(value.c_str());
+                }
+                else if (col_spec.type_spec == "vstr512")  {
+                    std::any_cast &>
+                        (col_spec.col_vec).emplace_back(value.c_str());
+                }
+                else if (col_spec.type_spec == "vstr1K")  {
+                    std::any_cast &>
+                        (col_spec.col_vec).emplace_back(value.c_str());
+                }
+                else if (col_spec.type_spec == "vstr2K")  {
+                    std::any_cast &>
+                        (col_spec.col_vec).emplace_back(value.c_str());
+                }
                 else if (col_spec.type_spec == "DateTime")  {
                     if (! value.empty()) [[likely]]  {
                         time_t      t;
@@ -1307,6 +1470,42 @@ read_csv2_(std::FILE *stream,
                     std::move(std::any_cast &>
                         (col_spec.col_vec)),
                     nan_policy::dont_pad_with_nans);
+            else if (col_spec.type_spec == "vstr32")
+                load_column(
+                    col_spec.col_name.c_str(),
+                    std::move(std::any_cast &>
+                        (col_spec.col_vec)),
+                    nan_policy::dont_pad_with_nans);
+            else if (col_spec.type_spec == "vstr64")
+                load_column(
+                    col_spec.col_name.c_str(),
+                    std::move(std::any_cast &>
+                        (col_spec.col_vec)),
+                    nan_policy::dont_pad_with_nans);
+            else if (col_spec.type_spec == "vstr128")
+                load_column(
+                    col_spec.col_name.c_str(),
+                    std::move(std::any_cast &>
+                        (col_spec.col_vec)),
+                    nan_policy::dont_pad_with_nans);
+            else if (col_spec.type_spec == "vstr512")
+                load_column(
+                    col_spec.col_name.c_str(),
+                    std::move(std::any_cast &>
+                        (col_spec.col_vec)),
+                    nan_policy::dont_pad_with_nans);
+            else if (col_spec.type_spec == "vstr1K")
+                load_column(
+                    col_spec.col_name.c_str(),
+                    std::move(std::any_cast &>
+                        (col_spec.col_vec)),
+                    nan_policy::dont_pad_with_nans);
+            else if (col_spec.type_spec == "vstr2K")
+                load_column(
+                    col_spec.col_name.c_str(),
+                    std::move(std::any_cast &>
+                        (col_spec.col_vec)),
+                    nan_policy::dont_pad_with_nans);
             else if (! ::strncmp(col_spec.type_spec.c_str(), "DateTime", 8))
                 load_column(
                     col_spec.col_name.c_str(),
@@ -1442,8 +1641,26 @@ read_binary_(std::istream &stream,
         IndexVecType    idx_vec;
 
         if constexpr (std::is_same_v)
-            _read_binary_string_(stream, idx_vec, needs_flipping,
-                                 starting_row, num_rows);
+            _read_binary_string_(stream, idx_vec, needs_flipping,
+                                              starting_row, num_rows);
+        else if constexpr (std::is_same_v)
+            _read_binary_string_(stream, idx_vec, needs_flipping,
+                                           starting_row, num_rows);
+        else if constexpr (std::is_same_v)
+            _read_binary_string_(stream, idx_vec, needs_flipping,
+                                           starting_row, num_rows);
+        else if constexpr (std::is_same_v)
+            _read_binary_string_(stream, idx_vec, needs_flipping,
+                                            starting_row, num_rows);
+        else if constexpr (std::is_same_v)
+            _read_binary_string_(stream, idx_vec, needs_flipping,
+                                            starting_row, num_rows);
+        else if constexpr (std::is_same_v)
+            _read_binary_string_(stream, idx_vec, needs_flipping,
+                                           starting_row, num_rows);
+        else if constexpr (std::is_same_v)
+            _read_binary_string_(stream, idx_vec, needs_flipping,
+                                           starting_row, num_rows);
         else if constexpr (std::is_same_v)
             _read_binary_datetime_(stream, idx_vec, needs_flipping,
                                    starting_row, num_rows);
@@ -1460,8 +1677,56 @@ read_binary_(std::istream &stream,
         if (! std::strcmp(col_type, "string"))  {
             ColumnVecType  vec;
 
-            _read_binary_string_(stream, vec, needs_flipping,
-                                 starting_row, num_rows);
+            _read_binary_string_(stream, vec, needs_flipping,
+                                              starting_row, num_rows);
+            load_column(col_name, std::move(vec),
+                        nan_policy::dont_pad_with_nans);
+        }
+        else if (! std::strcmp(col_type, "vstr32"))  {
+            ColumnVecType vec;
+
+            _read_binary_string_(stream, vec, needs_flipping,
+                                           starting_row, num_rows);
+            load_column(col_name, std::move(vec),
+                        nan_policy::dont_pad_with_nans);
+        }
+        else if (! std::strcmp(col_type, "vstr64"))  {
+            ColumnVecType vec;
+
+            _read_binary_string_(stream, vec, needs_flipping,
+                                           starting_row, num_rows);
+            load_column(col_name, std::move(vec),
+                        nan_policy::dont_pad_with_nans);
+        }
+        else if (! std::strcmp(col_type, "vstr128"))  {
+            ColumnVecType    vec;
+
+            _read_binary_string_(stream, vec, needs_flipping,
+                                            starting_row, num_rows);
+            load_column(col_name, std::move(vec),
+                        nan_policy::dont_pad_with_nans);
+        }
+        else if (! std::strcmp(col_type, "vstr512"))  {
+            ColumnVecType    vec;
+
+            _read_binary_string_(stream, vec, needs_flipping,
+                                            starting_row, num_rows);
+            load_column(col_name, std::move(vec),
+                        nan_policy::dont_pad_with_nans);
+        }
+        else if (! std::strcmp(col_type, "vstr1K"))  {
+            ColumnVecType vec;
+
+            _read_binary_string_(stream, vec, needs_flipping,
+                                           starting_row, num_rows);
+            load_column(col_name, std::move(vec),
+                        nan_policy::dont_pad_with_nans);
+        }
+        else if (! std::strcmp(col_type, "vstr2K"))  {
+            ColumnVecType vec;
+
+            _read_binary_string_(stream, vec, needs_flipping,
+                                           starting_row, num_rows);
             load_column(col_name, std::move(vec),
                         nan_policy::dont_pad_with_nans);
         }
diff --git a/include/DataFrame/Internals/DataFrame_standalone.tcc b/include/DataFrame/Internals/DataFrame_standalone.tcc
index 596f7ff9..b35fa1aa 100644
--- a/include/DataFrame/Internals/DataFrame_standalone.tcc
+++ b/include/DataFrame/Internals/DataFrame_standalone.tcc
@@ -31,6 +31,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -80,7 +81,8 @@ std::unordered_map<_TypeInfoRef_,
                    _TypeinfoHasher_,
                    _TypeinfoEqualTo_>
 _typeinfo_name_  {
-    // Fundamentals
+
+    // Numerics
     //
     { typeid(float), "float" },
     { typeid(double), "double" },
@@ -95,11 +97,22 @@ _typeinfo_name_  {
     { typeid(unsigned long long int), "ulonglong" },
     { typeid(char), "char" },
     { typeid(unsigned char), "uchar" },
+    { typeid(bool), "bool" },
+
+    // Strings
+    //
     { typeid(std::string), "string" },
     { typeid(const char *), "string" },
     { typeid(char *), "string" },
-    { typeid(bool), "bool" },
-
+    { typeid(String32), "vstr32" },
+    { typeid(String64), "vstr64" },
+    { typeid(String128), "vstr128" },
+    { typeid(String512), "vstr512" },
+    { typeid(String1K), "vstr1K" },
+    { typeid(String2K), "vstr2K" },
+
+    // DateTime
+    //
     { typeid(DateTime), "DateTime" },
 
     // Pairs
@@ -1370,7 +1383,7 @@ _read_binary_common_(STRM &strm, bool needs_flipping, std::size_t start_row)  {
 }
 // ----------------------------------------------------------------------------
 
-template
+template
 inline static STRM &
 _read_binary_string_(STRM &strm, V &str_vec, bool needs_flipping,
                      std::size_t start_row, std::size_t num_rows)  {
@@ -1399,7 +1412,7 @@ _read_binary_string_(STRM &strm, V &str_vec, bool needs_flipping,
                         ? vec_size - start_row : read_end - start_row);
     for (uint64_t i = 0; i < vec_size; ++i)  {
         if (i >= start_row && i < read_end) [[likely]]  {
-            std::string str (std::size_t(sizes[i]), 0);
+            STR_T   str (std::size_t(sizes[i]), 0);
 
             strm.read(str.data(), sizes[i] * sizeof(char));
             str_vec.emplace_back(std::move(str));
@@ -1563,7 +1576,7 @@ _read_binary_str_str_pair_(STRM &strm, V &p_vec, bool needs_flipping,
         (num_rows == std::numeric_limits::max() ||
          (start_row + num_rows) > vec_size)
             ? vec_size : uint64_t(start_row + num_rows);
-	std::size_t     sizes_idx { 0 };
+    std::size_t     sizes_idx { 0 };
 
     p_vec.reserve(read_end - start_row);
     for (uint64_t i = 0; i < vec_size; ++i, sizes_idx += 2)  {
@@ -1687,8 +1700,9 @@ _read_binary_str_vec_(STRM &strm, V &vec, bool needs_flipping,
         strm.seekg(32 * sizeof(char), std::ios_base::cur);
 
         str_vec.clear();
-        _read_binary_string_(strm, str_vec, needs_flipping,
-                             0, std::numeric_limits::max());
+        _read_binary_string_(
+            strm, str_vec, needs_flipping, 0,
+            std::numeric_limits::max());
         if (i >= start_row && i < read_end) [[likely]]
             vec.push_back(std::move(str_vec));
     }
diff --git a/include/DataFrame/Utils/FixedSizeString.h b/include/DataFrame/Utils/FixedSizeString.h
index 98814234..83023fb4 100644
--- a/include/DataFrame/Utils/FixedSizeString.h
+++ b/include/DataFrame/Utils/FixedSizeString.h
@@ -353,6 +353,16 @@ class   FixedSizeString : public VirtualString  {
     inline FixedSizeString (const VirtualString &that) noexcept
         : VirtualString (buffer_)  { *this = that; }
 
+    // Mirrors the std::string (count, char) constructor signature, but the
+    // count is ignored: the first S bytes are set to v and buffer_[S] to 0
+    //
+    inline FixedSizeString (size_type , value_type v) noexcept
+        : VirtualString (buffer_)  {
+
+        std::memset(buffer_, v, S);
+        buffer_[S] = 0;
+    }
+
     // Assignment methods which cannot be inherited or virtual.
     //
     inline FixedSizeString &operator = (const FixedSizeString &rhs) noexcept  {