diff --git a/docs/HTML/CountVisitor.html b/docs/HTML/CountVisitor.html new file mode 100644 index 00000000..dd56616c --- /dev/null +++ b/docs/HTML/CountVisitor.html @@ -0,0 +1,153 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Signature Description Parameters
+
#include <DataFrame/DataFrameStatsVisitors.h>
+
+template<typename T, typename I = unsigned long>
+struct CountVisitor;
+        
+
+ This is a single action visitor, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

+ This functor class counts the number of data points in the given column.
+ The result is a count (a single number).
+
+    explicit
+    CountVisitor(bool skipnan = false);
+        
+
+ T: Column data type.
+ I: Index type.
+
+
#include <DataFrame/DataFrameStatsVisitors.h>
+
+template<typename T, typename I = unsigned long,
+         std::size_t A = 0>
+struct CumCountVisitor;
+        
+
+ This is a single action visitor, meaning it is passed the whole data vector in one call and you must use the single_act_visit() interface.

+ This functor class cumulatively counts the number of non-NaN data points in the given column.
+ The result is a vector of running counts
+
+    CumCountVisitor();
+        
+
+ T: Column data type.
+ I: Index type.
+ A: Memory alignment boundary for vectors. Default is system default alignment.
+
+ +
static void test_groupby_2()  {
+    // Example/test: runs the same two-key groupby2() (index column and "dbl_col_2") on a DataFrame and on a view of it, then checks the two results agree.
+    std::cout << "\nTesting groupby_2( ) ..." << std::endl;
+    // Input data. dblvec33 repeats dblvec22 with four entries replaced by std::sqrt(-1) (presumably to inject NaN values).
+    StlVecType<unsigned long>  ulgvec2 ={ 123450, 123451, 123452, 123450, 123455, 123450, 123449,123450, 123451, 123450, 123452, 123450, 123455, 123450,
+                                          123454, 123450, 123450, 123457, 123458, 123459, 123450, 123441, 123442, 123432, 123450, 123450, 123435, 123450 };
+    StlVecType<unsigned long>  xulgvec2 = ulgvec2;
+    StlVecType<int>            intvec2 = { 1, 2, 3, 4, 5, 3, 7, 3, 9, 10, 3, 2, 3, 14, 2, 2, 2, 3, 2, 3, 3, 3, 3, 3, 36, 2, 45, 2 };
+    StlVecType<double>         xdblvec2 = { 10, 20, 11, 11, 30, 40, 50, 40, 60, 70, 80, 90, 50, 100, 11, 25, 20, 30, 1, 3, 4, 12, 6, 2, 3, 10, 4, 5 };
+    StlVecType<double>         dblvec22 = { 0.998, 1.545, 0.056, 0.15678, 1.545, 0.923, 0.06743, 0.1, -1.545, 0.07865, -0.9999, 1.545, 0.1002, -0.8888,
+                                            0.14, 0.0456, -1.545, -0.8999, 0.01119, 0.8002, -1.545, 0.2, 0.1056, 0.87865, -0.6999, 1.545, 0.1902, -1.545 };
+    StlVecType<std::string>    strvec2 = { "A", "B", "C", "D", "X", "Y", "W", "P", "Z", "S", "M", "B", "A", "H", "X", "Q", "V", "P", "W", "K", "I", "L", "J", "N", "Y", "G", "T", "U" };
+    StlVecType<double>         dblvec33 = { 0.998, 1.545, 0.056, 0.15678, 1.545, std::sqrt(-1), 0.06743, 0.1, -1.545, std::sqrt(-1), -0.9999, 1.545, 0.1002, -0.8888,
+                                            0.14, 0.0456, -1.545, -0.8999, std::sqrt(-1), 0.8002, -1.545, 0.2, 0.1056, 0.87865, -0.6999, std::sqrt(-1), 0.1902, -1.545 };
+
+    MyDataFrame df;
+    // ulgvec2 is moved in as the first argument (presumably the index -- confirm against load_data()); the remaining vectors are loaded as named columns.
+    df.load_data(std::move(ulgvec2),
+                 std::make_pair("xint_col", intvec2),
+                 std::make_pair("dbl_col", xdblvec2),
+                 std::make_pair("dbl_col_2", dblvec22),
+                 std::make_pair("dbl_col_3", dblvec33),
+                 std::make_pair("str_col", strvec2),
+                 std::make_pair("ul_col", xulgvec2));
+    // vw is a view over five of the loaded columns ("dbl_col_3" is not included). Each tuple below appears to be (source column, result column name, visitor) -- confirm against groupby2().
+    auto    vw = df.get_view<double, int, unsigned long, std::string>({ "xint_col", "dbl_col", "dbl_col_2", "str_col", "ul_col" });
+    auto    result1 = df.groupby2<unsigned long, double>(DF_INDEX_COL_NAME,
+                                                         "dbl_col_2",
+                                                         LastVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
+                                                         std::make_tuple("str_col",  "sum_str",  SumVisitor<std::string>()),
+                                                         std::make_tuple("xint_col",  "max_int", MaxVisitor<int>()),
+                                                         std::make_tuple("xint_col",  "min_int", MinVisitor<int>()),
+                                                         std::make_tuple("dbl_col_2", "cnt_dbl", CountVisitor<double>()),
+                                                         std::make_tuple("dbl_col",   "sum_dbl", SumVisitor<double>()));
+    auto    result1_from_vw = vw.groupby2<unsigned long, double>(DF_INDEX_COL_NAME,
+                                                                 "dbl_col_2",
+                                                                 LastVisitor<MyDataFrame::IndexType, MyDataFrame::IndexType>(),
+                                                                 std::make_tuple("str_col",  "sum_str",  SumVisitor<std::string>()),
+                                                                 std::make_tuple("xint_col",  "max_int", MaxVisitor<int>()),
+                                                                 std::make_tuple("xint_col",  "min_int", MinVisitor<int>()),
+                                                                 std::make_tuple("dbl_col_2", "cnt_dbl", CountVisitor<double>()),
+                                                                 std::make_tuple("dbl_col",   "sum_dbl", SumVisitor<double>()));
+    // Print the DataFrame-based result in csv2 format, then spot-check that the view-based result matches it at index slot 4 and at "max_int" slot 8.
+    result1.write<std::ostream, std::string, double, std::size_t, int>(std::cout, io_format::csv2);
+    assert(result1.get_index()[4] == result1_from_vw.get_index()[4]);
+    assert((result1.get_column<int>("max_int")[8] == result1_from_vw.get_column<int>("max_int")[8]));
+}
+
+ +
C++ DataFrame + + + + + diff --git a/include/DataFrame/DataFrameStatsVisitors.h b/include/DataFrame/DataFrameStatsVisitors.h index da896019..004d4f10 100644 --- a/include/DataFrame/DataFrameStatsVisitors.h +++ b/include/DataFrame/DataFrameStatsVisitors.h @@ -2304,6 +2304,44 @@ struct CumSumVisitor { // ---------------------------------------------------------------------------- +template +struct CumCountVisitor { + + DEFINE_VISIT_BASIC_TYPES + + using result_type = + std::vector::type>; + + + template + inline void + operator() (const K &idx_begin, const K &idx_end, + const H &column_begin, const H &column_end) { + + GET_COL_SIZE + + size_type running_cnt { 0 }; + result_type result; + + result.reserve(col_s); + for (size_type i = 0; i < col_s; ++i) [[likely]] { + if (! is_nan__(*(column_begin + i))) [[unlikely]] + running_cnt += 1; + result.push_back(running_cnt); + } + result_.swap(result); + } + + DEFINE_PRE_POST + DEFINE_RESULT + +private: + + result_type result_ { }; +}; + +// ---------------------------------------------------------------------------- + template struct CumProdVisitor { diff --git a/test/dataframe_tester_2.cc b/test/dataframe_tester_2.cc index e3e179cd..6a42e52e 100644 --- a/test/dataframe_tester_2.cc +++ b/test/dataframe_tester_2.cc @@ -1236,6 +1236,11 @@ static void test_groupby_2() { { "A", "B", "C", "D", "X", "Y", "W", "P", "Z", "S", "M", "B", "A", "H", "X", "Q", "V", "P", "W", "K", "I", "L", "J", "N", "Y", "G", "T", "U" }; + StlVecType dblvec33 = + { 0.998, 1.545, 0.056, 0.15678, 1.545, std::sqrt(-1), 0.06743, + 0.1, -1.545, std::sqrt(-1), -0.9999, 1.545, 0.1002, -0.8888, + 0.14, 0.0456, -1.545, -0.8999, std::sqrt(-1), 0.8002, -1.545, + 0.2, 0.1056, 0.87865, -0.6999, std::sqrt(-1), 0.1902, -1.545 }; MyDataFrame df; @@ -1243,6 +1248,7 @@ static void test_groupby_2() { std::make_pair("xint_col", intvec2), std::make_pair("dbl_col", xdblvec2), std::make_pair("dbl_col_2", dblvec22), + std::make_pair("dbl_col_3", dblvec33), 
std::make_pair("str_col", strvec2), std::make_pair("ul_col", xulgvec2));