From 3e8a30f62f046880310aa8127f7e2a63c6e7c649 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:54:47 +0000 Subject: [PATCH 1/6] Update dremel example --- cpp/include/cudf/lists/detail/dremel.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index 96ee30dd261..152d60464de 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -74,7 +74,7 @@ struct dremel_data { * * http://www.goldsborough.me/distributed-systems/2019/05/18/21-09-00-a_look_at_dremel/ * https://akshays-blog.medium.com/wrapping-head-around-repetition-and-definition-levels-in-dremel-powering-bigquery-c1a33c9695da - * https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet + * https://blog.x.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet * * The remainder of this documentation assumes familiarity with the Dremel concepts. * @@ -102,7 +102,7 @@ struct dremel_data { * ``` * We can represent it in cudf format with two level of offsets like this: * ``` - * Level 0 offsets = {0, 0, 3, 5, 6} + * Level 0 offsets = {0, 0, 3, 4} * Level 1 offsets = {0, 0, 3, 5, 5} * Values = {1, 2, 3, 4, 5} * ``` From 205a3221da91bb6ab61bdb9d808b03d7f101825b Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:08:12 -0800 Subject: [PATCH 2/6] Minor fix --- cpp/include/cudf/lists/detail/dremel.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index 152d60464de..36c95e5d10f 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -111,7 +111,7 @@ struct dremel_data { * ``` * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} * def = { 0 1, 2, 2, 2, 2, 2, 1 } - * rep = { 0, 0, 0, 2, 2, 1, 2, 0 } + * rep = { 0, 0, 1, 2, 2, 1, 2, 0 } * ``` * * Since repetition and definition levels arrays contain a value for each empty list, the size of From 4ec228ee1cc904d400bae11582594c4fb39f547d Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:36:59 -0800 Subject: [PATCH 3/6] Add dremel offsets to the example --- cpp/include/cudf/lists/detail/dremel.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index 36c95e5d10f..b1ea2919b26 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -58,7 +58,7 @@ struct dremel_data { }; /** - * @brief Get the dremel offsets and repetition and definition levels for a LIST column + * @brief Get the dremel offsets, and repetition and definition levels for a LIST column * * Dremel is a query system created by Google for ad hoc data analysis. The Dremel engine is * described in depth in the paper "Dremel: Interactive Analysis of Web-Scale @@ -106,12 +106,13 @@ struct dremel_data { * Level 1 offsets = {0, 0, 3, 5, 5} * Values = {1, 2, 3, 4, 5} * ``` - * The desired result of this function is the repetition and definition level values that - * correspond to the data values: + * The desired result of this function is the dremel offsets, repetition and definition level values + * that correspond to the data values: * ``` - * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} - * def = { 0 1, 2, 2, 2, 2, 2, 1 } - * rep = { 0, 0, 1, 2, 2, 1, 2, 0 } + * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} + * dremel_offsets = { 0, 1, 7, 8} + * def_levels = { 0 1, 2, 2, 2, 2, 2, 1 } + * rep_levels = { 0, 0, 1, 2, 2, 1, 2, 0 } * ``` * * Since repetition and definition levels arrays contain a value for each empty list, the size of From ea2e8f5de6eee5b4942b9e6160996a385e8a6697 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Tue, 5 Nov 2024 10:37:41 -0800 Subject: [PATCH 4/6] minor updates --- cpp/include/cudf/lists/detail/dremel.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index b1ea2919b26..e09c86f92e0 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -106,8 +106,8 @@ struct dremel_data { * Level 1 offsets = {0, 0, 3, 5, 5} * Values = {1, 2, 3, 4, 5} * ``` - * The desired result of this function is the dremel offsets, repetition and definition level values - * that correspond to the data values: + * The desired result of this function is the dremel offsets, and repetition and definition level + * values that correspond to the data values: * ``` * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} * dremel_offsets = { 0, 1, 7, 8} From 06d61f2cfba76ec7c52af7e9c8f724141e1019ca Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:20:56 -0800 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Vukasin Milovanovic --- cpp/include/cudf/lists/detail/dremel.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index e09c86f92e0..397395f4ba5 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -106,12 +106,12 @@ struct dremel_data { * Level 1 offsets = {0, 0, 3, 5, 5} * Values = {1, 2, 3, 4, 5} * ``` - * The desired result of this function is the dremel offsets, and repetition and definition level + * This function returns the dremel offsets, and repetition and definition level * values that correspond to the data values: * ``` * col = {[], [[], [1, 2, 3], [4, 5]], [[]]} * dremel_offsets = { 0, 1, 7, 8} - * def_levels = { 0 1, 2, 2, 2, 2, 2, 1 } + * def_levels = { 0, 1, 2, 2, 2, 2, 2, 1 } * rep_levels = { 0, 0, 1, 2, 2, 1, 2, 0 } * ``` * From 1104a327b01598e350053e3cd93fd017bb3b9289 Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Wed, 6 Nov 2024 12:59:33 -0800 Subject: [PATCH 6/6] Doc updates --- cpp/include/cudf/lists/detail/dremel.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/lists/detail/dremel.hpp b/cpp/include/cudf/lists/detail/dremel.hpp index 397395f4ba5..f45da8e8d8d 100644 --- a/cpp/include/cudf/lists/detail/dremel.hpp +++ b/cpp/include/cudf/lists/detail/dremel.hpp @@ -58,7 +58,7 @@ struct dremel_data { }; /** - * @brief Get the dremel offsets, and repetition and definition levels for a LIST column + * @brief Get the dremel offsets, repetition levels, and definition levels for a LIST column * * Dremel is a query system created by Google for ad hoc data analysis. The Dremel engine is * described in depth in the paper "Dremel: Interactive Analysis of Web-Scale @@ -106,7 +106,7 @@ struct dremel_data { * Level 1 offsets = {0, 0, 3, 5, 5} * Values = {1, 2, 3, 4, 5} * ``` - * This function returns the dremel offsets, and repetition and definition level + * This function returns the dremel offsets, repetition levels, and definition level * values that correspond to the data values: * ``` * col = {[], [[], [1, 2, 3], [4, 5]], [[]]}