Skip to content

Commit

Permalink
Merge branch 'branch-22.12' into example-strings
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Oct 14, 2022
2 parents 83055cc + e91d7d9 commit 37585a9
Show file tree
Hide file tree
Showing 18 changed files with 425 additions and 155 deletions.
26 changes: 26 additions & 0 deletions cpp/cmake/thrust.patch
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,29 @@ index d0e3f94..76774b0 100644
/**
* Dispatch between 32-bit and 64-bit index based versions of the same algorithm
* implementation. This version allows using different token sequences for callables
diff --git a/thrust/iterator/transform_input_output_iterator.h b/thrust/iterator/transform_input_output_iterator.h
index f512a36..a5f725d 100644
--- a/thrust/iterator/transform_input_output_iterator.h
+++ b/thrust/iterator/transform_input_output_iterator.h
@@ -102,6 +102,8 @@ template <typename InputFunction, typename OutputFunction, typename Iterator>
/*! \endcond
*/

+ transform_input_output_iterator() = default;
+
/*! This constructor takes as argument a \c Iterator an \c InputFunction and an
* \c OutputFunction and copies them to a new \p transform_input_output_iterator
*
diff --git a/thrust/iterator/transform_output_iterator.h b/thrust/iterator/transform_output_iterator.h
index 66fb46a..4a68cb5 100644
--- a/thrust/iterator/transform_output_iterator.h
+++ b/thrust/iterator/transform_output_iterator.h
@@ -104,6 +104,8 @@ template <typename UnaryFunction, typename OutputIterator>
/*! \endcond
*/

+ transform_output_iterator() = default;
+
/*! This constructor takes as argument an \c OutputIterator and an \c
* UnaryFunction and copies them to a new \p transform_output_iterator
*
50 changes: 48 additions & 2 deletions cpp/include/cudf/sorting.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,31 @@ std::unique_ptr<column> rank(
/**
* @brief Returns sorted order after sorting each segment in the table.
*
* If segment_offsets contains values larger than number of rows, behavior is undefined.
* If segment_offsets contains values larger than the number of rows, the behavior is undefined.
* @throws cudf::logic_error if `segment_offsets` is not a `size_type` column.
*
* @code{.pseudo}
* Example:
* keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} }
* offsets = {0, 3, 7, 10}
* result = cudf::segmented_sorted_order(keys, offsets);
* result is { 2,1,0, 6,5,4,3, 9,8,7 }
* @endcode
*
* If segment_offsets is empty or contains a single index, no values are sorted
* and the result is a sequence of integers from 0 to keys.num_rows()-1.
*
* The segment_offsets are not required to include all indices. Any indices
* outside the specified segments will not be sorted.
*
* @code{.pseudo}
* Example: (offsets do not cover all indices)
* keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} }
* offsets = {3, 7}
* result = cudf::segmented_sorted_order(keys, offsets);
* result is { 0,1,2, 6,5,4,3, 7,8,9 }
* @endcode
*
* @param keys The table that determines the ordering of elements in each segment
* @param segment_offsets The column of `size_type` type containing the start offset index for each
* contiguous segment.
Expand Down Expand Up @@ -246,10 +268,34 @@ std::unique_ptr<column> stable_segmented_sorted_order(
/**
* @brief Performs a lexicographic segmented sort of a table
*
* If segment_offsets contains values larger than number of rows, behavior is undefined.
* If segment_offsets contains values larger than the number of rows, the behavior is undefined.
* @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`.
* @throws cudf::logic_error if `segment_offsets` is not a `size_type` column.
*
* @code{.pseudo}
* Example:
* keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} }
* values = { {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'} }
* offsets = {0, 3, 7, 10}
* result = cudf::segmented_sort_by_key(keys, values, offsets);
* result is { 'c','b','a', 'g','f','e','d', 'j','i','h' }
* @endcode
*
* If segment_offsets is empty or contains a single index, no values are sorted
* and the result is a copy of the values.
*
* The segment_offsets are not required to include all indices. Any indices
* outside the specified segments will not be sorted.
*
* @code{.pseudo}
* Example: (offsets do not cover all indices)
* keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} }
* values = { {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'} }
* offsets = {3, 7}
* result = cudf::segmented_sort_by_key(keys, values, offsets);
* result is { 'a','b','c', 'g','f','e','d', 'h','i','j' }
* @endcode
*
* @param values The table to reorder
* @param keys The table that determines the ordering of elements in each segment
* @param segment_offsets The column of `size_type` type containing the start offset index for each
Expand Down
21 changes: 13 additions & 8 deletions cpp/src/io/json/json_column.cu
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ void make_device_json_column(device_span<SymbolT const> input,
std::string name = "";
auto parent_col_id = column_parent_ids[this_col_id];
if (parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST) {
name = "element";
name = list_child_name;
} else if (column_categories[parent_col_id] == NC_FN) {
auto field_name_col_id = parent_col_id;
parent_col_id = column_parent_ids[parent_col_id];
Expand Down Expand Up @@ -689,19 +689,24 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> device_json_co
size_type num_rows = json_col.child_offsets.size() - 1;
std::vector<column_name_info> column_names{};
column_names.emplace_back("offsets");
column_names.emplace_back(json_col.child_columns.begin()->first);
column_names.emplace_back(
json_col.child_columns.empty() ? list_child_name : json_col.child_columns.begin()->first);

// Note: json_col modified here, reuse the memory
auto offsets_column = std::make_unique<column>(
data_type{type_id::INT32}, num_rows + 1, json_col.child_offsets.release());
// Create children column
auto [child_column, names] =
device_json_column_to_cudf_column(json_col.child_columns.begin()->second,
d_input,
options,
get_child_schema(json_col.child_columns.begin()->first),
stream,
mr);
json_col.child_columns.empty()
? std::pair<std::unique_ptr<column>,
std::vector<column_name_info>>{std::make_unique<column>(), {}}
: device_json_column_to_cudf_column(
json_col.child_columns.begin()->second,
d_input,
options,
get_child_schema(json_col.child_columns.begin()->first),
stream,
mr);
column_names.back().children = names;
auto [result_bitmask, null_count] = make_validity(json_col);
return {make_lists_column(num_rows,
Expand Down
Loading

0 comments on commit 37585a9

Please sign in to comment.