Skip to content

Commit

Permalink
Merge branch 'branch-24.06' into 15668
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar authored May 21, 2024
2 parents c41405f + 4da00ea commit 8e7453e
Show file tree
Hide file tree
Showing 12 changed files with 726 additions and 170 deletions.
9 changes: 9 additions & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ outputs:
{% else %}
- {{ compiler('cuda') }}
{% endif %}
# TODO: start taking libarrow's run exports again when they're correct for 16.0
# ref: https://github.com/conda-forge/arrow-cpp-feedstock/issues/1418
- libarrow
requirements:
build:
- cmake {{ cmake_version }}
Expand All @@ -105,6 +108,12 @@ outputs:
- librmm ={{ minor_version }}
- libkvikio ={{ minor_version }}
- dlpack {{ dlpack_version }}
# TODO: start taking libarrow's run exports again when they're correct for 16.0
# ref: https://github.com/conda-forge/arrow-cpp-feedstock/issues/1418
- libarrow>=16.0.0,<16.1.0a0
- libarrow-acero>=16.0.0,<16.1.0a0
- libarrow-dataset>=16.0.0,<16.1.0a0
- libparquet>=16.0.0,<16.1.0a0
test:
commands:
- test -f $PREFIX/lib/libcudf.so
Expand Down
7 changes: 6 additions & 1 deletion cpp/include/cudf/detail/utilities/stream_pool.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -81,6 +81,11 @@ class cuda_stream_pool {
*/
cuda_stream_pool* create_global_cuda_stream_pool();

/**
 * @brief Get the global stream pool.
 *
 * @return Reference to the global `cuda_stream_pool`
 */
cuda_stream_pool& global_cuda_stream_pool();

/**
* @brief Acquire a set of `cuda_stream_view` objects and synchronize them to an event on another
* stream.
Expand Down
45 changes: 45 additions & 0 deletions cpp/include/cudf/io/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ class json_reader_options {
bool _lines = false;
// Parse mixed types as a string column
bool _mixed_types_as_string = false;
// Delimiter separating records in JSON lines
char _delimiter = '\n';
// Prune columns on read, selected based on the _dtypes option
bool _prune_columns = false;

Expand Down Expand Up @@ -229,6 +231,13 @@ class json_reader_options {
return base_padding + num_columns * column_bytes;
}

/**
 * @brief Get the delimiter that separates records in JSON lines input
 *
 * @return The delimiter character currently stored in these options
 */
char get_delimiter() const
{
  return _delimiter;
}

/**
* @brief Whether to read the file as a json object per line.
*
Expand Down Expand Up @@ -340,6 +349,30 @@ class json_reader_options {
*/
void set_byte_range_size(size_type size) { _byte_range_size = size; }

/**
 * @brief Set the delimiter that separates records in JSON lines input
 *
 * The characters `{`, `[`, `}`, `]`, `,`, `:`, `"`, `'`, `\`, space, tab and carriage
 * return are not accepted as delimiters.
 *
 * @throws std::invalid_argument if `delimiter` is one of the unsupported characters
 *
 * @param delimiter Delimiter separating records in JSON lines
 */
void set_delimiter(char delimiter)
{
  // Characters that may not be used as a record delimiter
  constexpr char unsupported[] = {'{', '[', '}', ']', ',', ':', '"', '\'', '\\', ' ', '\t', '\r'};

  for (char const rejected : unsupported) {
    if (delimiter == rejected) {
      CUDF_FAIL("Unsupported delimiter character.", std::invalid_argument);
    }
  }

  // Only reached when the character passed validation
  _delimiter = delimiter;
}

/**
* @brief Set whether to read the file as a json object per line.
*
Expand Down Expand Up @@ -507,6 +540,18 @@ class json_reader_options_builder {
return *this;
}

/**
 * @brief Set delimiter separating records in JSON lines
 *
 * Forwards `delimiter` to `set_delimiter` on the options being built; any exception
 * raised by that setter propagates to the caller.
 *
 * @param delimiter Delimiter separating records in JSON lines
 * @return this for chaining
 */
json_reader_options_builder& delimiter(char delimiter)
{
options.set_delimiter(delimiter);
return *this;
}

/**
* @brief Set whether to read the file as a json object per line.
*
Expand Down
19 changes: 19 additions & 0 deletions cpp/include/cudf/io/memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include <rmm/resource_ref.hpp>

#include <optional>

namespace cudf::io {

/**
Expand All @@ -41,4 +43,21 @@ rmm::host_async_resource_ref set_host_memory_resource(rmm::host_async_resource_r
*/
rmm::host_async_resource_ref get_host_memory_resource();

/**
 * @brief Options to configure the default host memory resource
 */
struct host_mr_options {
  /// The size of the pool to use for the default host memory resource.
  /// When left unset, the default pool size is used.
  std::optional<size_t> pool_size = std::nullopt;
};

/**
 * @brief Configure the size of the default host memory resource.
 *
 * The pool size is supplied through `opts.pool_size`; when that field is not set, the
 * default pool size is used.
 *
 * @throws cudf::logic_error if called after the default host memory resource has been created
 *
 * @param opts Options to configure the default host memory resource
 */
void config_default_host_memory_resource(host_mr_options const& opts);

} // namespace cudf::io
6 changes: 4 additions & 2 deletions cpp/src/io/json/nested_json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ enum class stack_behavior_t : char {
PushPopWithoutReset,

/// Opening brackets and braces, [, {, push onto the stack, closing brackets and braces, ], }, pop
/// from the stack. Newline characters are considered delimiters and therefore reset to an empty
/// stack.
/// from the stack. Delimiter characters are passed when the stack context is constructed to
/// reset to an empty stack.
ResetOnDelimiter
};

Expand Down Expand Up @@ -198,11 +198,13 @@ namespace detail {
* within the context of a struct, a '[' represents that it is within the context of an array, and a
* '_' symbol that it is at the root of the JSON.
* @param[in] stack_behavior Specifies the stack's behavior
* @param[in] delimiter Specifies the delimiter to use as separator for JSON lines input
* @param[in] stream The cuda stream to dispatch GPU kernels to
*/
void get_stack_context(device_span<SymbolT const> json_in,
SymbolT* d_top_of_stack,
stack_behavior_t stack_behavior,
SymbolT delimiter,
rmm::cuda_stream_view stream);

/**
Expand Down
Loading

0 comments on commit 8e7453e

Please sign in to comment.