Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a flag for allowing single quotes in JSON strings. #8144

Merged
1 change: 1 addition & 0 deletions cpp/include/cudf/strings/detail/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ namespace detail {
std::unique_ptr<cudf::column> get_json_object(
cudf::strings_column_view const& col,
cudf::string_scalar const& json_path,
thrust::optional<get_json_object_options> options,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down
80 changes: 78 additions & 2 deletions cpp/include/cudf/strings/json.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2021, NVIDIA CORPORATION.
hyperbolic2346 marked this conversation as resolved.
Show resolved Hide resolved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,8 @@

#include <cudf/strings/strings_column_view.hpp>

#include <thrust/optional.h>
nvdbaranec marked this conversation as resolved.
Show resolved Hide resolved

namespace cudf {
namespace strings {

Expand All @@ -26,6 +28,78 @@ namespace strings {
* @file
*/

/**
* @brief Settings for `get_json_object()`.
*/
class get_json_object_options {
  // Allow single quotes to represent strings in JSON. Disabled by default.
  bool allow_single_quotes = false;

  // Individual string values are returned with quotes stripped. Disabled by default.
  bool strip_quotes_from_single_strings = false;

 public:
  /**
   * @brief Default constructor.
   *
   * This has been added since Cython requires a default constructor to create objects on the
   * stack. Every option keeps its in-class default of `false`.
   */
  explicit get_json_object_options() = default;

  /**
   * @brief Returns true/false depending on whether single-quotes for representing strings
   * are allowed.
   *
   * @return The current value of the allow-single-quotes setting.
   */
  CUDA_HOST_DEVICE_CALLABLE bool get_allow_single_quotes() const { return allow_single_quotes; }

  /**
   * @brief Returns true/false depending on whether individually returned string values have
   * their quotes stripped.
   *
   * When set to true, if the return value for a given row is an individual string
   * (not an object, or an array of strings), strip the quotes from the string and return only the
   * contents of the string itself. Example:
   *
   * @code{.pseudo}
   *
   * With strip_quotes_from_single_strings OFF:
   * Input = {"a" : "b"}
   * Query = $.a
   * Output = "b"
   *
   * With strip_quotes_from_single_strings ON:
   * Input = {"a" : "b"}
   * Query = $.a
   * Output = b
   *
   * @endcode
   *
   * @return The current value of the strip-quotes-from-single-strings setting.
   */
  CUDA_HOST_DEVICE_CALLABLE bool get_strip_quotes_from_single_strings() const
  {
    return strip_quotes_from_single_strings;
  }

  /**
   * @brief Set whether single-quotes for strings are allowed.
   *
   * @param _allow_single_quotes bool indicating desired behavior.
   */
  void set_allow_single_quotes(bool _allow_single_quotes)
  {
    allow_single_quotes = _allow_single_quotes;
  }

  /**
   * @brief Set whether individually returned string values have their quotes stripped.
   *
   * @param _strip_quotes_from_single_strings bool indicating desired behavior.
   */
  void set_strip_quotes_from_single_strings(bool _strip_quotes_from_single_strings)
  {
    strip_quotes_from_single_strings = _strip_quotes_from_single_strings;
  }
};

/**
* @brief Apply a JSONPath string to all rows in an input strings column.
*
Expand All @@ -37,13 +111,15 @@ namespace strings {
*
* @param col The input strings column. Each row must contain a valid JSON string.
* @param json_path The JSONPath string to be applied to each row
* @param options Options for controlling the behavior of the function
* @param mr Resource for allocating device memory.
* @return New strings column containing the retrieved json object strings
*/
std::unique_ptr<cudf::column> get_json_object(
cudf::strings_column_view const& col,
cudf::string_scalar const& json_path,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
thrust::optional<get_json_object_options> options = thrust::nullopt,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
} // namespace strings
Expand Down
Loading