Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Spark Compatible JSON validation #10

Merged
99 changes: 95 additions & 4 deletions cpp/include/cudf/io/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,14 @@ class json_reader_options {
json_recovery_mode_t _recovery_mode = json_recovery_mode_t::FAIL;

// Validation checks for spark
// Should the json validation be strict or not
bool _strict_validation = false;
// Allow leading zeros for numeric values.
bool _allow_numeric_leading_zeros = true;
// Allow nonnumeric numbers. NaN/Inf
bool _allow_nonnumeric_numbers = true;
// Allow unquoted control characters
bool allowUnquotedControlChars = true;
bool _allow_unquoted_control_chars = true;
// Additional values to recognize as null values
std::vector<std::string> _na_values;

Expand Down Expand Up @@ -309,7 +313,15 @@ class json_reader_options {
[[nodiscard]] json_recovery_mode_t recovery_mode() const
{
  // Report the recovery mode currently stored in these options.
  return _recovery_mode;
}

/**
* @brief Whether leading zeros are allowed in numeric values.
* @brief Whether json validation should be enforced strictly or not.
*
* @return true if it should be.
*/
[[nodiscard]] bool is_strict_validation() const
{
  // Report the strict-validation flag exactly as stored.
  return _strict_validation;
}

/**
* @brief Whether leading zeros are allowed in numeric values. strict validation
* must be enabled for this to work.
*
* @return true if leading zeros are allowed in numeric values
*/
Expand All @@ -318,6 +330,22 @@ class json_reader_options {
return _allow_numeric_leading_zeros;
}

/**
* @brief Whether the unquoted nonnumeric number values NaN, +INF, -INF, +Infinity, Infinity, and
* -Infinity are allowed. Strict validation must be enabled for this to work.
*
* @return true if unquoted nonnumeric number values are allowed
*/
[[nodiscard]] bool is_allowed_nonnumeric_numbers() const
{
  // Report the nonnumeric-numbers flag exactly as stored.
  return _allow_nonnumeric_numbers;
}

/**
* @brief Whether characters greater than or equal to 0 and less than 32 are allowed in a quoted
* string without some form of escaping. Strict validation must be enabled for this to work.
*
* @return true if unquoted control chars are allowed.
*/
[[nodiscard]] bool is_allowed_unquoted_control_chars() const
{
  // Report the unquoted-control-characters flag exactly as stored.
  return _allow_unquoted_control_chars;
}

/**
* @brief Returns additional values to recognize as null values.
*
Expand Down Expand Up @@ -456,12 +484,37 @@ class json_reader_options {
void set_recovery_mode(json_recovery_mode_t val)
{
  // Overwrite the stored recovery mode with the caller's choice.
  _recovery_mode = val;
}

/**
* @brief Set Whether leading zeros are allowed in numeric values.
* @brief Set whether strict validation is enabled or not.
*
* @param val Boolean value to indicate whether strict validation is enabled.
*/
void set_strict_validation(bool val)
{
  // Overwrite the stored strict-validation flag.
  _strict_validation = val;
}

/**
* @brief Set whether leading zeros are allowed in numeric values. strict validation
* must be enabled for this to work.
*
* @param val Boolean value to indicate whether leading zeros are allowed in numeric values
*/
void allow_numeric_leading_zeros(bool val)
{
  // Overwrite the stored leading-zeros flag.
  _allow_numeric_leading_zeros = val;
}

/**
* @brief Set whether the unquoted nonnumeric number values NaN, +INF, -INF, +Infinity,
* Infinity, and -Infinity are allowed. Strict validation must be enabled for this to work.
*
* @param val Boolean value to indicate whether unquoted nonnumeric number values are allowed
*/
void allow_nonnumeric_numbers(bool val)
{
  // Overwrite the stored nonnumeric-numbers flag.
  _allow_nonnumeric_numbers = val;
}

/**
* @brief Set whether characters greater than or equal to 0 and less than 32 are allowed
* in a quoted string without some form of escaping. Strict validation must be enabled
* for this to work.
*
* @param val true to indicate whether unquoted control chars are allowed.
*/
void allow_unquoted_control_chars(bool val)
{
  // Overwrite the stored unquoted-control-characters flag.
  _allow_unquoted_control_chars = val;
}

/**
* @brief Sets additional values to recognize as null values.
*
Expand Down Expand Up @@ -680,7 +733,19 @@ class json_reader_options_builder {
}

/**
* @brief Set Whether leading zeros are allowed in numeric values.
* @brief Set whether json validation should be strict or not.
*
* @param val Boolean value to indicate whether json validation should be strict or not.
* @return this for chaining
*/
json_reader_options_builder& strict_validation(bool val)
{
  // Record the setting on the options under construction.
  options.set_strict_validation(val);

  // Hand the builder back so further calls can be chained.
  json_reader_options_builder& self = *this;
  return self;
}

/**
* @brief Set whether leading zeros are allowed in numeric values. Strict validation must
* be enabled for this to have any effect.
*
* @param val Boolean value to indicate whether leading zeros are allowed in numeric values
* @return this for chaining
Expand All @@ -691,6 +756,32 @@ class json_reader_options_builder {
return *this;
}

/**
* @brief Set whether specific unquoted number values are valid JSON. The values are NaN,
* +INF, -INF, +Infinity, Infinity, and -Infinity.
* strict validation must be enabled for this to have any effect.
*
* @param val Boolean value to indicate if unquoted nonnumeric values are
* valid json or not.
* @return this for chaining
*/
json_reader_options_builder& nonnumeric_numbers(bool val)
{
  // Record the setting on the options under construction.
  options.allow_nonnumeric_numbers(val);

  // Hand the builder back so further calls can be chained.
  json_reader_options_builder& self = *this;
  return self;
}

/**
* @brief Set whether chars >= 0 and < 32 are allowed in a quoted string without
* some form of escaping. strict validation must be enabled for this to have any effect.
*
* @param val Boolean value to indicate if unquoted control chars are allowed or not.
* @return this for chaining
*/
json_reader_options_builder& unquoted_control_chars(bool val)
{
  // Record the setting on the options under construction.
  options.allow_unquoted_control_chars(val);

  // Hand the builder back so further calls can be chained.
  json_reader_options_builder& self = *this;
  return self;
}

/**
* @brief Sets additional values to recognize as null values.
*
Expand Down
Loading
Loading