Skip to content

Commit

Permalink
Add Spark Compatible JSON validation (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
revans2 authored Aug 2, 2024
1 parent cd6a30f commit 0c2e4da
Show file tree
Hide file tree
Showing 6 changed files with 689 additions and 37 deletions.
99 changes: 95 additions & 4 deletions cpp/include/cudf/io/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,14 @@ class json_reader_options {
json_recovery_mode_t _recovery_mode = json_recovery_mode_t::FAIL;

// Validation checks for Spark compatibility.
// Whether the JSON validation should be strict or not. The accessors for the
// flags below document that they only take effect when this is enabled.
bool _strict_validation = false;
// Allow leading zeros for numeric values.
bool _allow_numeric_leading_zeros = true;
// Allow the non-numeric number values NaN, +INF, -INF, +Infinity, Infinity and -Infinity.
bool _allow_nonnumeric_numbers = true;
// Allow unquoted control characters
// NOTE(review): no accessor visible in this chunk reads or writes this field;
// is_allowed_unquoted_control_chars() and allow_unquoted_control_chars() use
// _allow_unquoted_control_chars instead. Looks like a leftover of the
// pre-rename field -- confirm and remove.
bool allowUnquotedControlChars = true;
// Allow characters >= 0 and < 32 inside a quoted string without escaping.
bool _allow_unquoted_control_chars = true;
// Additional values to recognize as null values
std::vector<std::string> _na_values;

Expand Down Expand Up @@ -309,7 +313,15 @@ class json_reader_options {
[[nodiscard]] json_recovery_mode_t recovery_mode() const
{
  // Read-only accessor: hands back the stored recovery mode by value.
  return _recovery_mode;
}

/**
* @brief Whether leading zeros are allowed in numeric values.
* @brief Whether json validation should be enforced strictly or not.
*
* @return true if strict validation is enabled
*/
[[nodiscard]] bool is_strict_validation() const
{
  // Read-only accessor for the strict-validation flag.
  return _strict_validation;
}

/**
* @brief Whether leading zeros are allowed in numeric values. Strict validation
* must be enabled for this to work.
*
* @return true if leading zeros are allowed in numeric values
*/
Expand All @@ -318,6 +330,22 @@ class json_reader_options {
return _allow_numeric_leading_zeros;
}

/**
 * @brief Whether the unquoted values NaN, +INF, -INF, +Infinity, Infinity, and -Infinity
 * are allowed as numbers. Strict validation must be enabled for this to work.
 *
 * @return true if these non-numeric number values are allowed
 */
[[nodiscard]] bool is_allowed_nonnumeric_numbers() const
{
  return _allow_nonnumeric_numbers;
}

/**
 * @brief Whether characters greater than or equal to 0 and less than 32 are allowed inside a
 * quoted string without some form of escaping. Strict validation must be enabled for this to
 * work.
 *
 * @return true if unquoted control chars are allowed
 */
[[nodiscard]] bool is_allowed_unquoted_control_chars() const
{
  return _allow_unquoted_control_chars;
}

/**
* @brief Returns additional values to recognize as null values.
*
Expand Down Expand Up @@ -456,12 +484,37 @@ class json_reader_options {
void set_recovery_mode(json_recovery_mode_t val)
{
  // Replace the stored recovery mode with the caller-supplied one.
  _recovery_mode = val;
}

/**
* @brief Set Whether leading zeros are allowed in numeric values.
* @brief Set whether strict validation is enabled or not.
*
* @param val Boolean value to indicate whether strict validation is enabled.
*/
void set_strict_validation(bool val)
{
  // Overwrite the stored strict-validation flag with the caller's choice.
  _strict_validation = val;
}

/**
 * @brief Set whether numeric values may carry leading zeros. Strict validation must be
 * enabled for this to work.
 *
 * @param val Boolean value to indicate whether leading zeros are allowed in numeric values
 */
void allow_numeric_leading_zeros(bool val)
{
  _allow_numeric_leading_zeros = val;
}

/**
 * @brief Set whether the unquoted values NaN, +INF, -INF, +Infinity, Infinity, and -Infinity
 * are allowed as numbers. Strict validation must be enabled for this to work.
 *
 * @param val Boolean value to indicate whether these non-numeric number values are allowed
 */
void allow_nonnumeric_numbers(bool val)
{
  _allow_nonnumeric_numbers = val;
}

/**
 * @brief Set whether characters greater than or equal to 0 and less than 32 are allowed
 * inside a quoted string without some form of escaping. Strict validation must be enabled
 * for this to work.
 *
 * @param val true to indicate whether unquoted control chars are allowed
 */
void allow_unquoted_control_chars(bool val)
{
  _allow_unquoted_control_chars = val;
}

/**
* @brief Sets additional values to recognize as null values.
*
Expand Down Expand Up @@ -680,7 +733,19 @@ class json_reader_options_builder {
}

/**
* @brief Set Whether leading zeros are allowed in numeric values.
* @brief Set whether json validation should be strict or not.
*
* @param val Boolean value to indicate whether json validation should be strict or not.
* @return this for chaining
*/
json_reader_options_builder& strict_validation(bool val)
{
  // Forward the flag to the options object being built.
  options.set_strict_validation(val);

  // Hand the builder itself back so calls can be chained.
  auto& self = *this;
  return self;
}

/**
* @brief Set whether leading zeros are allowed in numeric values. Strict validation must
* be enabled for this to have any effect.
*
* @param val Boolean value to indicate whether leading zeros are allowed in numeric values
* @return this for chaining
Expand All @@ -691,6 +756,32 @@ class json_reader_options_builder {
return *this;
}

/**
 * @brief Set whether the unquoted number values NaN, +INF, -INF, +Infinity, Infinity, and
 * -Infinity count as valid JSON. Strict validation must be enabled for this to have any
 * effect.
 *
 * @param val Boolean value to indicate if unquoted nonnumeric values are valid json or not.
 * @return this for chaining
 */
json_reader_options_builder& nonnumeric_numbers(bool val)
{
  // Forward the flag to the options object being built.
  options.allow_nonnumeric_numbers(val);

  // Hand the builder itself back so calls can be chained.
  auto& self = *this;
  return self;
}

/**
 * @brief Set whether chars >= 0 and < 32 may appear in a quoted string without some form of
 * escaping. Strict validation must be enabled for this to have any effect.
 *
 * @param val Boolean value to indicate if unquoted control chars are allowed or not.
 * @return this for chaining
 */
json_reader_options_builder& unquoted_control_chars(bool val)
{
  // Forward the flag to the options object being built.
  options.allow_unquoted_control_chars(val);

  // Hand the builder itself back so calls can be chained.
  auto& self = *this;
  return self;
}

/**
* @brief Sets additional values to recognize as null values.
*
Expand Down
Loading

0 comments on commit 0c2e4da

Please sign in to comment.