Skip to content

Commit

Permalink
[JSON] multipleOf (#102)
Browse files Browse the repository at this point in the history
Enables `multipleOf` for JSON numbers using new `derivre` features!
  • Loading branch information
hudson-ai authored Jan 6, 2025
1 parent 8ec3ccf commit 823bcf0
Show file tree
Hide file tree
Showing 8 changed files with 599 additions and 52 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ edition = "2021"

[dependencies]
toktrie = { workspace = true }
derivre = { git = "https://github.com/microsoft/derivre", rev = "68f0e0af794b53a8bd0b15549c016219376a8b6b" }
derivre = { git = "https://github.com/microsoft/derivre", rev = "bfb30e2989e55fa44e5ec9fa503491ac860a6d7c" }
serde = { version = "1.0.210", features = ["derive"] }
serde_json = { version = "1.0.132", features = ["preserve_order"] }
anyhow = "1.0.90"
Expand Down
3 changes: 3 additions & 0 deletions parser/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,9 @@ pub enum RegexNode {
#[serde(default)]
raw_mode: bool,
},
/// MultipleOf(d, s) matches if the input, interpreted as a decimal ASCII number, is a multiple of d*10^-s.
/// The empty string is not matched.
MultipleOf(u32, u32),
}

#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
Expand Down
1 change: 1 addition & 0 deletions parser/src/earley/from_guidance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ fn map_rx_nodes(
raw_mode,
},
)),
RegexNode::MultipleOf(d, s) => Ok(RegexAst::MultipleOf(d, s)),
}
}
}
Expand Down
4 changes: 1 addition & 3 deletions parser/src/grammar_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,7 @@ impl RegexBuilder {
allowed_escapes: Some(opts.allowed_escapes.clone()),
})
}
RegexAst::MultipleOf(_) => {
bail!("MultipleOf not supported")
}
RegexAst::MultipleOf(d, s) => self.add_node(RegexNode::MultipleOf(d, s)),
RegexAst::ExprRef(_) => {
bail!("ExprRef not supported")
}
Expand Down
101 changes: 57 additions & 44 deletions parser/src/json/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ use hashbrown::HashMap;
use indexmap::IndexMap;
use serde_json::{json, Value};

use super::numeric::{rx_float_range, rx_int_range};
use super::numeric::{check_number_bounds, rx_float_range, rx_int_range, Decimal};
use super::schema::{build_schema, Schema};

use crate::{
api::{GrammarWithLexer, RegexSpec, TopLevelGrammar},
GrammarBuilder, NodeRef,
Expand Down Expand Up @@ -40,40 +41,6 @@ impl std::fmt::Display for UnsatisfiableSchemaError {

const CHAR_REGEX: &str = r#"(\\([\"\\\/bfnrt]|u[a-fA-F0-9]{4})|[^\"\\\x00-\x1F\x7F])"#;

/// Validates that a numeric `minimum`/`maximum` pair can be satisfied.
///
/// The check only applies when both bounds are present; if either is `None`
/// the function returns `Ok(())` immediately.
///
/// # Errors
///
/// Returns an `UnsatisfiableSchemaError` (wrapped via `anyhow!`) when:
/// * `minimum` is strictly greater than `maximum`, or
/// * `minimum` equals `maximum` and at least one of the bounds is exclusive,
///   which leaves no admissible value.
fn check_number_bounds(
    minimum: Option<f64>,
    maximum: Option<f64>,
    exclusive_minimum: bool,
    exclusive_maximum: bool,
) -> Result<()> {
    // Nothing to validate unless both ends of the range are specified.
    let (lo, hi) = match (minimum, maximum) {
        (Some(lo), Some(hi)) => (lo, hi),
        _ => return Ok(()),
    };

    // An inverted range can never contain a value.
    if lo > hi {
        let message = format!("minimum ({}) is greater than maximum ({})", lo, hi);
        return Err(anyhow!(UnsatisfiableSchemaError { message }));
    }

    // A single-point range is empty as soon as either end is exclusive.
    let any_exclusive = exclusive_minimum || exclusive_maximum;
    if lo == hi && any_exclusive {
        // Report the bounds using the schema keyword that was actually in effect.
        let lower_keyword = match exclusive_minimum {
            true => "exclusiveMinimum",
            false => "minimum",
        };
        let upper_keyword = match exclusive_maximum {
            true => "exclusiveMaximum",
            false => "maximum",
        };
        let message = format!(
            "{} ({}) is equal to {} ({})",
            lower_keyword, lo, upper_keyword, hi
        );
        return Err(anyhow!(UnsatisfiableSchemaError { message }));
    }

    Ok(())
}

struct Compiler {
builder: GrammarBuilder,
options: JsonCompileOptions,
Expand Down Expand Up @@ -272,8 +239,21 @@ impl Compiler {
maximum: Option<f64>,
exclusive_minimum: bool,
exclusive_maximum: bool,
multiple_of: Option<Decimal>,
) -> Result<RegexAst> {
check_number_bounds(minimum, maximum, exclusive_minimum, exclusive_maximum)?;
check_number_bounds(
minimum,
maximum,
exclusive_minimum,
exclusive_maximum,
false,
multiple_of.clone(),
)
.map_err(|e| {
anyhow!(UnsatisfiableSchemaError {
message: e.to_string(),
})
})?;
let minimum = match (minimum, exclusive_minimum) {
(Some(min_val), true) => {
if min_val.fract() != 0.0 {
Expand All @@ -298,14 +278,17 @@ impl Compiler {
_ => None,
}
.map(|val| val as i64);
// TODO: handle errors in rx_int_range; currently it just panics
let rx = rx_int_range(minimum, maximum).with_context(|| {
format!(
"Failed to generate regex for integer range: min={:?}, max={:?}",
minimum, maximum
)
})?;
Ok(RegexAst::Regex(rx))
let mut ast = RegexAst::Regex(rx);
if let Some(d) = multiple_of {
ast = RegexAst::And(vec![ast, RegexAst::MultipleOf(d.coef, d.exp)]);
}
Ok(ast)
}

fn json_number(
Expand All @@ -314,16 +297,33 @@ impl Compiler {
maximum: Option<f64>,
exclusive_minimum: bool,
exclusive_maximum: bool,
multiple_of: Option<Decimal>,
) -> Result<RegexAst> {
check_number_bounds(minimum, maximum, exclusive_minimum, exclusive_maximum)?;
check_number_bounds(
minimum,
maximum,
exclusive_minimum,
exclusive_maximum,
false,
multiple_of.clone(),
)
.map_err(|e| {
anyhow!(UnsatisfiableSchemaError {
message: e.to_string(),
})
})?;
let rx = rx_float_range(minimum, maximum, !exclusive_minimum, !exclusive_maximum)
.with_context(|| {
format!(
"Failed to generate regex for float range: min={:?}, max={:?}",
minimum, maximum
)
})?;
Ok(RegexAst::Regex(rx))
let mut ast = RegexAst::Regex(rx);
if let Some(d) = multiple_of {
ast = RegexAst::And(vec![ast, RegexAst::MultipleOf(d.coef, d.exp)]);
}
Ok(ast)
}

fn ast_lexeme(&mut self, ast: RegexAst) -> Result<NodeRef> {
Expand Down Expand Up @@ -352,7 +352,7 @@ impl Compiler {
cache!(self.any_cache, {
let json_any = self.builder.placeholder();
self.any_cache = Some(json_any); // avoid infinite recursion
let num = self.json_number(None, None, false, false).unwrap();
let num = self.json_number(None, None, false, false, None).unwrap();
let options = vec![
self.builder.string("null"),
self.builder.lexeme(mk_regex(r"true|false")),
Expand Down Expand Up @@ -528,6 +528,7 @@ impl Compiler {
exclusive_minimum,
exclusive_maximum,
integer,
multiple_of,
} => {
let (minimum, exclusive_minimum) = match (minimum, exclusive_minimum) {
(Some(min), Some(xmin)) => {
Expand All @@ -554,9 +555,21 @@ impl Compiler {
(None, None) => (None, false),
};
Some(if *integer {
self.json_int(minimum, maximum, exclusive_minimum, exclusive_maximum)?
self.json_int(
minimum,
maximum,
exclusive_minimum,
exclusive_maximum,
multiple_of.clone(),
)?
} else {
self.json_number(minimum, maximum, exclusive_minimum, exclusive_maximum)?
self.json_number(
minimum,
maximum,
exclusive_minimum,
exclusive_maximum,
multiple_of.clone(),
)?
})
}

Expand Down Expand Up @@ -791,7 +804,7 @@ fn always_non_empty(ast: &RegexAst) -> bool {
| RegexAst::ByteLiteral(_)
| RegexAst::Byte(_)
| RegexAst::ByteSet(_)
| RegexAst::MultipleOf(_) => true,
| RegexAst::MultipleOf(_, _) => true,

RegexAst::And(_)
| RegexAst::Not(_)
Expand Down
Loading

0 comments on commit 823bcf0

Please sign in to comment.