Merge pull request #1454 from ajv-validator/parse-serialize

JTD: generate parsers/serializers
ajv-validator · Mar 2, 2021 · 0b70f53 · 0b70f53
2 parents d72cdef + 61a9ebf
commit 0b70f53
Show file tree

Hide file tree

Showing 27 changed files with 1,275 additions and 62 deletions.
diff --git a/README.md b/README.md
@@ -21,8 +21,8 @@ Supports JSON Schema draft-06/07/2019-09 (draft-04 is supported in [version 6](h
 
 Ajv version 7 has these new features:
 
+- NEW: support of JSON Type Definition [RFC8927](https://datatracker.ietf.org/doc/rfc8927/) (from [v7.1.0](https://github.com/ajv-validator/ajv/releases/tag/v7.1.0)), including generation of [serializers](./docs/api.md#jtd-serialize) and [parsers](./docs/api.md#jtd-parse) from JTD schemas that are more efficient than native JSON serialization/parsing, combining JSON string parsing and validation in one function.
 - support of JSON Schema draft-2019-09 features: [`unevaluatedProperties`](./docs/json-schema.md#unevaluatedproperties) and [`unevaluatedItems`](./docs/json-schema.md#unevaluateditems), [dynamic recursive references](./docs/validation.md#extending-recursive-schemas) and other [additional keywords](./docs/json-schema.md#json-schema-draft-2019-09).
-- NEW: support of JSON Type Definition [RFC8927](https://datatracker.ietf.org/doc/rfc8927/) (from [v7.1.0](https://github.com/ajv-validator/ajv-keywords/releases/tag/v7.1.0))
 - to reduce the mistakes in JSON schemas and unexpected validation results, [strict mode](./docs/strict-mode.md) is added - it prohibits ignored or ambiguous JSON Schema elements.
 - to make code injection from untrusted schemas impossible, [code generation](./docs/codegen.md) is fully re-written to be safe and to allow code optimization (compiled schema code size is reduced by more than 10%).
 - to simplify Ajv extensions, the new keyword API that is used by pre-defined keywords is available to user-defined keywords - it is much easier to define any keywords now, especially with subschemas. [ajv-keywords](https://github.com/ajv-validator/ajv-keywords) package was updated to use the new API (in [v4.0.0](https://github.com/ajv-validator/ajv-keywords/releases/tag/v4.0.0))

diff --git a/benchmark/jtd.js b/benchmark/jtd.js
@@ -0,0 +1,108 @@
+/* eslint-disable no-empty */
+/* eslint-disable no-console */
+const Ajv = require("ajv/dist/jtd").default
+const Benchmark = require("benchmark")
+const jtdValidationTests = require("../spec/json-typedef-spec/tests/validation.json")
+
+const ajv = new Ajv()
+const suite = new Benchmark.Suite()
+const tests = []
+
+for (const testName in jtdValidationTests) {
+  const {schema, instance, errors} = jtdValidationTests[testName]
+  const valid = errors.length === 0
+  if (!valid) continue
+  tests.push({
+    serialize: ajv.compileSerializer(schema),
+    parse: ajv.compileParser(schema),
+    data: instance,
+    json: JSON.stringify(instance),
+  })
+}
+
+// suite.add("JTD test suite: compiled JTD serializers", () => {
+//   for (const test of tests) {
+//     test.serialize(test.data)
+//   }
+// })
+
+// suite.add("JTD test suite: JSON.stringify", () => {
+//   for (const test of tests) {
+//     JSON.stringify(test.data)
+//   }
+// })
+
+const testSchema = {
+  definitions: {
+    obj: {
+      properties: {
+        foo: {type: "string"},
+        bar: {type: "int8"},
+      },
+    },
+  },
+  properties: {
+    a: {ref: "obj"},
+  },
+  optionalProperties: {
+    b: {ref: "obj"},
+  },
+}
+
+const testData = {
+  a: {
+    foo: "foo1",
+    bar: 1,
+  },
+  b: {
+    foo: "foo2",
+    bar: 2,
+  },
+}
+
+// const serializer = ajv.compileSerializer(testSchema)
+
+// suite.add("test data: compiled JTD serializer", () => serializer(testData))
+// suite.add("test data: JSON.stringify", () => JSON.stringify(testData))
+
+suite.add("JTD test suite: compiled JTD parsers", () => {
+  for (const test of tests) {
+    test.parse(test.json)
+  }
+})
+
+suite.add("JTD test suite: JSON.parse", () => {
+  for (const test of tests) {
+    JSON.parse(test.json)
+  }
+})
+
+const validTestData = JSON.stringify(testData)
+
+const invalidTestData = JSON.stringify({
+  a: {
+    foo: "foo1",
+    bar: "1",
+  },
+  b: {
+    foo: "foo2",
+    bar: 2,
+  },
+})
+
+const parse = ajv.compileParser(testSchema)
+
+suite.add("valid test data: compiled JTD parser", () => parse(validTestData))
+suite.add("valid test data: JSON.parse", () => JSON.parse(validTestData))
+suite.add("invalid test data: compiled JTD parser", () => parse(invalidTestData))
+suite.add("invalid test data: JSON.parse", () => JSON.parse(invalidTestData))
+
+console.log()
+
+suite
+  .on("cycle", (event) => console.log(String(event.target)))
+  .on("complete", function () {
+    // eslint-disable-next-line no-invalid-this
+    console.log('The fastest is "' + this.filter("fastest").map("name") + '"')
+  })
+  .run({async: true})
diff --git a/benchmark/package.json b/benchmark/package.json
@@ -0,0 +1,6 @@
+{
+  "private": true,
+  "devDependencies": {
+    "benchmark": "^2.1.4"
+  }
+}
diff --git a/docs/api.md b/docs/api.md
@@ -50,6 +50,69 @@ if (validate(data)) {
 
 See more advanced example in [the test](../spec/types/json-schema.spec.ts).
 
+#### <a name="jtd-serialize"></a>ajv.compileSerializer(schema: object): (data: any) =\> string (NEW)
+
+Generates a serializing function based on the [JTD schema](./json-type-definition.md) (caches the schema). It is available only in the JTD instance of Ajv (see the example below).
+
+Serializers compiled from JTD schemas can be more than 10 times faster than using `JSON.stringify`, because they do not traverse all the data, only the properties that are defined in the schema.
+
+Properties not defined in the schema will not be included in serialized JSON, unless the schema has `additionalProperties: true` flag. It can also be beneficial from the application security point of view, as it prevents leaking accidentally/temporarily added additional properties to the API responses.
+
+If you use JTD with TypeScript, the type of the schema can be derived from the data type, and the generated serializer will then accept only data of the correct type:
+
+```typescript
+import Ajv, {JTDSchemaType} from "ajv/dist/jtd"
+const ajv = new Ajv()
+
+interface MyData {
+  foo: number
+  bar?: string
+}
+
+const mySchema: JTDSchemaType<MyData> = {
+  properties: {
+    foo: {type: "int32"} // any JTD number type would be accepted here
+  },
+  optionalProperties: {
+    bar: {type: "string"}
+  }
+}
+
+const serializeMyData = ajv.compileSerializer(mySchema)
+
+// serializeMyData has type (x: MyData) => string
+// it prevents you from accidentally passing the wrong type
+```
+
+**Please note**: Compiled serializers do NOT validate the passed data - it is assumed that the data is valid according to the schema. In the future, an option may be added that would make serializers also validate the data.
+
+#### <a name="jtd-parse"></a>ajv.compileParser(schema: object): (json: string) =\> any (NEW)
+
+Generates a parsing function based on the [JTD schema](./json-type-definition.md) (caches the schema). It is available only in the JTD instance of Ajv (see the example below).
+
+Parsers compiled from JTD schemas have comparable performance to `JSON.parse`<sup>*</sup> when the JSON string is valid according to the schema (and they do not just parse JSON - they ensure that the parsed JSON is valid according to the schema as they parse), but they can be many times faster when the string is invalid - for example, if the schema expects an object and the JSON string is an array, the parser would fail on the first character.
+
+Parsing will fail if there are properties not defined in the schema, unless the schema has `additionalProperties: true` flag.
+
+If you use JTD with TypeScript, the type of the schema can be derived from the data type, and the generated parser will return the correct data type (see the definitions example in the [serialize](#jtd-serialize) section):
+
+```typescript
+const parseMyData = ajv.compileParser(mySchema)
+
+// parseMyData has type (s: string) => MyData | undefined
+// it returns correct data type in case parsing is successful and undefined if not
+
+const validData = parseMyData('{"foo":1}') // {foo: 1} - success
+
+const invalidData = parseMyData('{"x":1}') // undefined - failure
+console.log(parseMyData.position) // 4
+console.log(parseMyData.message) // property x not allowed
+```
+
+**Please note**: generated parsers are NEW Ajv functionality (as of March 2021), so there may be some edge cases that are not handled correctly - please report any issues/submit fixes.
+
+<sup>*</sup> As long as empty schema `{}` is not used - there is a possibility to improve performance in this case. Also, the performance of parsing `discriminator` schemas depends on the position of discriminator tag in the schema - the best parsing performance will be achieved if the tag is the first property - this is how compiled JTD serializers generate JSON in case of discriminator schemas.
+
 #### <a name="api-compileAsync"></a>ajv.compileAsync(schema: object, meta?: boolean): Promise\<Function\>
 
 Asynchronous version of `compile` method that loads missing remote schemas using asynchronous function in `options.loadSchema`. This function returns a Promise that resolves to a validation function. An optional callback passed to `compileAsync` will be called with 2 parameters: error (or null) and validating function. The returned promise will reject (and the callback will be called with an error) when:

diff --git a/lib/compile/codegen/index.ts b/lib/compile/codegen/index.ts
@@ -21,6 +21,7 @@ export const operators = {
   NOT: new _Code("!"),
   OR: new _Code("||"),
   AND: new _Code("&&"),
+  ADD: new _Code("+"),
 }
 
 abstract class Node {
@@ -62,11 +63,7 @@ class Def extends Node {
 }
 
 class Assign extends Node {
-  constructor(
-    private readonly lhs: Code,
-    private rhs: SafeExpr,
-    private readonly sideEffects?: boolean
-  ) {
+  constructor(readonly lhs: Code, public rhs: SafeExpr, private readonly sideEffects?: boolean) {
     super()
   }
 
@@ -86,6 +83,16 @@ class Assign extends Node {
   }
 }
 
+class AssignOp extends Assign {
+  constructor(lhs: Code, private readonly op: Code, rhs: SafeExpr, sideEffects?: boolean) {
+    super(lhs, rhs, sideEffects)
+  }
+
+  render({_n}: CGOptions): string {
+    return `${this.lhs} ${this.op}= ${this.rhs};` + _n
+  }
+}
+
 class Label extends Node {
   readonly names: UsedNames = {}
   constructor(readonly label: Name) {
@@ -508,6 +515,11 @@ export class CodeGen {
     return this._leafNode(new Assign(lhs, rhs, sideEffects))
   }
 
+  // `+=` code
+  add(lhs: Code, rhs: SafeExpr): CodeGen {
+    return this._leafNode(new AssignOp(lhs, operators.ADD, rhs))
+  }
+
   // appends passed SafeExpr to code or executes Block
   code(c: Block | SafeExpr): CodeGen {
     if (typeof c == "function") c()

diff --git a/lib/compile/index.ts b/lib/compile/index.ts
@@ -79,6 +79,10 @@ export class SchemaEnv implements SchemaEnvArgs {
   readonly dynamicAnchors: {[Ref in string]?: true} = {}
   validate?: AnyValidateFunction
   validateName?: ValueScopeName
+  serialize?: (data: unknown) => string
+  serializeName?: ValueScopeName
+  parse?: (data: string) => unknown
+  parseName?: ValueScopeName
 
   constructor(env: SchemaEnvArgs) {
     let schema: AnySchemaObject | undefined
@@ -216,7 +220,7 @@ function inlineOrCompile(this: Ajv, sch: SchemaEnv): AnySchema | SchemaEnv {
 }
 
 // Index of schema compilation in the currently compiled list
-function getCompilingSchema(this: Ajv, schEnv: SchemaEnv): SchemaEnv | void {
+export function getCompilingSchema(this: Ajv, schEnv: SchemaEnv): SchemaEnv | void {
   for (const sch of this._compilations) {
     if (sameSchemaEnv(sch, schEnv)) return sch
   }