From fc67ae71d41b4af36f8f2b13200f1569df2d4c6a Mon Sep 17 00:00:00 2001 From: Sambhav Dusad Date: Fri, 13 Sep 2024 23:58:18 +0530 Subject: [PATCH] feat(cli): add extractor witness gen (#90) * feat(cli): add extractor witness gen * fix: select subarray param bug * feat: add circuit config to `circuits.json`. Pending: http extraction * feat: add http extractor support * fix: tests * refactor files a bit * fix tests again, arghhhhhhhgstgst * simplify cli a bit more * more cli cleanup * add json proof docs * add http build docs --------- Co-authored-by: Colin Roberts --- Cargo.lock | 39 ++++ Cargo.toml | 1 + README.md | 10 +- circuits/http/extractor.circom | 2 +- circuits/test/http/codegen.test.ts | 27 +-- circuits/test/http/extractor.test.ts | 11 +- .../test/json/extractor/extractor.test.ts | 46 ++--- docs/pabuild.md | 142 ++++++++++---- examples/json/lockfile/value_string.json | 2 +- src/circuit_config.rs | 50 +++++ src/{ => codegen}/http.rs | 173 ++++++++++++++++-- src/{ => codegen}/json.rs | 155 +++++++++++++--- src/codegen/mod.rs | 2 + src/main.rs | 106 +++++------ src/witness.rs | 154 +++++++++++----- 15 files changed, 696 insertions(+), 224 deletions(-) create mode 100644 src/circuit_config.rs rename src/{ => codegen}/http.rs (73%) rename src/{ => codegen}/json.rs (80%) create mode 100644 src/codegen/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 06d63c3..4433a59 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.15" @@ -126,6 +135,7 @@ name = "pabuild" version = "0.0.0" dependencies = [ "clap", + "regex", "serde", "serde_json", ] @@ -148,6 +158,35 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + [[package]] name = "ryu" version = "1.0.18" diff --git a/Cargo.toml b/Cargo.toml index d252b64..3d0f952 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,4 @@ edition = "2021" serde = { version = "1.0.209", features = ["derive"] } serde_json = "1.0.127" clap = { version = "4.5.16", features = ["derive"] } +regex = "1.10.6" diff --git a/README.md b/README.md index f9265aa..3314937 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,13 @@
- + Contributors - + Tests - + Lint
@@ -120,10 +120,10 @@ From the root of this repository, run: ```sh cargo install --path . ``` -to install the `wpbuild` binary. +to install the `pabuild` binary. You can see a help menu with the subcommands by: ```sh -wpbuild --help +pabuild --help ``` This is our local Rust command line application. Please see the [documentation](docs/pabuild.md) for how to use this alongside the other tools. diff --git a/circuits/http/extractor.circom b/circuits/http/extractor.circom index 03ad0c2..c2b0925 100644 --- a/circuits/http/extractor.circom +++ b/circuits/http/extractor.circom @@ -78,7 +78,7 @@ template ExtractResponse(DATA_BYTES, maxContentLength) { valueStartingIndex[i] <== valueStartingIndex[i-1] + i * (1-isZeroMask[i]) * isPrevStartingIndex[i]; } - response <== SelectSubArray(DATA_BYTES, maxContentLength)(dataMask, valueStartingIndex[DATA_BYTES-1]+1, DATA_BYTES - valueStartingIndex[DATA_BYTES-1]); + response <== SelectSubArray(DATA_BYTES, maxContentLength)(dataMask, valueStartingIndex[DATA_BYTES-1]+1, maxContentLength); } template ExtractHeaderValue(DATA_BYTES, headerNameLength, maxValueLength) { diff --git a/circuits/test/http/codegen.test.ts b/circuits/test/http/codegen.test.ts index d46ec48..aa209c6 100644 --- a/circuits/test/http/codegen.test.ts +++ b/circuits/test/http/codegen.test.ts @@ -41,11 +41,12 @@ interface Response { } -function executeCodegen(inputFilename: string, outputFilename: string) { +function executeCodegen(circuitName: string, inputFileName: string, lockfileName: string) { return new Promise((resolve, reject) => { - const inputPath = join(__dirname, "..", "..", "..", "examples", "http", "lockfile", inputFilename); + const inputFilePath = join(__dirname, "..", "..", "..", "examples", "http", inputFileName); + const lockfilePath = join(__dirname, "..", "..", "..", "examples", "http", "lockfile", lockfileName); - const codegen = spawn("cargo", ["run", "http", "--lockfile", inputPath, "--output-filename", outputFilename]); + const codegen = 
spawn("cargo", ["run", "codegen", "http", "--circuit-name", circuitName, "--input-file", inputFilePath, "--lockfile", lockfilePath]); codegen.stdout.on('data', (data) => { console.log(`stdout: ${data}`); @@ -71,9 +72,10 @@ describe("HTTP :: Codegen :: Request", async () => { it("(valid) GET:", async () => { let lockfile = "request.lock"; let inputfile = "get_request.http"; + let circuitName = "get_request_test"; // generate extractor circuit using codegen - await executeCodegen(`${lockfile}.json`, lockfile); + await executeCodegen(circuitName, inputfile, `${lockfile}.json`); const lockData = readLockFile(`${lockfile}.json`); console.log("lockData: ", JSON.stringify(lockData)); @@ -89,7 +91,7 @@ describe("HTTP :: Codegen :: Request", async () => { circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${lockfile}`, + file: `main/http_${circuitName}`, template: "LockHTTPRequest", params: params, }); @@ -113,9 +115,10 @@ describe("HTTP :: Codegen :: Request", async () => { it("(invalid) GET:", async () => { let lockfile = "request.lock"; let inputfile = "get_request.http"; + let circuitName = "get_request_test"; // generate extractor circuit using codegen - await executeCodegen(`${lockfile}.json`, lockfile); + await executeCodegen(circuitName, inputfile, `${lockfile}.json`); const lockData = readLockFile(`${lockfile}.json`); @@ -130,7 +133,7 @@ describe("HTTP :: Codegen :: Request", async () => { circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${lockfile}`, + file: `main/http_${circuitName}`, template: "LockHTTPRequest", params: params, }); @@ -159,9 +162,10 @@ describe("HTTP :: Codegen :: Response", async () => { it("(valid) GET:", async () => { let lockfile = "response.lock"; let inputfile = "get_response.http"; + let circuitName = "get_response_test"; // generate extractor circuit using codegen - await executeCodegen(`${lockfile}.json`, lockfile); + await executeCodegen(circuitName, inputfile, `${lockfile}.json`); const lockData = 
readLockFile(`${lockfile}.json`); console.log("lockData: ", JSON.stringify(lockData)); @@ -179,7 +183,7 @@ describe("HTTP :: Codegen :: Response", async () => { circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${lockfile}`, + file: `main/http_${circuitName}`, template: "LockHTTPResponse", params: params, }); @@ -205,9 +209,10 @@ describe("HTTP :: Codegen :: Response", async () => { it("(invalid) GET:", async () => { let lockfile = "response.lock"; let inputfile = "get_response.http"; + let circuitName = "get_response_test"; // generate extractor circuit using codegen - await executeCodegen(`${lockfile}.json`, lockfile); + await executeCodegen(circuitName, inputfile, `${lockfile}.json`); const lockData = readLockFile(`${lockfile}.json`); @@ -224,7 +229,7 @@ describe("HTTP :: Codegen :: Response", async () => { circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${lockfile}`, + file: `main/http_${circuitName}`, template: "LockHTTPResponse", params: params, }); diff --git a/circuits/test/http/extractor.test.ts b/circuits/test/http/extractor.test.ts index c69c7a7..6dbc0a4 100644 --- a/circuits/test/http/extractor.test.ts +++ b/circuits/test/http/extractor.test.ts @@ -31,7 +31,7 @@ describe("HTTP :: body Extractor", async () => { let output3 = parsedHttp.bodyBytes.slice(0); output3.pop(); - // output3.pop(); // TODO: fails due to shift subarray bug + output3.pop(); generatePassCase(parsedHttp.input, output3, "output length less than actual length"); }); @@ -75,15 +75,6 @@ describe("HTTP :: header Extractor", async () => { let parsedHttp = readHTTPInputFile("get_response.http"); generatePassCase(parsedHttp.input, toByte("Content-Length"), toByte(parsedHttp.headers["Content-Length"]), ""); - - // let output2 = parsedHttp.bodyBytes.slice(0); - // output2.push(0, 0, 0, 0); - // generatePassCase(parsedHttp.input, output2, "output length more than actual length"); - - // let output3 = parsedHttp.bodyBytes.slice(0); - // output3.pop(); - // // 
output3.pop(); // TODO: fails due to shift subarray bug - // generatePassCase(parsedHttp.input, output3, "output length less than actual length"); }); }); diff --git a/circuits/test/json/extractor/extractor.test.ts b/circuits/test/json/extractor/extractor.test.ts index 9b9b6b3..c67a70b 100644 --- a/circuits/test/json/extractor/extractor.test.ts +++ b/circuits/test/json/extractor/extractor.test.ts @@ -3,11 +3,12 @@ import { join } from "path"; import { spawn } from "child_process"; -function executeCodegen(inputFilename: string, outputFilename: string) { +function executeCodegen(circuitName: string, inputFileName: string, lockfileName: string) { return new Promise((resolve, reject) => { - const inputPath = join(__dirname, "..", "..", "..", "..", "examples", "json", "lockfile", inputFilename); + const inputFilePath = join(__dirname, "..", "..", "..", "..", "examples", "json", "test", inputFileName); + const lockfilePath = join(__dirname, "..", "..", "..", "..", "examples", "json", "lockfile", lockfileName); - const codegen = spawn("cargo", ["run", "json", "--template", inputPath, "--output-filename", outputFilename]); + const codegen = spawn("cargo", ["run", "codegen", "json", "--circuit-name", circuitName, "--input-file", inputFilePath, "--lockfile", lockfilePath]); codegen.stdout.on('data', (data) => { console.log(`stdout: ${data}`); @@ -34,13 +35,13 @@ describe("ExtractValue", async () => { let filename = "value_string"; // generate extractor circuit using codegen - await executeCodegen(`${filename}.json`, filename); + await executeCodegen(`${filename}_test`, `${filename}.json`, `${filename}.json`); // read JSON input file into bytes let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["k"]); circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${filename}`, + file: `main/json_${filename}_test`, template: "ExtractStringValue", params: [input.length, 1, 1, 0, 1], }); @@ -56,11 +57,11 @@ describe("ExtractValue", async () => { 
it("two_keys: {\"key1\": \"abc\", \"key2\": \"def\" }", async () => { let filename = "two_keys" - await executeCodegen(`${filename}.json`, filename); + await executeCodegen(`${filename}_test`, `${filename}.json`, `${filename}.json`); let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["key2"]); circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${filename}`, + file: `main/json_${filename}_test`, template: "ExtractStringValue", params: [input.length, 1, 4, 0, 3], }); @@ -71,11 +72,11 @@ describe("ExtractValue", async () => { it("value_number: {\"k\": 69 }", async () => { let filename = "value_number"; - await executeCodegen(`${filename}.json`, filename); + await executeCodegen(`${filename}_test`, `${filename}.json`, `${filename}.json`); let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["k"]); circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${filename}`, + file: `main/json_${filename}_test`, template: "ExtractNumValue", params: [input.length, 1, 1, 0, 2], }); @@ -88,13 +89,14 @@ describe("ExtractValue", async () => { it("value_array_string: { \"k\" : [ 420 , 69 , 4200 , 600 ], \"b\": [ \"ab\" , \"ba\", \"ccc\", \"d\" ] }", async () => { let filename = "value_array_string"; - await executeCodegen(`${filename}.json`, filename); + let inputFileName = "value_array.json"; + await executeCodegen(`${filename}_test`, inputFileName, `${filename}.json`); for (let i = 0; i < 4; i++) { - let [input, keyUnicode, output] = readJSONInputFile("value_array.json", ["b", i]); + let [input, keyUnicode, output] = readJSONInputFile(inputFileName, ["b", i]); circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${filename}`, + file: `main/json_${filename}_test`, template: "ExtractStringValue", params: [input.length, 2, 1, 0, i, 1, output.length], }); @@ -106,13 +108,15 @@ describe("ExtractValue", async () => { it("value_array_number: { \"k\" : [ 420 , 69 , 4200 , 600 ], \"b\": [ \"ab\" , \"ba\", 
\"ccc\", \"d\" ] }", async () => { let filename = "value_array_number"; - await executeCodegen(`${filename}.json`, filename); + let inputFileName = "value_array.json"; + + await executeCodegen(`${filename}_test`, inputFileName, `${filename}.json`); for (let i = 0; i < 4; i++) { - let [input, keyUnicode, output] = readJSONInputFile("value_array.json", ["k", i]); + let [input, keyUnicode, output] = readJSONInputFile(inputFileName, ["k", i]); circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${filename}`, + file: `main/json_${filename}_test`, template: "ExtractNumValue", params: [input.length, 2, 1, 0, i, 1, output.length], }); @@ -125,13 +129,13 @@ describe("ExtractValue", async () => { it("value_array_nested: { \"a\": [[1,0],[0,1,3]] }", async () => { let filename = "value_array_nested"; - await executeCodegen(`${filename}.json`, filename); + await executeCodegen(`${filename}_test`, `${filename}.json`, `${filename}.json`); let index_0 = 1; let index_1 = 0; let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["a", index_0, index_1]); circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${filename}`, + file: `main/json_${filename}_test`, template: "ExtractNumValue", params: [input.length, 3, 1, 0, index_0, 1, index_1, 2, 1], }); @@ -150,12 +154,12 @@ describe("ExtractValueMultiDepth", () => { it("value_object: { \"a\": { \"d\" : \"e\", \"e\": \"c\" }, \"e\": { \"f\": \"a\", \"e\": \"2\" } }", async () => { let filename = "value_object"; - await executeCodegen(`${filename}.json`, filename); + await executeCodegen(`${filename}_test`, `${filename}.json`, `${filename}.json`); let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["e", "e"]); circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${filename}`, + file: `main/json_${filename}_test`, template: "ExtractStringValue", params: [input.length, 3, 1, 0, 1, 1, 1], }); @@ -176,14 +180,14 @@ describe("ExtractValueArrayObject", () => { 
it("value_array_object: {\"a\":[{\"b\":[1,4]},{\"c\":\"b\"}]}", async () => { let filename = "value_array_object"; - await executeCodegen(`${filename}.json`, filename); + await executeCodegen(`${filename}_test`, `${filename}.json`, `${filename}.json`); let index_0 = 0; let index_1 = 0; let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["a", index_0, "b", index_1]); circuit = await circomkit.WitnessTester(`Extract`, { - file: `main/${filename}`, + file: `main/json_${filename}_test`, template: "ExtractNumValue", params: [input.length, 4, 1, 0, index_0, 1, 1, 2, index_1, 3, 1], }); diff --git a/docs/pabuild.md b/docs/pabuild.md index 97191c2..6fbe722 100644 --- a/docs/pabuild.md +++ b/docs/pabuild.md @@ -17,14 +17,17 @@ To get the basic idea, run ```sh pabuild witness --help ``` -It can process and generate JSON files to be used for these circuits. +It can process and generate input JSON files to be used for parser/extractor circuits. + +> [!NOTE] +> `circuit-name` needs to be the **same** for witness generator and codegen. ### Examples **JSON Parsing:** If we have a given JSON file we want to parse such as [`examples/json/test/example.json`](../examples/json/test/example.json) for the `json-parser` circuit (see [`circuits.json`](../circuits.json)), then we can: ```sh -pabuild witness json --input-file examples/json/test/example.json --output-dir inputs/json-parser --output-filename input.json json +pabuild witness parser json --input-file examples/json/test/example.json --circuit-name json-parser ``` Afterwards, you can run `npx circomkit compile json-parser` then `circomkit witness json-parser input`. 
@@ -33,81 +36,146 @@ Afterwards, you can run `npx circomkit compile json-parser` then `circomkit witn If we have a given HTTP request/response (as a file) we want to parse such as [`examples/http/get_request.http`](../examples/http/get_request.http) for the `http-parser` circuit (see `circuits.json`), then we can: ```sh -pabuild witness http --input-file examples/json/get_request.http --output-dir inputs/http-parser --output-filename input.json http +pabuild witness parser http --input-file examples/http/get_request.http --circuit-name http-parser ``` Afterwards, you can run `npx circomkit compile http-parser` then `circomkit witness http-parser input`. -## Codegen +**JSON Extractor:** +To extract a value out of a JSON, we need a lockfile that contains keys and value type. -### JSON Extraction -JSON extractor circuit is generated using rust to handle arbitrary keys and array indices. +```sh +pabuild witness extractor json --input-file examples/json/test/value_string.json --lockfile examples/json/lockfile/value_string.json --circuit-name value_string +``` + +**HTTP Extractor:** +To extract the response from HTTP, a lockfile needs to be given with the start line (method, status, version) and headers to be matched. Examples can be found in [examples/http/lockfile](../examples/http/lockfile/). + +```sh +pabuild witness extractor http --input-file examples/http/get_response.http --lockfile examples/http/lockfile/response.lock.json --circuit-name get-response +``` + +## Codegen +Extractor circuit is generated using rust to handle arbitrary keys and array indices. Run: ```sh -pabuild json --help +pabuild codegen --help ``` to get options: ``` -Usage: pabuild json [OPTIONS] --template