From 6984bc9738229df7eda8da3c73fdb4140b463e65 Mon Sep 17 00:00:00 2001
From: lonerapier
Date: Wed, 9 Oct 2024 01:33:05 +0530
Subject: [PATCH] feat: remove keyLen and add maxKeyLen

---
 circuits/json/extractor.circom                | 11 ++-
 circuits/json/interpreter.circom              | 57 +++++++++++
 .../test/json/extractor/extractor.test.ts     | 95 ++++++-------------
 .../test/json/extractor/interpreter.test.ts   | 56 +++++++++++
 circuits/test/utils/search.test.ts            | 42 ++++++++
 circuits/utils/search.circom                  | 16 ++++
 examples/json/test/value_object.json          |  2 +-
 7 files changed, 207 insertions(+), 72 deletions(-)

diff --git a/circuits/json/extractor.circom b/circuits/json/extractor.circom
index 7f5fafd..3a37942 100644
--- a/circuits/json/extractor.circom
+++ b/circuits/json/extractor.circom
@@ -2,10 +2,13 @@ pragma circom 2.1.9;
 
 include "interpreter.circom";
 
-template ObjectExtractor(DATA_BYTES, MAX_STACK_HEIGHT, keyLen, maxValueLen) {
+template ObjectExtractor(DATA_BYTES, MAX_STACK_HEIGHT, maxKeyLen, maxValueLen) {
+    assert(MAX_STACK_HEIGHT >= 2);
+
     // Declaration of signals.
     signal input data[DATA_BYTES];
-    signal input key[keyLen];
+    signal input key[maxKeyLen];
+    signal input keyLen;
 
     signal output value[maxValueLen];
 
@@ -40,7 +43,7 @@ template ObjectExtractor(DATA_BYTES, MAX_STACK_HEIGHT, keyLen, maxValueLen) {
 
     parsing_key[0] <== InsideKey(MAX_STACK_HEIGHT)(State[0].next_stack, State[0].next_parsing_string, State[0].next_parsing_number);
     parsing_value[0] <== InsideValueObject()(State[0].next_stack[0], State[0].next_stack[1], State[0].next_parsing_string, State[0].next_parsing_number);
-    is_key_match[0] <== KeyMatchAtDepth(DATA_BYTES, MAX_STACK_HEIGHT, keyLen, 0)(data, key, 0, parsing_key[0], State[0].next_stack);
+    is_key_match[0] <== KeyMatchAtDepthWithIndex(DATA_BYTES, MAX_STACK_HEIGHT, maxKeyLen, 0)(data, key, keyLen, 0, parsing_key[0], State[0].next_stack);
     is_next_pair_at_depth[0] <== NextKVPairAtDepth(MAX_STACK_HEIGHT, 0)(State[0].next_stack, data[0]);
     is_key_match_for_value[1] <== Mux1()([is_key_match_for_value[0] * (1-is_next_pair_at_depth[0]), is_key_match[0] * (1-is_next_pair_at_depth[0])], is_key_match[0]);
     is_value_match[0] <== parsing_value[0] * is_key_match_for_value[1];
@@ -71,7 +74,7 @@ template ObjectExtractor(DATA_BYTES, MAX_STACK_HEIGHT, keyLen, maxValueLen) {
         // - key matches at current index and depth of key is as specified
         // - whether next KV pair starts
         // - whether key matched for a value (propogate key match until new KV pair of lower depth starts)
-        is_key_match[data_idx] <== KeyMatchAtDepth(DATA_BYTES, MAX_STACK_HEIGHT, keyLen, 0)(data, key, data_idx, parsing_key[data_idx], State[data_idx].next_stack);
+        is_key_match[data_idx] <== KeyMatchAtDepthWithIndex(DATA_BYTES, MAX_STACK_HEIGHT, maxKeyLen, 0)(data, key, keyLen, data_idx, parsing_key[data_idx], State[data_idx].next_stack);
         is_next_pair_at_depth[data_idx] <== NextKVPairAtDepth(MAX_STACK_HEIGHT, 0)(State[data_idx].next_stack, data[data_idx]);
         is_key_match_for_value[data_idx+1] <== Mux1()([is_key_match_for_value[data_idx] * (1-is_next_pair_at_depth[data_idx]), is_key_match[data_idx] * (1-is_next_pair_at_depth[data_idx])], is_key_match[data_idx]);
         is_value_match[data_idx] <== is_key_match_for_value[data_idx+1] * parsing_value[data_idx];
diff --git a/circuits/json/interpreter.circom b/circuits/json/interpreter.circom
index 4d6a8a9..82c06ec 100644
--- a/circuits/json/interpreter.circom
+++ b/circuits/json/interpreter.circom
@@ -371,5 +371,62 @@ template KeyMatchAtDepth(dataLen, n, keyLen, depth) {
 
     signal is_parsing_correct_key_at_depth <== is_parsing_correct_key * is_key_at_depth;
 
+    signal output out <== substring_match * is_parsing_correct_key_at_depth;
+}
+
+/// Matches a JSON key of runtime length `keyLen` at an `index` using Substring Matching at specified depth
+///
+/// # Arguments
+/// - `dataLen`: parsed data length
+/// - `n`: maximum stack height
+/// - `maxKeyLen`: maximum key length (size of the `key` input array)
+/// - `depth`: depth of key to be matched
+///
+/// # Inputs
+/// - `data`: data bytes
+/// - `key`: key bytes, an array of `maxKeyLen` entries (the tests zero-pad the unused tail)
+/// - `keyLen`: actual key length; `data[index + keyLen]` must be the closing `"`
+/// - `index`: data index to match from
+/// - `parsing_key`: if current byte is inside a key
+/// - `stack`: parser stack output
+///
+/// # Output
+/// - `out`: Returns `1` if `key` matches `data` at `index`
+template KeyMatchAtDepthWithIndex(dataLen, n, maxKeyLen, depth) {
+    signal input data[dataLen];
+    signal input key[maxKeyLen];
+    signal input keyLen;
+    signal input index;
+    signal input parsing_key;
+    signal input stack[n][2];
+
+    component topOfStack = GetTopOfStack(n);
+    topOfStack.stack <== stack;
+    signal pointer <== topOfStack.pointer;
+    _ <== topOfStack.value;
+
+    // `"` -> 34
+
+    // end of key equals `"`
+    signal end_of_key <== IndexSelector(dataLen)(data, index + keyLen);
+    signal is_end_of_key_equal_to_quote <== IsEqual()([end_of_key, 34]);
+
+    // start of key equals `"`
+    signal start_of_key <== IndexSelector(dataLen)(data, index - 1);
+    signal is_start_of_key_equal_to_quote <== IsEqual()([start_of_key, 34]);
+
+    // key matches
+    signal substring_match <== SubstringMatchWithIndexx(dataLen, maxKeyLen)(data, key, keyLen, index);
+
+    // key should be a string
+    signal is_key_between_quotes <== is_start_of_key_equal_to_quote * is_end_of_key_equal_to_quote;
+
+    // is the index given correct?
+    signal is_parsing_correct_key <== is_key_between_quotes * parsing_key;
+    // is the key given by index at correct depth?
+    signal is_key_at_depth <== IsEqual()([pointer-1, depth]);
+
+    signal is_parsing_correct_key_at_depth <== is_parsing_correct_key * is_key_at_depth;
+
     signal output out <== substring_match * is_parsing_correct_key_at_depth;
 }
\ No newline at end of file
diff --git a/circuits/test/json/extractor/extractor.test.ts b/circuits/test/json/extractor/extractor.test.ts
index fb7466b..74576fa 100644
--- a/circuits/test/json/extractor/extractor.test.ts
+++ b/circuits/test/json/extractor/extractor.test.ts
@@ -291,95 +291,56 @@ describe("array-only", async () => {
 });
 
 describe("object-extractor", async () => {
-    let circuit: WitnessTester<["data", "key"], ["value"]>;
+    let circuit: WitnessTester<["data", "key", "keyLen"], ["value"]>;
     let jsonFilename = "value_object";
+    let jsonFile: number[] = [];
+    let maxKeyLen = 10;
     let maxValueLen = 30;
 
-    it("key: \"a\", value: \"{ \"d\" : \"e\", \"e\": \"c\" }\"", async () => {
+    before(async () => {
         let [inputJson, key, output] = readJSONInputFile(
             `${jsonFilename}.json`,
             [
                 "a"
             ]
         );
+        jsonFile = inputJson;
 
         circuit = await circomkit.WitnessTester(`Extract`, {
             file: `json/extractor`,
             template: "ObjectExtractor",
-            params: [inputJson.length, 3, 1, maxValueLen],
+            params: [inputJson.length, 3, maxKeyLen, maxValueLen],
         });
         console.log("#constraints:", await circuit.getConstraintCount());
-
-        // { "d" : "e", "e": "c" }
-        let outputs = [123, 32, 34, 100, 34, 32, 58, 32, 34, 101, 34, 44, 32, 34, 101, 34, 58, 32, 34, 99, 34, 32, 125];
-        outputs.fill(0, outputs.length, maxValueLen);
-
-        await circuit.expectPass({ data: inputJson, key: key }, { value: outputs });
     });
 
-    it("key: \"g\", value: \"{ \"h\": { \"a\": \"c\" }}\"", async () => {
-        let [inputJson, key, output] = readJSONInputFile(
-            `${jsonFilename}.json`,
-            [
-                "g"
-            ]
-        );
+    function generatePassCase(key: number[], output: number[]) {
+        output = output.concat(Array(maxValueLen - output.length).fill(0));
+        let padded_key = key.concat(Array(maxKeyLen - key.length).fill(0));
 
-        circuit = await circomkit.WitnessTester(`Extract`, {
-            file: `json/extractor`,
-            template: "ObjectExtractor",
-            params: [inputJson.length, 3, 1, maxValueLen],
+        it(`key: ${key}, output: ${output}`, async () => {
+            await circuit.expectPass({ data: jsonFile, key: padded_key, keyLen: key.length }, { value: output });
         });
-        console.log("#constraints:", await circuit.getConstraintCount());
+    }
 
-        // { "h": { "a": "c" }}
-        let outputs = [123, 32, 34, 104, 34, 58, 32, 123, 32, 34, 97, 34, 58, 32, 34, 99, 34, 32, 125, 125];
-        outputs.fill(0, outputs.length, maxValueLen);
+    // { "d" : "e", "e": "c" }
+    let output1 = [123, 32, 34, 100, 34, 32, 58, 32, 34, 101, 34, 44, 32, 34, 101, 34, 58, 32, 34, 99, 34, 32, 125];
+    generatePassCase(toByte("a"), output1);
 
-        await circuit.expectPass({ data: inputJson, key: key }, { value: outputs });
-    });
+    // { "h": { "a": "c" }}
+    let output2 = [123, 32, 34, 104, 34, 58, 32, 123, 32, 34, 97, 34, 58, 32, 34, 99, 34, 32, 125, 125];
+    generatePassCase(toByte("g"), output2);
 
-    it("key: \"ab\", value: \"foobar\"", async () => {
-        let [inputJson, key, output] = readJSONInputFile(
-            `${jsonFilename}.json`,
-            [
-                "ab"
-            ]
-        );
+    // "foobar"
+    let output3 = [34, 102, 111, 111, 98, 97, 114, 34];
+    generatePassCase(toByte("ab"), output3);
 
-        circuit = await circomkit.WitnessTester(`Extract`, {
-            file: `json/extractor`,
-            template: "ObjectExtractor",
-            params: [inputJson.length, 3, 2, maxValueLen],
-        });
-        console.log("#constraints:", await circuit.getConstraintCount());
-
-        // "foobar"
-        let outputs = [34, 102, 111, 111, 98, 97, 114, 34];
-        outputs.fill(0, outputs.length, maxValueLen);
-
-        await circuit.expectPass({ data: inputJson, key: key }, { value: outputs });
-    });
+    // 42
+    // TODO: currently number gives an extra byte. Fix this.
+    let output4 = [52, 50, 44];
+    generatePassCase(toByte("bc"), output4);
 
-    it("key: \"ab\", value: 42", async () => {
-        let [inputJson, key, output] = readJSONInputFile(
-            `${jsonFilename}.json`,
-            [
-                "bc"
-            ]
-        );
-
-        circuit = await circomkit.WitnessTester(`Extract`, {
-            file: `json/extractor`,
-            template: "ObjectExtractor",
-            params: [inputJson.length, 3, 2, maxValueLen],
-        });
-        console.log("#constraints:", await circuit.getConstraintCount());
-
-        // "foobar"
-        let outputs = [52, 50];
-        outputs.fill(0, outputs.length, maxValueLen);
-
-        await circuit.expectPass({ data: inputJson, key: key }, { value: outputs });
-    });
+    // [ 0, 1, "a"]
+    let output5 = [91, 32, 48, 44, 32, 49, 44, 32, 34, 97, 34, 93];
+    generatePassCase(toByte("dc"), output5);
 });
\ No newline at end of file
diff --git a/circuits/test/json/extractor/interpreter.test.ts b/circuits/test/json/extractor/interpreter.test.ts
index bf6a220..44318c8 100644
--- a/circuits/test/json/extractor/interpreter.test.ts
+++ b/circuits/test/json/extractor/interpreter.test.ts
@@ -354,4 +354,60 @@ describe("Interpreter", async () => {
         let input7 = { data: input[0], key: input[1][0], index: 2, parsing_key: 1, stack: [[1, 0], [0, 0], [0, 0], [0, 0]] };
         generatePassCase(input6, { out: 0 }, 1, "wrong depth");
     });
+
+    describe("KeyMatchAtDepthWithIndex", async () => {
+        let circuit: WitnessTester<["data", "key", "keyLen", "index", "parsing_key", "stack"], ["out"]>;
+        let maxKeyLen = 10;
+
+        function generatePassCase(input: any, expected: any, depth: number, desc: string) {
+            const description = generateDescription(input);
+
+            it(`(valid) witness: ${description} ${desc}`, async () => {
+                // pad key with 0's
+                let padded_key = input.key.concat(Array(maxKeyLen - input.key.length).fill(0));
+                input.key = padded_key;
+
+                circuit = await circomkit.WitnessTester(`KeyMatchAtDepthWithIndex`, {
+                    file: "json/interpreter",
+                    template: "KeyMatchAtDepthWithIndex",
+                    params: [input.data.length, 4, maxKeyLen, depth],
+                });
+                console.log("#constraints:", await circuit.getConstraintCount());
+
+                await circuit.expectPass(input, expected);
+            });
+        }
+
+        let input = readJSONInputFile("value_array_object.json", ["a", 0, "b", 0]);
+
+        let output = { out: 1 };
+
+        let key1 = input[1][0];
+        let input1 = { data: input[0], key: key1, keyLen: key1.length, index: 2, parsing_key: 1, stack: [[1, 0], [0, 0], [0, 0], [0, 0]] };
+        generatePassCase(input1, output, 0, "");
+
+        let key2 = input[1][2];
+        let input2 = { data: input[0], key: key2, keyLen: key2.length, index: 8, parsing_key: 1, stack: [[1, 1], [2, 0], [1, 0], [0, 0]] };
+        generatePassCase(input2, output, 2, "");
+
+        let input3 = { data: input[0], key: [99], keyLen: 1, index: 20, parsing_key: 1, stack: [[1, 1], [2, 1], [1, 1], [0, 0]] };
+        generatePassCase(input3, output, 2, "wrong stack");
+
+        // fail cases
+
+        let key4 = input[1][1];
+        let input4 = { data: input[0], key: key4, keyLen: key4.length, index: 3, parsing_key: 1, stack: [[1, 0], [2, 0], [1, 0], [0, 0]] };
+        generatePassCase(input4, { out: 0 }, 2, "wrong key");
+
+        let input5 = { data: input[0], key: [97], keyLen: 1, index: 12, parsing_key: 0, stack: [[1, 1], [2, 0], [1, 1], [0, 0]] };
+        generatePassCase(input5, { out: 0 }, 3, "not parsing key");
+
+        let input6Data = input[0].slice(0);
+        input6Data.splice(1, 1, 35);
+        let input6 = { data: input6Data, key: input[1][0], keyLen: input[1][0].length, index: 2, parsing_key: 1, stack: [[1, 0], [0, 0], [0, 0], [0, 0]] };
+        generatePassCase(input6, { out: 0 }, 0, "invalid key (not surrounded by quotes)");
+
+        let input7 = { data: input[0], key: input[1][0], keyLen: input[1][0].length, index: 2, parsing_key: 1, stack: [[1, 0], [0, 0], [0, 0], [0, 0]] };
+        generatePassCase(input7, { out: 0 }, 1, "wrong depth");
+    });
 });
\ No newline at end of file
diff --git a/circuits/test/utils/search.test.ts b/circuits/test/utils/search.test.ts
index 91f2ded..98b8bd8 100644
--- a/circuits/test/utils/search.test.ts
+++ b/circuits/test/utils/search.test.ts
@@ -152,6 +152,48 @@ describe("search", () => {
         });
     });
 
+    describe("SubstringMatchWithIndexx", () => {
+        let circuit: WitnessTester<["data", "key", "keyLen", "start"], ["out"]>;
+        let maxKeyLen = 30;
+
+        before(async () => {
+            circuit = await circomkit.WitnessTester(`SubstringSearch`, {
+                file: "utils/search",
+                template: "SubstringMatchWithIndexx",
+                params: [787, maxKeyLen],
+            });
+            console.log("#constraints:", await circuit.getConstraintCount());
+        });
+
+        it("data = witness.json:data, key = witness.json:key, r = hash(key+data)", async () => {
+            let key = witness["key"];
+            let pad_key = key.concat(Array(maxKeyLen - key.length).fill(0));
+            await circuit.expectPass(
+                {
+                    data: witness["data"],
+                    key: pad_key,
+                    keyLen: witness["key"].length,
+                    start: 6
+                },
+                { out: 1 },
+            );
+        });
+
+        it("data = witness.json:data, key = witness.json:key, r = hash(key+data), output false", async () => {
+            let key = witness["key"];
+            let pad_key = key.concat(Array(maxKeyLen - key.length).fill(0));
+            await circuit.expectPass(
+                {
+                    data: witness["data"],
+                    key: pad_key,
+                    keyLen: witness["key"].length,
+                    start: 98
+                },
+                { out: 0 }
+            );
+        });
+    });
+
     describe("SubstringMatch", () => {
         let circuit: WitnessTester<["data", "key"], ["position"]>;
 
diff --git a/circuits/utils/search.circom b/circuits/utils/search.circom
index b839fd8..821051d 100644
--- a/circuits/utils/search.circom
+++ b/circuits/utils/search.circom
@@ -232,6 +232,22 @@ template SubstringMatchWithIndex(dataLen, keyLen) {
     signal output out <== isStartLessThanMaxLength * isSubarrayMatch;
 }
 
+template SubstringMatchWithIndexx(dataLen, maxKeyLen) {
+    signal input data[dataLen];
+    signal input key[maxKeyLen];
+    signal input keyLen;
+    signal input start;
+
+    var logDataLen = log2Ceil(dataLen + maxKeyLen + 1);
+
+    signal isStartLessThanMaxLength <== LessThan(logDataLen)([start, dataLen]);
+    signal index <== start * isStartLessThanMaxLength;
+
+    signal subarray[maxKeyLen] <== SelectSubArray(dataLen, maxKeyLen)(data, index, keyLen);
+    signal isSubarrayMatch <== IsEqualArray(maxKeyLen)([key, subarray]);
+    signal output out <== isStartLessThanMaxLength * isSubarrayMatch;
+}
+
 /*
 SubstringMatch: Matches a substring with an input string and returns the position
 
diff --git a/examples/json/test/value_object.json b/examples/json/test/value_object.json
index bc60bcf..8437b4d 100644
--- a/examples/json/test/value_object.json
+++ b/examples/json/test/value_object.json
@@ -1 +1 @@
-{ "a": { "d" : "e", "e": "c" }, "e": { "f": "a", "e": "2" }, "g": { "h": { "a": "c" }}, "ab": "foobar", "bc": 42 }
\ No newline at end of file
+{ "a": { "d" : "e", "e": "c" }, "e": { "f": "a", "e": "2" }, "g": { "h": { "a": "c" }}, "ab": "foobar", "bc": 42, "dc": [ 0, 1, "a"] }
\ No newline at end of file