From ed2c440f149ed5afafd1a7a3f558072afde5da1f Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Thu, 15 Aug 2024 16:32:44 -0600 Subject: [PATCH 1/2] tests/refactor: state update and improved JSON parsing (#11) * add: `reddit_response.json` * refactor tests + add failing case * easier fix * test: parse to key * tests: key parsing * bug: `next_end_of_kv` on read `:` * fix: `end_of_kv` bug * test: find value * tests: `inside_value` and `inside_value_to_exit` * test: parse to NEXT key * parses JSON with two string keys * WIP: value inside value * comment * refactor (#10) * wip: start with bitmask * WIP: time to start testing * tests: `ArrayAdd` and `ArrayMul` * tests passing * update comments * feat: 2 key depth 1 json * 2 kv json and all tests passing * nested json works!!! * reduce constraints * cleanup * rename variables * more cleaning * more cleanup * make comments clean * WAYLON NITPICKING ME LOL --- circuits.json | 24 ++++ circuits/extract.circom | 35 ++--- circuits/operators.circom | 20 +++ circuits/parser.circom | 171 +++++++++++++--------- circuits/test/operators.test.ts | 40 ++++++ circuits/test/parser.test.ts | 191 ++++++++++++++++++++++--- create_witness/src/main.rs | 4 +- inputs/test_extract_depth/input.json | 74 ++++++++++ inputs/test_extract_sambhav/input.json | 115 +++++++++++++++ inputs/test_extract_two_key/input.json | 50 +++++++ json_examples/reddit_response.json | 14 ++ json_examples/sambhav_example.json | 7 + json_examples/test_depth.json | 6 + json_examples/test_two_key.json | 4 + 14 files changed, 648 insertions(+), 107 deletions(-) create mode 100644 inputs/test_extract_depth/input.json create mode 100644 inputs/test_extract_sambhav/input.json create mode 100644 inputs/test_extract_two_key/input.json create mode 100644 json_examples/reddit_response.json create mode 100644 json_examples/sambhav_example.json create mode 100644 json_examples/test_depth.json create mode 100644 json_examples/test_two_key.json diff --git a/circuits.json 
b/circuits.json index 3308dae..43017d8 100644 --- a/circuits.json +++ b/circuits.json @@ -7,6 +7,30 @@ 21 ] }, + "test_extract_two_key": { + "file": "extract", + "template": "Extract", + "params": [ + 4, + 40 + ] + }, + "test_extract_depth": { + "file": "extract", + "template": "Extract", + "params": [ + 4, + 64 + ] + }, + "test_extract_sambhav": { + "file": "extract", + "template": "Extract", + "params": [ + 4, + 105 + ] + }, "test_extract_hard": { "file": "extract", "template": "Extract", diff --git a/circuits/extract.circom b/circuits/extract.circom index 2997cc7..76ebcae 100644 --- a/circuits/extract.circom +++ b/circuits/extract.circom @@ -24,35 +24,28 @@ template Extract(KEY_BYTES, DATA_BYTES) { // Initialze the parser component State[DATA_BYTES]; State[0] = StateUpdate(); - State[0].byte <== data[0]; - State[0].tree_depth <== 0; - State[0].parsing_to_key <== 1; // Initialize by saying we are parsing to the first key - State[0].inside_key <== 0; - State[0].parsing_to_value <== 0; - State[0].inside_value <== 0; - State[0].escaping <== 0; - State[0].end_of_kv <== 0; + State[0].byte <== data[0]; + State[0].tree_depth <== 0; + State[0].parsing_key <== 0; + State[0].inside_key <== 0; + State[0].parsing_value <== 0; + State[0].inside_value <== 0; for(var data_pointer = 1; data_pointer < DATA_BYTES; data_pointer++) { State[data_pointer] = StateUpdate(); - State[data_pointer].byte <== data[data_pointer]; - State[data_pointer].tree_depth <== State[data_pointer - 1].next_tree_depth; - State[data_pointer].parsing_to_key <== State[data_pointer - 1].next_parsing_to_key; - State[data_pointer].inside_key <== State[data_pointer - 1].next_inside_key; - State[data_pointer].parsing_to_value <== State[data_pointer - 1].next_parsing_to_value; - State[data_pointer].inside_value <== State[data_pointer - 1].next_inside_value; - State[data_pointer].end_of_kv <== State[data_pointer - 1].next_end_of_kv; - // TODO: For the next state, we should use `next_`, this is only to make this 
compile for now. - State[data_pointer].escaping <== State[data_pointer - 1].escaping; - + State[data_pointer].byte <== data[data_pointer]; + State[data_pointer].tree_depth <== State[data_pointer - 1].next_tree_depth; + State[data_pointer].parsing_key <== State[data_pointer - 1].next_parsing_key; + State[data_pointer].inside_key <== State[data_pointer - 1].next_inside_key; + State[data_pointer].parsing_value <== State[data_pointer - 1].next_parsing_value; + State[data_pointer].inside_value <== State[data_pointer - 1].next_inside_value; // Debugging log("State[", data_pointer, "].tree_depth", "= ", State[data_pointer].tree_depth); - log("State[", data_pointer, "].parsing_to_key", "= ", State[data_pointer].parsing_to_key); + log("State[", data_pointer, "].parsing_key", "= ", State[data_pointer].parsing_key); log("State[", data_pointer, "].inside_key", "= ", State[data_pointer].inside_key); - log("State[", data_pointer, "].parsing_to_value", "= ", State[data_pointer].parsing_to_value); + log("State[", data_pointer, "].parsing_value", "= ", State[data_pointer].parsing_value); log("State[", data_pointer, "].inside_value", "= ", State[data_pointer].inside_value); - log("State[", data_pointer, "].end_of_kv", "= ", State[data_pointer].end_of_kv); log("---"); } diff --git a/circuits/operators.circom b/circuits/operators.circom index 27d6616..d62d5f0 100644 --- a/circuits/operators.circom +++ b/circuits/operators.circom @@ -123,3 +123,23 @@ template Contains(n) { // Apply `not` to this by 1-x out <== 1 - someEqual.out; } + +template ArrayAdd(n) { + signal input lhs[n]; + signal input rhs[n]; + signal output out[n]; + + for(var i = 0; i < n; i++) { + out[i] <== lhs[i] + rhs[i]; + } +} + +template ArrayMul(n) { + signal input lhs[n]; + signal input rhs[n]; + signal output out[n]; + + for(var i = 0; i < n; i++) { + out[i] <== lhs[i] * rhs[i]; + } +} \ No newline at end of file diff --git a/circuits/parser.circom b/circuits/parser.circom index 3eb446d..e4ec021 100644 --- 
a/circuits/parser.circom +++ b/circuits/parser.circom @@ -22,40 +22,30 @@ State[20]| " | COMPLETE WITH KV PARSING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ State[20].next_tree_depth == 0 | VALID JSON xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx - - - -Notes: -- If there is no comma after leaving a value, then we should not be parsing to key. If anything breaks here, JSON was bad. */ /* TODO */ template StateUpdate() { - signal input byte; - - signal input tree_depth; // STATUS_INDICATOR -- how deep in a JSON branch we are, e.g., `user.balance.value` key should be at depth `3`. - // Should always be greater than or equal to `0` (TODO: implement this constraint). + signal input byte; - signal input parsing_to_key; // BIT_FLAG -- whether we are currently parsing bytes until we find the next key (mutally exclusive with `inside_key` and both `*_value flags). - signal input inside_key; // BIT_FLAG -- whether we are currently inside a key (mutually exclusive with `parsing_to_key` and both `*_value` flags). - - signal input parsing_to_value; // BIT_FLAG -- whether we are currently parsing bytes until we find the next value (mutually exclusive with `inside_value` and both `*_key` flags). - signal input inside_value; // BIT_FLAG -- whether we are currently inside a value (mutually exclusive with `parsing_to_value` and both `*_key` flags). - - signal input escaping; // BIT_FLAG -- whether we have hit an escape ASCII symbol inside of a key or value. + signal input tree_depth; // STATUS_INDICATOR -- how deep in a JSON branch we are, e.g., `user.balance.value` key should be at depth `3`. + // constrained to be greater than or equal to `0`. + signal input parsing_key; // BIT_FLAG -- whether we are currently parsing bytes until we find the next key (mutually exclusive with `inside_key` and both `*_value` flags). + signal input inside_key; // BIT_FLAG -- whether we are currently inside a key (mutually exclusive with `parsing_key` and both `*_value` flags). 
+ signal input parsing_value; // BIT_FLAG -- whether we are currently parsing bytes until we find the next value (mutually exclusive with `inside_value` and both `*_key` flags). + signal input inside_value; // BIT_FLAG -- whether we are currently inside a value (mutually exclusive with `parsing_value` and both `*_key` flags). - signal input end_of_kv; // BIT_FLAG -- reached end of key-value sequence, looking for comma delimiter or end of file signified by `tree_depth == 0`. + signal output next_tree_depth; // STATUS_INDICATOR -- next state for `tree_depth`. + signal output next_parsing_key; // BIT_FLAG -- next state for `parsing_key`. + signal output next_inside_key; // BIT_FLAG -- next state for `inside_key`. + signal output next_parsing_value; // BIT_FLAG -- next state for `parsing_value`. + signal output next_inside_value; // BIT_FLAG -- next state for `inside_value`. - signal output next_tree_depth; // BIT_FLAG -- next state for `tree_depth`. - signal output next_parsing_to_key; // BIT_FLAG -- next state for `parsing_to_key`. - signal output next_inside_key; // BIT_FLAG -- next state for `inside_key`. - signal output next_parsing_to_value; // BIT_FLAG -- next state for `parsing_to_value`. - signal output next_inside_value; // BIT_FLAG -- next state for `inside_value`. - signal output next_end_of_kv; // BIT_FLAG -- next state for `end_of_kv`. - - // signal output escaping; // TODO: Add this in! + // TODO: Add this in! + // signal input escaping; // BIT_FLAG -- whether we have hit an escape ASCII symbol inside of a key or value. 
+ // signal output escaping; //--------------------------------------------------------------------------------------------// //-Delimeters---------------------------------------------------------------------------------// @@ -85,51 +75,67 @@ template StateUpdate() { var escape = 92; //--------------------------------------------------------------------------------------------// - //--------------------------------------------------------------------------------------------// - //-MACHINE INSTRUCTIONS-----------------------------------------------------------------------// // TODO: ADD CASE FOR `is_number` for in range 48-57 https://www.ascii-code.com since a value may just be a number - // Output management - component matcher = Switch(8, 3); - var do_nothing[3] = [ 0, 0, 0]; // Command returned by switch if we want to do nothing, e.g. read a whitespace char while looking for a key - var increase_depth[3] = [ 1, 0, 0]; // Command returned by switch if we hit a start brace `{` - var decrease_depth[3] = [-1, 0, 0]; // Command returned by switch if we hit a end brace `}` - var hit_quote[3] = [ 0, 1, 0]; // Command returned by switch if we hit a quote `"` - var hit_colon[3] = [ 0, 0, 1]; // Command returned by switch if we hit a colon `:` - - matcher.branches <== [start_brace, end_brace, quote, colon, start_bracket, end_bracket, comma, escape ]; - matcher.vals <== [increase_depth, decrease_depth, hit_quote, hit_colon, do_nothing, do_nothing, do_nothing, do_nothing]; - matcher.case <== byte; - - - // TODO: These could likely go into a switch statement with the output of the `Switch` above. - // TODO: Also could probably clean up things with de Morgan's laws or whatever. - // An `IF ELSE` template would also be handy! 
- next_inside_key <== inside_key + (parsing_to_key - inside_key) * matcher.out[1]; // IF (`parsing_to_key` AND `hit_quote`) THEN `next_inside_key <== 1` ELSEIF (`inside_key` AND `hit_quote`) THEN `next_inside_key <== 0` - // - note: can rewrite as -> `inside_key * (1-matcher.out[1]) + parsing_to_key * matcher.out[1]`, but this will not be quadratic (according to circom) - next_parsing_to_key <== parsing_to_key * (1 - matcher.out[1]); // IF (`parsing_to_key` AND `hit_quote`) THEN `parsing_to_key <== 0` - - next_inside_value <== inside_value + (parsing_to_value - inside_value) * matcher.out[1]; // IF (`parsing_to_value` AND `hit_quote`) THEN `next_inside_value <== 1` ELSEIF (`inside_value` AND `hit_quote`) THEN `next_inside_value <==0` - // -note: can rewrite as -> `(1 - inside_value) * matcher_out[1] + parsing_to_value * matcher.out[1] + //--------------------------------------------------------------------------------------------// + //-Instructions for ASCII---------------------------------------------------------------------// + var state[5] = [tree_depth, parsing_key, inside_key, parsing_value, inside_value]; + var do_nothing[5] = [ 0, 0, 0, 0, 0 ]; // Command returned by switch if we want to do nothing, e.g. 
read a whitespace char while looking for a key + var hit_start_brace[5] = [ 1, 1, 0, -1, 0 ]; // Command returned by switch if we hit a start brace `{` + var hit_end_brace[5] = [-1, 0, 0, 0, 0 ]; // Command returned by switch if we hit a end brace `}` + var hit_quote[5] = [ 0, 0, 1, 0, 1 ]; // Command returned by switch if we hit a quote `"` + var hit_colon[5] = [ 0, -1, 0, 1, 0 ]; // Command returned by switch if we hit a colon `:` + var hit_comma[5] = [ 0, 1, 0, -1, 0 ]; // Command returned by switch if we hit a comma `,` + //--------------------------------------------------------------------------------------------// - signal NOT_PARSING_TO_KEY_AND_NOT_INSIDE_KEY <== (1 - parsing_to_key) * (1 - inside_key); // (NOT `parsing_to_key`) AND (NOT `inside_key`) - signal PARSING_TO_VALUE_AND_NOT_HIT_QUOTE <== parsing_to_value * (1 - matcher.out[1]); // `parsing_to_value` AND (NOT `hit_quote`) - next_parsing_to_value <== PARSING_TO_VALUE_AND_NOT_HIT_QUOTE + NOT_PARSING_TO_KEY_AND_NOT_INSIDE_KEY * matcher.out[2]; // IF (`parsing_to_value` AND (NOT `hit_quote`)) THEN `next_parsing_to_value <== 1 ELSEIF ((NOT `parsing_to_value` AND (NOT `inside_value)) AND `hit_colon`) THEN `next_parsing_to_value <== 1` - - signal NOT_PARSING_TO_VALUE_AND_NOT_INSIDE_VALUE <== (1 - parsing_to_value) * (1 - inside_value); // (NOT `parsing_to_value`) AND (NOT `inside_value`) - next_end_of_kv <== NOT_PARSING_TO_KEY_AND_NOT_INSIDE_KEY * NOT_PARSING_TO_VALUE_AND_NOT_INSIDE_VALUE; // IF ((NOT `parsing_to_key`) AND (NOT `inside_key`)) AND (NOT(`parsing_to_value`) AND NOT( `inside_value)) THEN `next_end_of_kv <== 1` - - - // TODO: Assert this never goes below zero (mod p) - next_tree_depth <== tree_depth + (parsing_to_key + next_end_of_kv) * matcher.out[0]; // IF ((`parsing_to_key` OR `next_end_of_kv`) AND `read_brace` THEN `increase/decrease_depth` + //--------------------------------------------------------------------------------------------// + //-State machine 
updating---------------------------------------------------------------------// + // * yield instruction based on what byte we read * + component matcher = Switch(5, 5); + matcher.branches <== [start_brace, end_brace, quote, colon, comma ]; + matcher.vals <== [hit_start_brace, hit_end_brace, hit_quote, hit_colon, hit_comma]; + matcher.case <== byte; + // * get the instruction mask based on current state * + component mask = StateToMask(); + mask.state <== state; + // * multiply the mask array elementwise with the instruction array * + component mulMaskAndOut = ArrayMul(5); + mulMaskAndOut.lhs <== mask.mask; + mulMaskAndOut.rhs <== matcher.out; + // * add the masked instruction to the state to get new state * + component addToState = ArrayAdd(5); + addToState.lhs <== state; + addToState.rhs <== mulMaskAndOut.out; + // * set the new state * + next_tree_depth <== addToState.out[0]; + next_parsing_key <== addToState.out[1]; + next_inside_key <== addToState.out[2]; + next_parsing_value <== addToState.out[3]; + next_inside_value <== addToState.out[4]; + //--------------------------------------------------------------------------------------------// - // Constrain bit flags - next_parsing_to_key * (1 - next_parsing_to_key) === 0; // - constrain that `next_parsing_to_key` remain a bit flag - next_inside_key * (1 - next_inside_key) === 0; // - constrain that `next_inside_key` remain a bit flag - next_parsing_to_value * (1 - next_parsing_to_value) === 0; // - constrain that `next_parsing_to_value` remain a bit flag - next_inside_value * (1 - next_inside_value) === 0; // - constrain that `next_inside_value` remain a bit flag - next_end_of_kv * (1 - next_end_of_kv) === 0; // - constrain that `next_end_of_kv` remain a bit flag + //--------------------------------------------------------------------------------------------// + // // DEBUGGING: internal state + // for(var i = 0; i<5; i++) { + // log("-----------------------"); + // log("mask[",i,"]: ", mask.mask[i]); + // 
log("mulMaskAndOut[",i,"]:", mulMaskAndOut.out[i]); + // log("state[",i,"]: ", state[i]); + // log("next_state[",i,"]: ", addToState.out[i]); + // } + //--------------------------------------------------------------------------------------------// - // TODO: Can hit comma and then be sent to next KV, so comma will engage `parsing_to_key` + //--------------------------------------------------------------------------------------------// + //-Constraints--------------------------------------------------------------------------------// + // * constrain bit flags * + next_parsing_key * (1 - next_parsing_key) === 0; // - constrain that `next_parsing_key` remain a bit flag + next_inside_key * (1 - next_inside_key) === 0; // - constrain that `next_inside_key` remain a bit flag + next_parsing_value * (1 - next_parsing_value) === 0; // - constrain that `next_parsing_value` remain a bit flag + next_inside_value * (1 - next_inside_value) === 0; // - constrain that `next_inside_value` remain a bit flag + // * constrain `tree_depth` to never hit -1 (TODO: should it always move in increments of 1?) + component isMinusOne = IsEqual(); + isMinusOne.in[0] <== -1; + isMinusOne.in[1] <== next_tree_depth; + isMinusOne.out === 0; //--------------------------------------------------------------------------------------------// } @@ -177,4 +183,37 @@ template Switch(m, n) { match <== matchChecker.out; out <== sum; +} + +// TODO: Note that at the moment masks 2 and 4 are the same, so one of them can be removed if that remains the case. 
+template StateToMask() { + signal input state[5]; + signal output mask[5]; + + var tree_depth = state[0]; + var parsing_key = state[1]; + var inside_key = state[2]; + var parsing_value = state[3]; + var inside_value = state[4]; + + signal NOT_INSIDE_KEY_AND_NOT_INSIDE_VALUE <== (1 - inside_key) * (1 - inside_value); + signal NOT_PARSING_VALUE_NOT_INSIDE_VALUE <== (1 - parsing_value) * (1 - inside_value); + + component init_tree = IsZero(); + init_tree.in <== tree_depth; + + // `tree_depth` can change: `IF ((tree_depth == 0) XOR parsing_key XOR parsing_value)` (computed as a plain sum of the three flags) + mask[0] <== init_tree.out + parsing_key + parsing_value; // TODO: Make sure these are never both 1! + + // `parsing_key` can change: `IF ((NOT inside_key) AND (NOT inside_value))` + mask[1] <== NOT_INSIDE_KEY_AND_NOT_INSIDE_VALUE; + + // `inside_key` can change: `IF ((NOT parsing_value) AND (NOT inside_value) AND inside_key) THEN mask <== -1 ELSEIF (NOT parsing_value) AND (NOT inside_value) THEN mask <== 1` + mask[2] <== NOT_PARSING_VALUE_NOT_INSIDE_VALUE - 2 * inside_key; + + // `parsing_value` can change: `IF ((NOT inside_key) AND (NOT inside_value) AND (tree_depth != 0))` + mask[3] <== NOT_INSIDE_KEY_AND_NOT_INSIDE_VALUE * (1 - init_tree.out); + + // `inside_value` can change: `IF (parsing_value AND (NOT inside_value)) THEN mask <== 1 ELSEIF (inside_value) mask <== -1` + mask[4] <== parsing_value - 2 * inside_value; } \ No newline at end of file diff --git a/circuits/test/operators.test.ts b/circuits/test/operators.test.ts index 378708b..50baec2 100644 --- a/circuits/test/operators.test.ts +++ b/circuits/test/operators.test.ts @@ -158,4 +158,44 @@ describe("operators", () => { }); }); + + describe("ArrayAdd", () => { + let circuit: WitnessTester<["lhs", "rhs"], ["out"]>; + before(async () => { + circuit = await circomkit.WitnessTester(`ArrayAdd`, { + file: "circuits/operators", + template: "ArrayAdd", + params: [3], + }); + console.log("#constraints:", await circuit.getConstraintCount()); 
+ }); + + it("witness: lhs = [0,1,2], rhs = [3,5,7]", async () => { + await circuit.expectPass( + { lhs: [0, 1, 2], rhs: [3, 5, 7] }, + { out: [3, 6, 9] } + ); + }); + + }); + + describe("ArrayMul", () => { + let circuit: WitnessTester<["lhs", "rhs"], ["out"]>; + before(async () => { + circuit = await circomkit.WitnessTester(`ArrayMul`, { + file: "circuits/operators", + template: "ArrayMul", + params: [3], + }); + console.log("#constraints:", await circuit.getConstraintCount()); + }); + + it("witness: lhs = [0,1,2], rhs = [3,5,7]", async () => { + await circuit.expectPass( + { lhs: [0, 1, 2], rhs: [3, 5, 7] }, + { out: [0, 5, 14] } + ); + }); + + }); }); diff --git a/circuits/test/parser.test.ts b/circuits/test/parser.test.ts index 5f3488d..3e6fbf2 100644 --- a/circuits/test/parser.test.ts +++ b/circuits/test/parser.test.ts @@ -1,3 +1,4 @@ +import { start } from "repl"; import { circomkit, WitnessTester } from "./common"; describe("parser", () => { @@ -56,59 +57,213 @@ describe("parser", () => { }); + //--------------------------------------------------------------------------------------------// + //-Delimeters---------------------------------------------------------------------------------// + // - ASCII char: `{` + const start_brace = 123; + // - ASCII char: `}` + const end_brace = 125; + // - ASCII char `[` + const start_bracket = 91; + // - ASCII char `]` + const end_bracket = 93; + // - ASCII char `"` + const quote = 34; + // - ASCII char `:` + const colon = 58; + // - ASCII char `,` + const comma = 44; + //--------------------------------------------------------------------------------------------// + // White space + // - ASCII char: `\n` + const newline = 10; + // - ASCII char: ` ` + const space = 32; + //--------------------------------------------------------------------------------------------// + // Escape + // - ASCII char: `\` + const escape = 92; + //--------------------------------------------------------------------------------------------// + 
describe("StateUpdate", () => { let circuit: WitnessTester< - ["byte", "tree_depth", "parsing_to_key", "inside_key", "parsing_to_value", "inside_value", "escaping", "end_of_kv"], - ["next_tree_depth", "next_parsing_to_key", "next_inside_key", "next_parsing_to_value", "next_inside_value", "next_end_of_kv"] + ["byte", "tree_depth", "parsing_key", "inside_key", "parsing_value", "inside_value", "end_of_kv"], + ["next_tree_depth", "next_parsing_key", "next_inside_key", "next_parsing_value", "next_inside_value", "next_end_of_kv"] >; - function generateTestCase(input: any, expected: any) { + function generatePassCase(input: any, expected: any, desc: string) { const description = Object.entries(input) .map(([key, value]) => `${key} = ${value}`) .join(", "); - it(`witness: ${description}`, async () => { + it(`(valid) witness: ${description}\n${desc}`, async () => { await circuit.expectPass(input, expected); }); } + function generateFailCase(input: any, desc: string) { + const description = Object.entries(input) + .map(([key, value]) => `${key} = ${value}`) + .join(", "); + + it(`(invalid) witness: ${description}\n${desc}`, async () => { + await circuit.expectFail(input); + }); + } + before(async () => { circuit = await circomkit.WitnessTester(`StateUpdate`, { file: "circuits/parser", template: "StateUpdate", }); console.log("#constraints:", await circuit.getConstraintCount()); + }); let init = { byte: 0, tree_depth: 0, - parsing_to_key: 1, + parsing_key: 0, inside_key: 0, - parsing_to_value: 0, + parsing_value: 0, inside_value: 0, - escaping: 0, - end_of_kv: 0, }; - - // Test 1: init setup -> `do_nothing` byte let out = { next_tree_depth: init.tree_depth, - next_parsing_to_key: init.parsing_to_key, + next_parsing_key: init.parsing_key, next_inside_key: init.inside_key, - next_parsing_to_value: init.parsing_to_value, + next_parsing_value: init.parsing_value, next_inside_value: init.inside_value, - next_end_of_kv: init.end_of_kv }; - generateTestCase(init, out); + + // Test 
1: init setup -> `do_nothing` byte + generatePassCase(init, out, ">>>> `NUL` read"); // Test 2: init setup -> `{` is read - let read_start_brace = init; - read_start_brace.byte = 123; - let read_start_brace_out = out; + let read_start_brace = { ...init }; + read_start_brace.byte = start_brace; + let read_start_brace_out = { ...out }; read_start_brace_out.next_tree_depth = 1; - generateTestCase(read_start_brace, read_start_brace_out); + read_start_brace_out.next_parsing_key = 1; + generatePassCase(read_start_brace, read_start_brace_out, ">>>> `{` read"); + + // Test 3: init setup -> `}` is read (should be INVALID) + let read_end_brace = { ...init }; + read_end_brace.byte = end_brace; + generateFailCase(read_end_brace, ">>>> `}` read --> (FAIL! NEGATIVE TREE DEPTH!)"); + + // Test 4: `tree_depth == 1` and `parsing_key == 1` setup -> `"` is read + let in_tree_find_key = { ...init }; + in_tree_find_key.tree_depth = 1; + in_tree_find_key.parsing_key = 1; + in_tree_find_key.byte = quote; + let in_tree_find_key_out = { ...out }; + in_tree_find_key_out.next_parsing_key = 1; + in_tree_find_key_out.next_inside_key = 1; + in_tree_find_key_out.next_tree_depth = 1; + generatePassCase(in_tree_find_key, in_tree_find_key_out, ">>>> `\"` read"); + + // Test 5: `tree_depth == 1` AND `inside_key ==1` setup -> ` ` is read + let in_key = { ...init }; + in_key.tree_depth = 1; + in_key.inside_key = 1; + in_key.byte = space; + let in_key_out = { ...out }; + in_key_out.next_inside_key = 1; + in_key_out.next_tree_depth = 1; + generatePassCase(in_key, in_key_out, ">>>> ` ` read"); + + // Test 6: `tree_depth == 1` AND `inside_key == 1 AND `parsing_key == 0` setup -> `"` is read + let in_key_to_exit = { ...init }; + in_key_to_exit.tree_depth = 1; + in_key_to_exit.inside_key = 1; + in_key_to_exit.byte = quote; + let in_key_to_exit_out = { ...out }; + in_key_to_exit_out.next_tree_depth = 1; + generatePassCase(in_key_to_exit, in_key_to_exit_out, "`\"` read"); + + // Test 7: `tree_depth == 1` AND 
parsed through key` setup -> `:` is read + let parsed_key_wait_to_parse_value = { ...init }; + parsed_key_wait_to_parse_value.tree_depth = 1; + parsed_key_wait_to_parse_value.parsing_key = 1; + parsed_key_wait_to_parse_value.byte = colon; + let parsed_key_wait_to_parse_value_out = { ...out }; + parsed_key_wait_to_parse_value_out.next_tree_depth = 1; + parsed_key_wait_to_parse_value_out.next_parsing_value = 1; + generatePassCase(parsed_key_wait_to_parse_value, parsed_key_wait_to_parse_value_out, ">>>> `:` read"); + + // Test 8: `tree_depth == 1` AND parsing_value == 1` setup -> `"` is read + let in_tree_find_value = { ...init }; + in_tree_find_value.tree_depth = 1; + in_tree_find_value.parsing_value = 1; + in_tree_find_value.byte = quote; + let in_tree_find_value_out = { ...out }; + in_tree_find_value_out.next_tree_depth = 1; + in_tree_find_value_out.next_inside_value = 1; + in_tree_find_value_out.next_parsing_value = 1; + generatePassCase(in_tree_find_value, in_tree_find_value_out, ">>>> `\"` read"); + + // Test 9: `tree_depth == 1` AND inside_value` setup -> ` ` is read + let in_value = { ...init }; + in_value.tree_depth = 1; + in_value.inside_value = 1; + in_value.byte = space; + let in_value_out = { ...out }; + in_value_out.next_tree_depth = 1; + in_value_out.next_inside_value = 1; + generatePassCase(in_value, in_value_out, ">>>> ` ` is read"); + + // Test 10: `tree_depth == 1` AND inside_value` setup -> `"` is read + let in_value_to_exit = { ...init }; + in_value_to_exit.tree_depth = 1; + in_value_to_exit.parsing_value = 1; + in_value_to_exit.inside_value = 1; + in_value_to_exit.byte = quote; + let in_value_to_exit_out = { ...out }; + in_value_to_exit_out.next_tree_depth = 1; + // in_value_to_exit_out.next_end_of_kv = 1; + in_value_to_exit_out.next_parsing_value = 1; + generatePassCase(in_value_to_exit, in_value_to_exit_out, ">>>> `\"` is read"); + + // Test 11: `tree_depth == 1` AND end_of_kv` setup -> ` ` is read + let in_end_of_kv = { ...init }; + 
in_end_of_kv.tree_depth = 1; + in_end_of_kv.byte = space; + let in_end_of_kv_out = { ...out }; + in_end_of_kv_out.next_tree_depth = 1; + generatePassCase(in_end_of_kv, in_end_of_kv_out, ">>>> ` ` is read"); + + // Test 12: `tree_depth == 1` AND end_of_kv` setup -> `,` is read + let end_of_kv_to_parse_to_key = { ...init }; + end_of_kv_to_parse_to_key.tree_depth = 1; + end_of_kv_to_parse_to_key.parsing_value = 1; + // end_of_kv_to_parse_to_key.end_of_kv = 1; + end_of_kv_to_parse_to_key.byte = comma; + let end_of_kv_to_parse_to_key_out = { ...out }; + end_of_kv_to_parse_to_key_out.next_tree_depth = 1; + end_of_kv_to_parse_to_key_out.next_parsing_key = 1; + generatePassCase(end_of_kv_to_parse_to_key, end_of_kv_to_parse_to_key_out, ">>>> ` ` is read"); + + // Test 13: `tree_depth == 1` AND end_of_kv` setup -> `}` is read + let end_of_kv_to_exit_json = { ...init }; + end_of_kv_to_exit_json.tree_depth = 1; + end_of_kv_to_exit_json.parsing_value = 1; + end_of_kv_to_exit_json.byte = end_brace; + let end_of_kv_to_exit_json_out = { ...out }; + end_of_kv_to_exit_json_out.next_parsing_value = 1; + generatePassCase(end_of_kv_to_exit_json, end_of_kv_to_exit_json_out, ">>>> `}` is read"); + + // NOTE: At this point, we can parse JSON that has 2 keys at depth 1! 
+ // Test 14: `tree_depth == 1` AND parsing_value` setup -> `{` is read + let end_of_key_to_inner_object = { ...init }; + end_of_key_to_inner_object.tree_depth = 1; + end_of_key_to_inner_object.parsing_value = 1; + end_of_key_to_inner_object.byte = start_brace; + let end_of_key_to_inner_object_out = { ...out }; + end_of_key_to_inner_object_out.next_tree_depth = 2; + end_of_key_to_inner_object_out.next_parsing_key = 1; + generatePassCase(end_of_key_to_inner_object, end_of_key_to_inner_object_out, ">>>> `{` is read"); }); }); diff --git a/create_witness/src/main.rs b/create_witness/src/main.rs index 469478a..dbe494b 100644 --- a/create_witness/src/main.rs +++ b/create_witness/src/main.rs @@ -9,7 +9,7 @@ pub const KEYS: &[&[u8]] = &[ b"\"title\"".as_slice(), ]; // pub const DATA: &[u8] = include_bytes!("../../json_examples/example.json"); -pub const DATA: &[u8] = include_bytes!("../../json_examples/test.json"); +pub const DATA: &[u8] = include_bytes!("../../json_examples/sambhav_example.json"); #[derive(serde::Serialize)] pub struct Witness { @@ -59,7 +59,7 @@ pub fn main() { // num_data_bytes: DATA.len(), // For now we can set this to be the same data: DATA.to_vec(), }; - let mut file = std::fs::File::create("inputs/test_extract/input.json").unwrap(); + let mut file = std::fs::File::create("inputs/test_extract_sambhav/input.json").unwrap(); file.write_all(serde_json::to_string_pretty(&witness).unwrap().as_bytes()) .unwrap(); } diff --git a/inputs/test_extract_depth/input.json b/inputs/test_extract_depth/input.json new file mode 100644 index 0000000..f4608b1 --- /dev/null +++ b/inputs/test_extract_depth/input.json @@ -0,0 +1,74 @@ +{ + "key": [ + 107, + 101, + 121, + 49 + ], + "data": [ + 123, + 10, + 32, + 32, + 32, + 32, + 34, + 107, + 101, + 121, + 49, + 34, + 58, + 32, + 34, + 97, + 98, + 99, + 34, + 44, + 10, + 32, + 32, + 32, + 32, + 34, + 107, + 101, + 121, + 50, + 34, + 58, + 32, + 123, + 10, + 32, + 32, + 32, + 32, + 32, + 32, + 32, + 32, + 34, + 107, + 101, 
+ 121, + 51, + 34, + 58, + 32, + 34, + 100, + 101, + 102, + 34, + 10, + 32, + 32, + 32, + 32, + 125, + 10, + 125 + ] +} \ No newline at end of file diff --git a/inputs/test_extract_sambhav/input.json b/inputs/test_extract_sambhav/input.json new file mode 100644 index 0000000..9e6cbba --- /dev/null +++ b/inputs/test_extract_sambhav/input.json @@ -0,0 +1,115 @@ +{ + "key": [ + 107, + 101, + 121, + 49 + ], + "data": [ + 123, + 10, + 32, + 32, + 32, + 32, + 34, + 101, + 120, + 116, + 114, + 97, + 99, + 116, + 34, + 58, + 32, + 123, + 10, + 32, + 32, + 32, + 32, + 32, + 32, + 32, + 32, + 34, + 102, + 105, + 108, + 101, + 34, + 58, + 32, + 34, + 101, + 120, + 116, + 114, + 97, + 99, + 116, + 34, + 44, + 10, + 32, + 32, + 32, + 32, + 32, + 32, + 32, + 32, + 34, + 116, + 101, + 109, + 112, + 108, + 97, + 116, + 101, + 34, + 58, + 32, + 34, + 69, + 120, + 116, + 114, + 97, + 99, + 116, + 34, + 44, + 10, + 32, + 32, + 32, + 32, + 32, + 32, + 32, + 32, + 34, + 112, + 97, + 114, + 97, + 109, + 115, + 34, + 58, + 32, + 34, + 34, + 10, + 32, + 32, + 32, + 32, + 125, + 10, + 125 + ] +} \ No newline at end of file diff --git a/inputs/test_extract_two_key/input.json b/inputs/test_extract_two_key/input.json new file mode 100644 index 0000000..619812a --- /dev/null +++ b/inputs/test_extract_two_key/input.json @@ -0,0 +1,50 @@ +{ + "key": [ + 107, + 101, + 121, + 49 + ], + "data": [ + 123, + 10, + 32, + 32, + 32, + 32, + 34, + 107, + 101, + 121, + 49, + 34, + 58, + 32, + 34, + 97, + 98, + 99, + 34, + 44, + 10, + 32, + 32, + 32, + 32, + 34, + 107, + 101, + 121, + 50, + 34, + 58, + 32, + 34, + 100, + 101, + 102, + 34, + 10, + 125 + ] +} \ No newline at end of file diff --git a/json_examples/reddit_response.json b/json_examples/reddit_response.json new file mode 100644 index 0000000..163e6b4 --- /dev/null +++ b/json_examples/reddit_response.json @@ -0,0 +1,14 @@ +{ + "data": { + "redditorInfoByName": { + "id": "t2_bepsb", + "karma": { + "fromAwardsGiven": 0, + "fromAwardsReceived": 470, 
+ "fromComments": 9583, + "fromPosts": 13228, + "total": 23281 + } + } + } +} \ No newline at end of file diff --git a/json_examples/sambhav_example.json b/json_examples/sambhav_example.json new file mode 100644 index 0000000..ab9c57e --- /dev/null +++ b/json_examples/sambhav_example.json @@ -0,0 +1,7 @@ +{ + "extract": { + "file": "extract", + "template": "Extract", + "params": "" + } +} \ No newline at end of file diff --git a/json_examples/test_depth.json b/json_examples/test_depth.json new file mode 100644 index 0000000..ad2f816 --- /dev/null +++ b/json_examples/test_depth.json @@ -0,0 +1,6 @@ +{ + "key1": "abc", + "key2": { + "key3": "def" + } +} \ No newline at end of file diff --git a/json_examples/test_two_key.json b/json_examples/test_two_key.json new file mode 100644 index 0000000..437a6c5 --- /dev/null +++ b/json_examples/test_two_key.json @@ -0,0 +1,4 @@ +{ + "key1": "abc", + "key2": "def" +} \ No newline at end of file From 9b6f6945347e75ae7e59941341f15b5fd68d6bc3 Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Thu, 15 Aug 2024 16:36:19 -0600 Subject: [PATCH 2/2] feat: upgrade input JSON creation (#14) * add: `reddit_response.json` * refactor tests + add failing case * easier fix * test: parse to key * tests: key parsing * bug: `next_end_of_kv` on read `:` * fix: `end_of_kv` bug * test: find value * tests: `inside_value` and `inside_value_to_exit` * test: parse to NEXT key * parses JSON with two string keys * WIP: value inside value * comment * refactor (#10) * wip: start with bitmask * WIP: time to start testing * tests: `ArrayAdd` and `ArrayMul` * tests passing * update comments * feat: 2 key depth 1 json * 2 kv json and all tests passing * nested json works!!! 
* reduce constraints * cleanup * rename variables * more cleaning * more cleanup * make comments clean * WAYLON NITPICKING ME LOL * feat: improved CLI for witness * gitignore input.json * Update main.rs * Squashed commit of the following: --- .gitignore | 3 +- Cargo.lock | 220 +++++++- Cargo.toml | 11 +- create_witness/Cargo.toml | 7 - create_witness/src/main.rs | 96 ---- inputs/extract/input.json | 803 ---------------------------- inputs/test_extract/input.json | 31 -- inputs/test_extract_hard/input.json | 58 -- src/item.rs | 37 -- src/lib.rs | 122 ----- src/main.rs | 66 +++ 11 files changed, 282 insertions(+), 1172 deletions(-) delete mode 100644 create_witness/Cargo.toml delete mode 100644 create_witness/src/main.rs delete mode 100644 inputs/extract/input.json delete mode 100644 inputs/test_extract/input.json delete mode 100644 inputs/test_extract_hard/input.json delete mode 100644 src/item.rs delete mode 100644 src/lib.rs create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore index 5b586b4..40d4c9c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ node_modules/* build/* ptau/* circuits/test/*.circom -circuits/main/* \ No newline at end of file +circuits/main/* +inputs/**/*.json \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 256c9d6..2479f30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,16 +3,111 @@ version = 3 [[package]] -name = "create-witness" -version = "0.0.0" +name = "anstream" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" dependencies = [ - "serde", - "serde_json", + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + 
+[[package]] +name = "anstyle-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "clap" +version = "4.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c937d4061031a6d0c8da4b9a4f98a172fc2976dfb1c19213a9cf7d0d3c837e36" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85379ba512b21a328adf887e85f7742d12e96eb31f3ef077df4ffc26b506ffed" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", ] [[package]] -name = "extractor" -version = "0.1.0" +name = "clap_derive" +version = "4.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "colorchoice" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" + +[[package]] +name = "heck" 
+version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "itoa" @@ -20,6 +115,12 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + [[package]] name = "proc-macro2" version = "1.0.86" @@ -46,18 +147,18 @@ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" dependencies = [ "proc-macro2", "quote", @@ -66,15 +167,22 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.120" +version = "1.0.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] +[[package]] 
+name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.72" @@ -91,3 +199,91 @@ name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = 
"0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "witness" +version = "0.0.0" +dependencies = [ + "clap", + "serde", + "serde_json", +] diff --git a/Cargo.toml b/Cargo.toml index 5a51ecc..25d2171 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,8 @@ -[workspace] -members = ["create_witness"] - [package] -name = "extractor" -version = "0.1.0" +name = "witness" edition = "2021" + +[dependencies] +serde = { version = "1.0.204", features = ["derive"] } +serde_json = "1.0.120" +clap = { version = "4.5.14", features = ["derive"] } diff --git a/create_witness/Cargo.toml b/create_witness/Cargo.toml deleted file mode 100644 index ffa0959..0000000 --- a/create_witness/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "create-witness" -edition = "2021" - -[dependencies] -serde = { version = "1.0.204", features = ["derive"] } -serde_json = "1.0.120" diff --git a/create_witness/src/main.rs b/create_witness/src/main.rs deleted file mode 100644 index dbe494b..0000000 --- a/create_witness/src/main.rs +++ /dev/null @@ 
-1,96 +0,0 @@ -use std::io::Write; - -// pub const KEY: &[u8] = b"\"glossary\"".as_slice(); -pub const KEY: &[u8] = b"key1".as_slice(); - -pub const KEYS: &[&[u8]] = &[ - b"\"glossary\"".as_slice(), - b"\"GlossDiv\"".as_slice(), - b"\"title\"".as_slice(), -]; -// pub const DATA: &[u8] = include_bytes!("../../json_examples/example.json"); -pub const DATA: &[u8] = include_bytes!("../../json_examples/sambhav_example.json"); - -#[derive(serde::Serialize)] -pub struct Witness { - // num_keys: usize, - // key_sizes: Vec, - // keys: Vec>, - // num_key_bytes: usize, - key: Vec, - // num_data_bytes: usize, - data: Vec, -} - -pub fn main() { - // Properly serialize information about the keys we want to extract - // let mut max_num_keys = 0; - // let mut max_num_key_bytes = 0; - // let mut key_sizes = vec![]; - // let mut keys = vec![]; - // for &key in KEYS { - // let key_len = key.len(); - // key_sizes.push(key_len); - // if key_len > max_num_key_bytes { - // max_num_key_bytes = key_len; - // } - // keys.push(key.to_vec()); - // max_num_keys += 1; - // } - // println!("MAX_NUM_KEYS: {max_num_keys}"); - // println!("MAX_NUM_KEY_BYTES: {max_num_key_bytes}"); - - // Enforce that each key comes in as af fixed length (TODO: we need to make sure we encode this somehow, perhaps we pass in a vector of key lengths) - // for key in &mut keys { - // key.extend(vec![0; max_num_key_bytes - key.len()]); - // } - - // Properly serialize information about the data we extract from - println!("NUM_KEY_BYTES: {}", KEY.len()); - println!("NUM_DATA_BYTES: {}", DATA.len()); - - // Create a witness file as `input.json` - let witness = Witness { - // num_keys: max_num_keys, // For now we can set this to be the same - // key_sizes, - // keys, - // num_key_bytes: KEY.len(), - key: KEY.to_vec(), - // num_data_bytes: DATA.len(), // For now we can set this to be the same - data: DATA.to_vec(), - }; - let mut file = std::fs::File::create("inputs/test_extract_sambhav/input.json").unwrap(); - 
file.write_all(serde_json::to_string_pretty(&witness).unwrap().as_bytes()) - .unwrap(); -} - -// fn get_bits(bytes: &[u8]) -> Vec { -// bytes -// .iter() -// .flat_map(|&byte| { -// (0..8) -// .rev() -// .map(move |i| ((byte.to_be_bytes()[0] >> i) & 1) == 1) // ensure this is all big-endian -// }) -// .collect() -// } - -// #[cfg(test)] -// mod tests { -// use super::*; -// // Use example.json which has first two ASCII chars: `{` and `\n` -// // ASCII code for `{` 01111011 -// // ASCII code for `\n` 00001010 -// #[test] -// fn test_get_bits() { -// let bits = get_bits(DATA); -// #[allow(clippy::inconsistent_digit_grouping)] -// let compare_bits: Vec = vec![0, 1, 1, 1, 1, 0, 1, 1_, 0, 0, 0, 0, 1, 0, 1, 0] -// .into_iter() -// .map(|x| x == 1) -// .collect(); -// bits.iter() -// .zip(compare_bits.iter()) -// .for_each(|(x, y)| assert_eq!(x, y)); -// } -// } diff --git a/inputs/extract/input.json b/inputs/extract/input.json deleted file mode 100644 index 556be54..0000000 --- a/inputs/extract/input.json +++ /dev/null @@ -1,803 +0,0 @@ -{ - "key": [ - 34, - 103, - 108, - 111, - 115, - 115, - 97, - 114, - 121, - 34 - ], - "data": [ - 123, - 10, - 32, - 32, - 32, - 32, - 34, - 103, - 108, - 111, - 115, - 115, - 97, - 114, - 121, - 34, - 58, - 32, - 123, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 116, - 105, - 116, - 108, - 101, - 34, - 58, - 32, - 34, - 101, - 120, - 97, - 109, - 112, - 108, - 101, - 32, - 103, - 108, - 111, - 115, - 115, - 97, - 114, - 121, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 68, - 105, - 118, - 34, - 58, - 32, - 123, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 116, - 105, - 116, - 108, - 101, - 34, - 58, - 32, - 34, - 83, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 76, - 105, - 115, - 116, - 34, - 58, - 32, - 123, - 10, - 32, - 32, - 
32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 69, - 110, - 116, - 114, - 121, - 34, - 58, - 32, - 123, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 73, - 68, - 34, - 58, - 32, - 34, - 83, - 71, - 77, - 76, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 83, - 111, - 114, - 116, - 65, - 115, - 34, - 58, - 32, - 34, - 83, - 71, - 77, - 76, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 84, - 101, - 114, - 109, - 34, - 58, - 32, - 34, - 83, - 116, - 97, - 110, - 100, - 97, - 114, - 100, - 32, - 71, - 101, - 110, - 101, - 114, - 97, - 108, - 105, - 122, - 101, - 100, - 32, - 77, - 97, - 114, - 107, - 117, - 112, - 32, - 76, - 97, - 110, - 103, - 117, - 97, - 103, - 101, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 65, - 99, - 114, - 111, - 110, - 121, - 109, - 34, - 58, - 32, - 34, - 83, - 71, - 77, - 76, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 65, - 98, - 98, - 114, - 101, - 118, - 34, - 58, - 32, - 34, - 73, - 83, - 79, - 32, - 56, - 56, - 55, - 57, - 58, - 49, - 57, - 56, - 54, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 68, - 101, - 102, - 34, - 58, - 32, - 123, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 112, - 97, - 114, - 
97, - 34, - 58, - 32, - 34, - 65, - 32, - 109, - 101, - 116, - 97, - 45, - 109, - 97, - 114, - 107, - 117, - 112, - 32, - 108, - 97, - 110, - 103, - 117, - 97, - 103, - 101, - 44, - 32, - 117, - 115, - 101, - 100, - 32, - 116, - 111, - 32, - 99, - 114, - 101, - 97, - 116, - 101, - 32, - 109, - 97, - 114, - 107, - 117, - 112, - 32, - 108, - 97, - 110, - 103, - 117, - 97, - 103, - 101, - 115, - 32, - 115, - 117, - 99, - 104, - 32, - 97, - 115, - 32, - 68, - 111, - 99, - 66, - 111, - 111, - 107, - 46, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 83, - 101, - 101, - 65, - 108, - 115, - 111, - 34, - 58, - 32, - 91, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 77, - 76, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 88, - 77, - 76, - 34, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 93, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 125, - 44, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 34, - 71, - 108, - 111, - 115, - 115, - 83, - 101, - 101, - 34, - 58, - 32, - 34, - 109, - 97, - 114, - 107, - 117, - 112, - 34, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 125, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 125, - 10, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 32, - 125, - 
10, - 32, - 32, - 32, - 32, - 125, - 10, - 125 - ] -} \ No newline at end of file diff --git a/inputs/test_extract/input.json b/inputs/test_extract/input.json deleted file mode 100644 index d99f3f3..0000000 --- a/inputs/test_extract/input.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "key": [ - 107, - 101, - 121, - 49 - ], - "data": [ - 123, - 10, - 32, - 32, - 32, - 32, - 34, - 107, - 101, - 121, - 49, - 34, - 58, - 32, - 34, - 97, - 98, - 99, - 34, - 10, - 125 - ] -} \ No newline at end of file diff --git a/inputs/test_extract_hard/input.json b/inputs/test_extract_hard/input.json deleted file mode 100644 index a80958f..0000000 --- a/inputs/test_extract_hard/input.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "key": [ - 107, - 101, - 121, - 49 - ], - "data": [ - 123, - 10, - 32, - 32, - 32, - 32, - 34, - 107, - 101, - 121, - 49, - 34, - 58, - 32, - 34, - 123, - 125, - 97, - 44, - 98, - 99, - 125, - 34, - 44, - 10, - 32, - 32, - 32, - 32, - 34, - 107, - 101, - 121, - 50, - 34, - 58, - 32, - 34, - 92, - 34, - 97, - 98, - 99, - 92, - 34, - 34, - 10, - 125 - ] -} \ No newline at end of file diff --git a/src/item.rs b/src/item.rs deleted file mode 100644 index 8823da8..0000000 --- a/src/item.rs +++ /dev/null @@ -1,37 +0,0 @@ -#[derive(Clone, Copy, Debug)] -pub struct Item<'a, T> { - pub label: &'a str, - inner: T, -} - -impl<'a> Item<'a, ()> { - pub fn create(label: &'a str) -> Self { - Item { label, inner: () } - } -} - -impl<'a, T: Copy> Item<'a, T> { - pub fn inner(&self) -> T { - self.inner - } - - pub fn append(self, label: &'a str) -> Item<'a, Item> { - Item { label, inner: self } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn create_nested_item() { - let item = Item::create("data") - .append("profile") - .append("identity") - .append("balance") - .append("userBalance") - .append("value"); - println!("{item:?}"); - } -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 71db018..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,122 +0,0 @@ 
-pub const EXAMPLE_JSON: &[u8] = include_bytes!("../json_examples/example.json"); -pub const VENMO_JSON: &[u8] = include_bytes!("../json_examples/venmo_response.json"); - -pub mod item; - -// TODO: Mark when we're in a key versus in a value, versus whitespace, etc.? - -pub struct Machine<'a> { - pub keys: Vec<&'a [u8]>, - depth: usize, - pointer: usize, -} - -#[derive(Debug)] -pub enum Instruction { - IncreaseDepth(usize), - DecreaseDepth(usize), - EOF, -} - -impl<'a> Machine<'a> { - pub fn new(keys: Vec<&'a [u8]>) -> Self { - Machine { - keys, - depth: 0, - pointer: 0, - } - } - - pub fn extract(&mut self, data_bytes: &'a [u8]) -> Option<&'a [u8]> { - // Make sure that there is more data in the JSON than what we have expressed in all of our keys else this makes no sense at all. - assert!(data_bytes.len() > self.keys.iter().map(|k| k.len()).sum()); - // Make sure the JSON begins with an opening bracket - assert_eq!(data_bytes[0] ^ b"{"[0], 0); - - while self.depth < self.keys.len() { - match get_key(self.keys[self.depth], &data_bytes[self.pointer..]) { - Instruction::EOF => return None, - _inst @ Instruction::DecreaseDepth(offset) => { - // dbg!(inst); - self.depth -= 1; - self.pointer += offset; - // dbg!(String::from_utf8_lossy(&[data_bytes[self.pointer]])); - } - _inst @ Instruction::IncreaseDepth(offset) => { - // dbg!(inst); - self.depth += 1; - self.pointer += offset; - // dbg!(String::from_utf8_lossy(&[data_bytes[self.pointer]])); - } - } - } - - // Get the value as a raw str at this location in the JSON and offset by one to bypass a `:` - let value_start = self.pointer + 1; - let mut value_length = 0; - // Grab the value up to the next delimiter doken (TODO: if a `,` or `}` is present in a string, we are doomed, so we need to track these objects better!) 
- while (data_bytes[value_start + value_length] != b"}"[0]) - & (data_bytes[value_start + value_length] != b","[0]) - { - value_length += 1; - } - Some(&data_bytes[value_start..value_start + value_length]) - } -} - -fn get_key(key: &[u8], data_bytes: &[u8]) -> Instruction { - let key_length = key.len(); - - // dbg!(String::from_utf8_lossy(key)); - - 'outer: for i in 0..(data_bytes.len() - key_length) { - #[allow(clippy::needless_range_loop)] - for j in 0..key_length { - // dbg!(String::from_utf8_lossy(&[data_bytes[i..i + key_length][j]])); - if data_bytes[i..i + key_length][j] == b"}"[0] { - // Hit an end brace "}" so we need to return the current pointer as an offset and decrease depth - return Instruction::DecreaseDepth(i + j); - } - if key[j] ^ data_bytes[i..i + key_length][j] != 0 { - continue 'outer; - } - } - // If we hit here then we must have fully matched a key so we return the current pointer as an offset - return Instruction::IncreaseDepth(i + key_length); - } - // If we hit here, we must have hit EOF (which is actually an error?) 
- Instruction::EOF -} - -#[cfg(test)] -mod tests { - - use super::*; - - #[test] - fn get_value_venmo() { - let keys = vec![ - b"\"data\"".as_slice(), - b"\"profile\"".as_slice(), - b"\"identity\"".as_slice(), - b"\"balance\"".as_slice(), - b"\"userBalance\"".as_slice(), - b"\"value\"".as_slice(), - ]; - let mut machine = Machine::new(keys); - let value = String::from_utf8_lossy(machine.extract(VENMO_JSON).unwrap()); - assert_eq!(value, " 523.69\n ") - } - - #[test] - fn get_value_example() { - let keys = vec![ - b"\"glossary\"".as_slice(), - b"\"GlossDiv\"".as_slice(), - b"\"title\"".as_slice(), - ]; - let mut machine = Machine::new(keys); - let value = String::from_utf8_lossy(machine.extract(EXAMPLE_JSON).unwrap()); - assert_eq!(value, " \"S\"a") - } -} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..cf94f9d --- /dev/null +++ b/src/main.rs @@ -0,0 +1,66 @@ +use clap::Parser; +use serde_json::Value; +use std::io::Write; +use std::path::PathBuf; + +#[derive(Parser, Debug)] +#[command(name = "witness")] +struct Args { + /// Path to the JSON file + #[arg(short, long)] + json_file: PathBuf, + + /// Keys to extract (can be specified multiple times) + #[arg(short, long)] + keys: Vec, + + /// Output directory (will be created if it doesn't exist) + #[arg(short, long, default_value = ".")] + output_dir: PathBuf, + + /// Output filename (will be created if it doesn't exist) + #[arg(short, long, default_value = "output.json")] + filename: String, +} + +#[derive(serde::Serialize)] +pub struct Witness { + #[serde(flatten)] + keys: serde_json::Map, + data: Vec, +} + +pub fn main() -> Result<(), Box> { + let args = Args::parse(); + + // Read the JSON file + let data = std::fs::read(&args.json_file)?; + + // Create a map to store keys + let mut keys_map = serde_json::Map::new(); + for (index, key) in args.keys.iter().enumerate() { + keys_map.insert( + format!("key{}", index + 1), + Value::Array( + key.as_bytes() + .iter() + .map(|x| 
serde_json::json!(x)) + .collect(), + ), + ); + } + + // Create a witness file as `input.json` + let witness = Witness { + keys: keys_map, + data, + }; + + let output_file = args.output_dir.join(args.filename); + let mut file = std::fs::File::create(output_file)?; + file.write_all(serde_json::to_string_pretty(&witness)?.as_bytes())?; + + println!("Input file created successfully."); + + Ok(()) +}