diff --git a/Cargo.toml b/Cargo.toml index f7ea615..068a24c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,10 +19,10 @@ byteorder = "1.4.3" ff = { version = "0.13", features = ["derive"]} hex-literal = "0.3.4" itertools = "0.9.0" -nova-snark = { version = "0.21.0", default-features = false } +nova-snark = "0.22.0" num-bigint = { version = "0.4", features = ["serde", "rand"] } num-traits = "0.2.15" -pasta_curves = { version = "0.5.1", features = ["repr-c", "serde"] } +pasta_curves = "0.5" serde = "1.0" serde_json = "1.0.85" diff --git a/README.md b/README.md index 8ee508b..609ca0c 100644 --- a/README.md +++ b/README.md @@ -12,15 +12,15 @@ Nova is the state of the art for recursive SNARKs, Circom is the state of the ar As [Justin Drake talks about it](https://youtu.be/SwonTtOQzAk), I think the right way to think of Nova is as a preprocessor for zkSNARKs with lots of repeated structure -- Nova can shrink the cost (in number of R1CS constraints) of checking N instances of a problem to ~one instance of the same problem. This is clean and magical and lends itself well to a world where we take the output of Nova and then verify it in a "real" zkSNARK (like PLONK/groth16/Spartan) to obtain a actually fully minified proof (that is sublinear even in the size of one instance). Notably, [this pattern is already used](https://youtu.be/VmYpbFxBdtM?t=155) in settings like [zkEVMs](https://youtu.be/j7An-33_Zs0), but with STARK proofs instead of Nova proofs. IMO, Nova (and folding scheme-like things in particular) lend themselves better to the properties we want with the preprocessing layer vs. STARKs: fast compression, minimal cryptographic assumptions and low recursive overhead.[^1] +Nova Scotia comes with extensive [examples](https://github.com/nalinbhardwaj/Nova-Scotia/tree/main/examples), as well as a [in-browser usage example](https://github.com/nalinbhardwaj/Nova-Scotia/tree/main/browser-test). We will describe the proving/verifying workflow in more detail below. 
+ [^1]: But currently, Nova/R1CS lacks the customizability of STARKS (custom gates and lookup tables in particular), so there is a tradeoff here. ## How? ![Nova Scotia](https://user-images.githubusercontent.com/6984346/201644973-fb084b6c-3807-4bf4-99bf-a1461271f1b5.png) -To use it yourself, install this branch of [Circom](https://docs.circom.io) which adds support for the [Pasta Curves](https://electriccoin.co/blog/the-pasta-curves-for-halo-2-and-beyond/) to the C++ witness generator: [nalinbhardwaj/pasta](https://github.com/nalinbhardwaj/circom/tree/pasta). To install this branch, clone the git repo (using `git clone https://github.com/nalinbhardwaj/circom.git && git checkout pasta`). Then build and install the `circom` binary by running `cargo install --path circom`. This will overwrite any existing `circom` binary. Refer to the [Circom documentation](https://docs.circom.io/getting-started/installation/#installing-dependencies) for more information. - -Note that if you are interested in generating and verifying proofs in browsers, you must use the WASM witness generator. We will describe in-browser proving and verification later in the README. +To use it yourself, start by installing [Circom](https://docs.circom.io) as described in the [Circom documentation](https://docs.circom.io/getting-started/installation/#installing-dependencies). ### Writing Nova Step Circuits in Circom @@ -28,24 +28,30 @@ To write Nova Scotia circuits in Circom, we operate on the abstraction of one st When you're ready, compile your circuit using `circom [file].circom --r1cs --sym --c --prime vesta` for the vesta curve. Compile the C++ witness generator in `[file]_cpp` by running `make` in that folder. Alternately, you can compile the WASM witness generator using `circom [file].circom --r1cs --sym --wasm --prime vesta`. We will later use the R1CS file and the witness generator binary (either C++ binary or WASM), so make note of their filepaths. 
You can independently test these step circuits by running witness generation as described in the [Circom documentation](https://docs.circom.io/getting-started/computing-the-witness/). +Since Nova runs on a cycle of elliptic curves, you must specify the curve via traits and in the Circom compilation command. Currently, Nova Scotia supports any cycle supported by Nova upstream in [provider](https://github.com/microsoft/Nova/tree/main/src/provider) and by Circom's `--prime` flag. You can see example circuits for both the [Pasta (pallas/vesta)](https://github.com/nalinbhardwaj/Nova-Scotia/blob/main/examples/toy_pasta.rs) and [bn254/grumpkin](https://github.com/nalinbhardwaj/Nova-Scotia/blob/main/examples/toy_bn254.rs) curves in the examples directory. + ### Rust shimming for Nova Scotia -Now, start a new Rust project and add Nova Scotia to your dependencies. Then, you can start using your Circom step circuits with Nova. Start by defining the paths to the Circom output and loading the R1CS file: +Start a new Rust project and add Nova Scotia to your dependencies. Then, you can start using your Circom step circuits with Nova. Start by defining the paths to the Circom output and loading the R1CS file: ```rust +// The cycle of curves we use, can be any cycle supported by Nova +type G1 = pasta_curves::pallas::Point; +type G2 = pasta_curves::vesta::Point; + let circuit_file = root.join("examples/bitcoin/circom/bitcoin_benchmark.r1cs"); let witness_generator_file = root.join("examples/bitcoin/circom/bitcoin_benchmark_cpp/bitcoin_benchmark"); -let r1cs = load_r1cs(&circuit_file); // loads R1CS file into memory +let r1cs = load_r1cs::(&circuit_file); // loads R1CS file into memory ``` -Circom supports witness generation using both C++ and WASM, so you can choose which one to use by passing `witness_generator_file` either as the generated C++ binary or as the WASM output of Circom (the `circuit.wasm` file). 
If you use WASM, we assume you have a compatible version of `node` installed on your system. +Circom supports witness generation using both C++ and WASM, so you can choose which one to use by passing `witness_generator_file` either as the generated C++ binary or as the WASM output of Circom (the `circuit.wasm` file). If you use WASM, we assume you have a compatible version of `node` installed on your system. Note that for proving locally, we recommend using the C++ witness generator for performance (except on M1/M2 Macs where it is not supported). For in-browser proving/verifying, you must use the WASM witness generator. We will describe in-browser proving and verification workflow later in the README. Then, create the public parameters (CRS) using the `create_public_params` function: ```rust -let pp = create_public_params(r1cs.clone()); +let pp = create_public_params::(r1cs.clone()); ``` Now, construct the input to Circom witness generator at each step of recursion. This is a HashMap representation of the JSON input to your Circom input. For instance, in the case of the [bitcoin](https://github.com/nalinbhardwaj/Nova-Scotia/blob/main/examples/bitcoin.rs#L40) example, `private_inputs` is a list of `HashMap`s, each containing block headers and block hashes for the blocks that step of recursion verifies, and the public input `step_in` is the previous block hash in the chain. 
@@ -54,15 +60,15 @@ To instantiate this recursion, we use `create_recursive_circuit` from Nova Scoti ```rust let recursive_snark = create_recursive_circuit( - witness_generator_file, + FileLocation::PathBuf(witness_generator_file), r1cs, private_inputs, - start_public_input.clone(), + start_public_input.to_vec(), &pp, ).unwrap(); ``` -Verification is done using the `verify` function defined by Nova, which additionally takes secondary inputs that Nova Scotia will initialise to `vec![::Scalar::zero()]`, so just pass that in: +Verification is done using the `verify` function defined by Nova, which additionally takes secondary inputs that Nova Scotia will initialise to `[F::zero()]`, so just pass that in: ```rust println!("Verifying a RecursiveSNARK..."); @@ -70,8 +76,8 @@ let start = Instant::now(); let res = recursive_snark.verify( &pp, iteration_count, - start_public_input.clone(), - vec![::Scalar::zero()], + &start_public_input.clone(), + &[F::zero()], ); println!( "RecursiveSNARK::verify: {:?}, took {:?}", @@ -94,11 +100,11 @@ bitcoin.rs is a more complex example that uses Nova to create a prover for bitco | Number of recursion steps | Blocks verified per step | Prover time | Verifier time (uncompressed) | | ------------------------- | ------------------------ | ----------- | ---------------------------- | -| 120 | 1 | 57.33s | 197.20ms | -| 60 | 2 | 46.11s | 307.08ms | -| 40 | 3 | 43.60s | 449.02ms | -| 30 | 4 | 41.17s | 560.53ms | -| 24 | 5 | 39.73s | 728.09ms | +| 120 | 1 | 55.38s | 214.43ms | +| 60 | 2 | 49.05s | 434.96ms | +| 40 | 3 | 42.08s | 509.03ms | +| 30 | 4 | 45.40s | 923.23ms | +| 24 | 5 | 48.43s | 991.89ms | Note that the verification times are linear in the number of blocks per step of recursion, while the proving time reduces with fewer recursive steps. In practice, you would use the output of Nova as an input to another SNARK scheme like Plonk/groth16 (as previously mentioned) to obtain full succinctness. 
@@ -112,15 +118,7 @@ Nova Scotia also supports proving and verification of proofs in browser, along w ## Notes for interested contributors -### TODO list - -- [ ] Switch Nova to BN254/grumpkin cycle to make it work on Ethereum chain! This should be doable since Nova only needs DLOG hardness. -- [ ] Write Relaxed R1CS verifiers in plonk/groth16 libraries (ex. Halo 2, Circom). -- [ ] Make Nova work with secp/secq cycle for efficient ECDSA signature verification + aggregation - -Seperately, since Nova's `StepCircuit` trait is pretty much the same as Bellperson's `Circuit` trait, we can probably also use the transpilation in this repo to use [Bellperson](https://github.com/filecoin-project/bellperson) with Circom circuits/proofs, along with its [snarkpack](https://eprint.iacr.org/2021/529) aggregation features. - -If you are interested in any of these tasks and want to work on them, please reach out! [0xPARC's PARC Squad](https://0xparc.org/blog/parc-squad) may also be able to provide financial and technical support for related work. +Please see [GitHub issues](https://github.com/nalinbhardwaj/Nova-Scotia/issues) if you are interested in contributing. You can reach out to me directly on Telegram at @nibnalin if you have any questions. 
### Credits diff --git a/browser-test/Cargo.toml b/browser-test/Cargo.toml index e62291f..6715061 100644 --- a/browser-test/Cargo.toml +++ b/browser-test/Cargo.toml @@ -7,11 +7,12 @@ edition = "2021" [dependencies] nova-scotia = { path = "../" } -nova-snark = { version = "0.21.0", default-features = false } +nova-snark = "0.22.0" num-bigint = { version = "0.4", features = ["serde", "rand"] } num-traits = "0.2.15" serde = "1.0" serde_json = "1.0.85" +pasta_curves = "0.5" [target.'cfg(target_family = "wasm")'.dependencies] getrandom = { version = "0.2", features = ["js"]} diff --git a/browser-test/env.sh b/browser-test/env.sh new file mode 100644 index 0000000..8c1e9a2 --- /dev/null +++ b/browser-test/env.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +export PATH="/usr/local/opt/llvm/bin/:$PATH" +export CC=/usr/local/opt/llvm/bin/clang +export AR=/usr/local/opt/llvm/bin/llvm-ar \ No newline at end of file diff --git a/browser-test/rust-toolchain b/browser-test/rust-toolchain index 8caa299..467f338 100644 --- a/browser-test/rust-toolchain +++ b/browser-test/rust-toolchain @@ -1 +1 @@ -nightly-2022-08-09 \ No newline at end of file +nightly-2022-12-12 \ No newline at end of file diff --git a/browser-test/src/wasm.rs b/browser-test/src/wasm.rs index 1f797b5..10e1bfc 100644 --- a/browser-test/src/wasm.rs +++ b/browser-test/src/wasm.rs @@ -1,12 +1,10 @@ use std::collections::HashMap; -use nova_scotia::FileLocation; use nova_scotia::{ circom::{circuit::CircomCircuit, reader::load_r1cs}, - create_public_params, create_recursive_circuit, EE1, EE2, F1, F2, G1, G2, S1, S2, + create_public_params, create_recursive_circuit, FileLocation, F, S, }; use nova_snark::{ - spartan::RelaxedR1CSSNARK, traits::{circuit::TrivialTestCircuit, Group}, CompressedSNARK, PublicParams, }; @@ -48,13 +46,16 @@ pub fn init_panic_hook() { const WEBSITE_ROOT: &str = "https://effulgent-liger-07e9d0.netlify.app/"; +type G1 = pasta_curves::pallas::Point; +type G2 = pasta_curves::vesta::Point; + #[wasm_bindgen] pub 
async fn generate_params() -> String { - let r1cs = load_r1cs(&FileLocation::URL( + let r1cs = load_r1cs::(&FileLocation::URL( WEBSITE_ROOT.to_string().clone() + &"toy.r1cs".to_string(), )) .await; - let pp = create_public_params(r1cs.clone()); + let pp: PublicParams = create_public_params(r1cs.clone()); let serialised = serde_json::to_string(&pp).unwrap(); return serialised; } @@ -63,7 +64,7 @@ pub async fn generate_params() -> String { pub async fn generate_proof(pp_str: String) -> String { let iteration_count = 5; - let r1cs = load_r1cs(&FileLocation::URL( + let r1cs = load_r1cs::(&FileLocation::URL( WEBSITE_ROOT.to_string().clone() + &"toy.r1cs".to_string(), )) .await; @@ -77,13 +78,12 @@ pub async fn generate_proof(pp_str: String) -> String { private_inputs.push(private_input); } - let start_public_input = vec![F1::from(10), F1::from(10)]; + let start_public_input = [F::::from(10), F::::from(10)]; - let pp = - serde_json::from_str::, TrivialTestCircuit>>( - &pp_str, - ) - .unwrap(); + let pp = serde_json::from_str::< + PublicParams>, TrivialTestCircuit>>, + >(&pp_str) + .unwrap(); console_log!( "Number of constraints per step (primary circuit): {}", @@ -108,29 +108,24 @@ pub async fn generate_proof(pp_str: String) -> String { witness_generator_wasm, r1cs, private_inputs, - start_public_input.clone(), + start_public_input.to_vec(), &pp, ) .await .unwrap(); // TODO: empty? 
- let z0_secondary = vec![::Scalar::zero()]; + let z0_secondary = [F::::zero()]; // verify the recursive SNARK console_log!("Verifying a RecursiveSNARK..."); - let res = recursive_snark.verify( - &pp, - iteration_count, - start_public_input.clone(), - z0_secondary.clone(), - ); + let res = recursive_snark.verify(&pp, iteration_count, &start_public_input, &z0_secondary); assert!(res.is_ok()); // produce a compressed SNARK console_log!("Generating a CompressedSNARK using Spartan with IPA-PC..."); - let (pk, _vk) = CompressedSNARK::<_, _, _, _, S1, S2>::setup(&pp).unwrap(); - let res = CompressedSNARK::<_, _, _, _, S1, S2>::prove(&pp, &pk, &recursive_snark); + let (pk, _vk) = CompressedSNARK::<_, _, _, _, S, S>::setup(&pp).unwrap(); + let res = CompressedSNARK::<_, _, _, _, S, S>::prove(&pp, &pk, &recursive_snark); assert!(res.is_ok()); let compressed_snark = res.unwrap(); return serde_json::to_string(&compressed_snark).unwrap(); @@ -138,25 +133,17 @@ pub async fn generate_proof(pp_str: String) -> String { #[wasm_bindgen] pub async fn verify_compressed_proof(pp_str: String, proof_str: String) -> bool { - let pp = - serde_json::from_str::, TrivialTestCircuit>>( - &pp_str, - ) - .unwrap(); - let (_pk, vk) = CompressedSNARK::<_, _, _, _, S1, S2>::setup(&pp).unwrap(); + let pp = serde_json::from_str::< + PublicParams>, TrivialTestCircuit>>, + >(&pp_str) + .unwrap(); + let (_pk, vk) = CompressedSNARK::<_, _, _, _, S, S>::setup(&pp).unwrap(); let iteration_count = 5; - let start_public_input = vec![F1::from(10), F1::from(10)]; - let z0_secondary = vec![::Scalar::zero()]; + let start_public_input = vec![F::::from(10), F::::from(10)]; + let z0_secondary = vec![F::::zero()]; let compressed_proof = serde_json::from_str::< - CompressedSNARK< - G1, - G2, - CircomCircuit, - TrivialTestCircuit, - RelaxedR1CSSNARK, - RelaxedR1CSSNARK, - >, + CompressedSNARK>, TrivialTestCircuit>, S, S>, >(&proof_str) .unwrap(); let res = compressed_proof.verify( diff --git a/examples/bitcoin.rs 
b/examples/bitcoin.rs index dabb48f..7a65764 100644 --- a/examples/bitcoin.rs +++ b/examples/bitcoin.rs @@ -7,7 +7,7 @@ use std::{ use ff::PrimeField; use nova_scotia::{ - circom::reader::load_r1cs, create_public_params, create_recursive_circuit, FileLocation, F1, G2, + circom::reader::load_r1cs, create_public_params, create_recursive_circuit, FileLocation, F, }; use nova_snark::traits::Group; use serde::{Deserialize, Serialize}; @@ -22,10 +22,13 @@ struct Blocks { } fn bench(iteration_count: usize, per_iteration_count: usize) -> (Duration, Duration) { + type G1 = pasta_curves::pallas::Point; + type G2 = pasta_curves::vesta::Point; + let root = current_dir().unwrap(); let circuit_file = root.join("examples/bitcoin/circom/bitcoin_benchmark.r1cs"); - let r1cs = load_r1cs(&FileLocation::PathBuf(circuit_file)); + let r1cs = load_r1cs::(&FileLocation::PathBuf(circuit_file)); let witness_generator_file = root.join("examples/bitcoin/circom/bitcoin_benchmark_cpp/bitcoin_benchmark"); @@ -33,9 +36,9 @@ fn bench(iteration_count: usize, per_iteration_count: usize) -> (Duration, Durat let btc_blocks: Blocks = serde_json::from_str(include_str!("bitcoin/fetcher/btc-blocks.json")).unwrap(); - let start_public_input = vec![ - F1::from_str_vartime(&btc_blocks.prevBlockHash[0]).unwrap(), - F1::from_str_vartime(&btc_blocks.prevBlockHash[1]).unwrap(), + let start_public_input = [ + F::::from_str_vartime(&btc_blocks.prevBlockHash[0]).unwrap(), + F::::from_str_vartime(&btc_blocks.prevBlockHash[1]).unwrap(), ]; let mut private_inputs = Vec::new(); @@ -61,7 +64,7 @@ fn bench(iteration_count: usize, per_iteration_count: usize) -> (Duration, Durat // println!("{:?} {:?}", start_public_input, private_inputs); - let pp = create_public_params(r1cs.clone()); + let pp = create_public_params::(r1cs.clone()); println!( "Number of constraints per step (primary circuit): {}", @@ -87,24 +90,19 @@ fn bench(iteration_count: usize, per_iteration_count: usize) -> (Duration, Durat 
FileLocation::PathBuf(witness_generator_file), r1cs, private_inputs, - start_public_input.clone(), + start_public_input.to_vec(), &pp, ) .unwrap(); let prover_time = start.elapsed(); println!("RecursiveSNARK creation took {:?}", start.elapsed()); - let z0_secondary = vec![::Scalar::zero()]; + let z0_secondary = [::Scalar::zero()]; // verify the recursive SNARK println!("Verifying a RecursiveSNARK..."); let start = Instant::now(); - let res = recursive_snark.verify( - &pp, - iteration_count, - start_public_input.clone(), - z0_secondary.clone(), - ); + let res = recursive_snark.verify(&pp, iteration_count, &start_public_input, &z0_secondary); println!( "RecursiveSNARK::verify: {:?}, took {:?}", res, diff --git a/examples/bitcoin/benchmark.csv b/examples/bitcoin/benchmark.csv index 8715b80..6cb4411 100644 --- a/examples/bitcoin/benchmark.csv +++ b/examples/bitcoin/benchmark.csv @@ -1,6 +1,6 @@ iteration_count,per_iteration_count,prover_time,verifier_time -120,1,71.309186774s,263.608988ms -60,2,60.127530781s,463.162481ms -40,3,52.164369497s,671.983056ms -30,4,54.131651799s,878.674828ms -24,5,56.433211616s,1.111124065s +120,1,55.385328292s,214.435377ms +60,2,49.05568792s,434.960619ms +40,3,42.080714823s,509.038888ms +30,4,45.407602287s,923.23241ms +24,5,48.438942637s,991.893288ms diff --git a/examples/bitcoin/circom/bitcoin.circom b/examples/bitcoin/circom/bitcoin.circom index c4402fc..1aa94b0 100644 --- a/examples/bitcoin/circom/bitcoin.circom +++ b/examples/bitcoin/circom/bitcoin.circom @@ -174,7 +174,7 @@ template CheckOneBlock() { template Main(BLOCK_COUNT) { signal input step_in[2]; - signal output lastBlockHash[2]; + signal output step_out[2]; // last block hash signal input blockHashes[BLOCK_COUNT][2]; signal input blockHeaders[BLOCK_COUNT][80]; @@ -196,7 +196,7 @@ template Main(BLOCK_COUNT) { checker[i].blockHeaders[j] <== blockHeaders[i][j]; } } - for (var j = 0;j < 2;j++) lastBlockHash[j] <== blockHashes[BLOCK_COUNT - 1][j]; + for (var j = 0;j < 2;j++) 
step_out[j] <== blockHashes[BLOCK_COUNT - 1][j]; } component main { public [step_in] } = Main(1); diff --git a/examples/bitcoin/circom/bitcoin_benchmark.circom b/examples/bitcoin/circom/bitcoin_benchmark.circom index 2c0e53c..1aa94b0 100644 --- a/examples/bitcoin/circom/bitcoin_benchmark.circom +++ b/examples/bitcoin/circom/bitcoin_benchmark.circom @@ -174,7 +174,7 @@ template CheckOneBlock() { template Main(BLOCK_COUNT) { signal input step_in[2]; - signal output lastBlockHash[2]; + signal output step_out[2]; // last block hash signal input blockHashes[BLOCK_COUNT][2]; signal input blockHeaders[BLOCK_COUNT][80]; @@ -196,7 +196,7 @@ template Main(BLOCK_COUNT) { checker[i].blockHeaders[j] <== blockHeaders[i][j]; } } - for (var j = 0;j < 2;j++) lastBlockHash[j] <== blockHashes[BLOCK_COUNT - 1][j]; + for (var j = 0;j < 2;j++) step_out[j] <== blockHashes[BLOCK_COUNT - 1][j]; } -component main { public [step_in] } = Main(5); +component main { public [step_in] } = Main(1); diff --git a/examples/toy/bn254/toy.r1cs b/examples/toy/bn254/toy.r1cs new file mode 100644 index 0000000..3332eb6 Binary files /dev/null and b/examples/toy/bn254/toy.r1cs differ diff --git a/examples/toy/toy.sym b/examples/toy/bn254/toy.sym similarity index 100% rename from examples/toy/toy.sym rename to examples/toy/bn254/toy.sym diff --git a/examples/toy/toy_cpp/Makefile b/examples/toy/bn254/toy_cpp/Makefile similarity index 100% rename from examples/toy/toy_cpp/Makefile rename to examples/toy/bn254/toy_cpp/Makefile diff --git a/examples/toy/toy_cpp/calcwit.cpp b/examples/toy/bn254/toy_cpp/calcwit.cpp similarity index 100% rename from examples/toy/toy_cpp/calcwit.cpp rename to examples/toy/bn254/toy_cpp/calcwit.cpp diff --git a/examples/toy/toy_cpp/calcwit.hpp b/examples/toy/bn254/toy_cpp/calcwit.hpp similarity index 95% rename from examples/toy/toy_cpp/calcwit.hpp rename to examples/toy/bn254/toy_cpp/calcwit.hpp index 5f5ae24..363de21 100644 --- a/examples/toy/toy_cpp/calcwit.hpp +++ 
b/examples/toy/bn254/toy_cpp/calcwit.hpp @@ -10,7 +10,7 @@ #include "circom.hpp" #include "fr.hpp" -#define NMUTEXES 12 //512 +#define NMUTEXES 32 //512 u64 fnv1a(std::string s); @@ -32,9 +32,9 @@ class Circom_CalcWit { // parallelism std::mutex numThreadMutex; std::condition_variable ntcvs; - uint numThread; + int numThread; - uint maxThread; + int maxThread; // Functions called by the circuit Circom_CalcWit(Circom_Circuit *aCircuit, uint numTh = NMUTEXES); diff --git a/examples/toy/bn254/toy_cpp/calcwit.o b/examples/toy/bn254/toy_cpp/calcwit.o new file mode 100644 index 0000000..28d1cc3 Binary files /dev/null and b/examples/toy/bn254/toy_cpp/calcwit.o differ diff --git a/examples/toy/toy_cpp/circom.hpp b/examples/toy/bn254/toy_cpp/circom.hpp similarity index 85% rename from examples/toy/toy_cpp/circom.hpp rename to examples/toy/bn254/toy_cpp/circom.hpp index d9c8df1..eabc686 100644 --- a/examples/toy/toy_cpp/circom.hpp +++ b/examples/toy/bn254/toy_cpp/circom.hpp @@ -47,12 +47,12 @@ struct Circom_Component { std::string templateName; std::string componentName; u64 idFather; - u32* subcomponents; - bool* subcomponentsParallel; - bool *outputIsSet; //one for each output - std::mutex *mutexes; //one for each output - std::condition_variable *cvs; - std::thread *sbct; //subcomponent threads + u32* subcomponents = NULL; + bool* subcomponentsParallel = NULL; + bool *outputIsSet = NULL; //one for each output + std::mutex *mutexes = NULL; //one for each output + std::condition_variable *cvs = NULL; + std::thread *sbct = NULL;//subcomponent threads }; /* diff --git a/examples/toy/bn254/toy_cpp/fr.asm b/examples/toy/bn254/toy_cpp/fr.asm new file mode 100644 index 0000000..611e89c --- /dev/null +++ b/examples/toy/bn254/toy_cpp/fr.asm @@ -0,0 +1,8794 @@ + + + global Fr_copy + global Fr_copyn + global Fr_add + global Fr_sub + global Fr_neg + global Fr_mul + global Fr_square + global Fr_band + global Fr_bor + global Fr_bxor + global Fr_bnot + global Fr_shl + global Fr_shr + 
global Fr_eq + global Fr_neq + global Fr_lt + global Fr_gt + global Fr_leq + global Fr_geq + global Fr_land + global Fr_lor + global Fr_lnot + global Fr_toNormal + global Fr_toLongNormal + global Fr_toMontgomery + global Fr_toInt + global Fr_isTrue + global Fr_q + global Fr_R3 + + global Fr_rawCopy + global Fr_rawZero + global Fr_rawSwap + global Fr_rawAdd + global Fr_rawSub + global Fr_rawNeg + global Fr_rawMMul + global Fr_rawMSquare + global Fr_rawToMontgomery + global Fr_rawFromMontgomery + global Fr_rawIsEq + global Fr_rawIsZero + global Fr_rawq + global Fr_rawR3 + + extern Fr_fail + DEFAULT REL + + section .text + + + + + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; copy +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; rdi <= the dest +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_copy: + + mov rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + mov [rdi + 24], rax + + mov rax, [rsi + 32] + mov [rdi + 32], rax + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawCopy +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; rdi <= the dest +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_rawCopy: + + mov rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + mov [rdi + 24], rax + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawZero +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_rawZero: + xor rax, rax + + mov [rdi + 0], rax + + mov [rdi + 8], rax + + mov [rdi + 16], rax + + mov [rdi + 24], rax + + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawSwap +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rdi <= a +; rsi <= p +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_rawSwap: + + mov rax, [rsi + 0] + mov rcx, [rdi + 0] + mov [rdi + 0], rax + mov 
[rsi + 0], rbx + + mov rax, [rsi + 8] + mov rcx, [rdi + 8] + mov [rdi + 8], rax + mov [rsi + 8], rbx + + mov rax, [rsi + 16] + mov rcx, [rdi + 16] + mov [rdi + 16], rax + mov [rsi + 16], rbx + + mov rax, [rsi + 24] + mov rcx, [rdi + 24] + mov [rdi + 24], rax + mov [rsi + 24], rbx + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; copy an array of integers +;;;;;;;;;;;;;;;;;;;;;; +; Copies +; Params: +; rsi <= the src +; rdi <= the dest +; rdx <= number of integers to copy +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; +Fr_copyn: +Fr_copyn_loop: + mov r8, rsi + mov r9, rdi + mov rax, 5 + mul rdx + mov rcx, rax + cld + rep movsq + mov rsi, r8 + mov rdi, r9 + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawCopyS2L +;;;;;;;;;;;;;;;;;;;;;; +; Convert a 64 bit integer to a long format field element +; Params: +; rsi <= the integer +; rdi <= Pointer to the overwritted element +; +; Nidified registers: +; rax +;;;;;;;;;;;;;;;;;;;;;;; + +rawCopyS2L: + mov al, 0x80 + shl rax, 56 + mov [rdi], rax ; set the result to LONG normal + + cmp rsi, 0 + js u64toLong_adjust_neg + + mov [rdi + 8], rsi + xor rax, rax + + mov [rdi + 16], rax + + mov [rdi + 24], rax + + mov [rdi + 32], rax + + ret + +u64toLong_adjust_neg: + add rsi, [q] ; Set the first digit + mov [rdi + 8], rsi ; + + mov rsi, -1 ; all ones + + mov rax, rsi ; Add to q + adc rax, [q + 8 ] + mov [rdi + 16], rax + + mov rax, rsi ; Add to q + adc rax, [q + 16 ] + mov [rdi + 24], rax + + mov rax, rsi ; Add to q + adc rax, [q + 24 ] + mov [rdi + 32], rax + + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toInt +;;;;;;;;;;;;;;;;;;;;;; +; Convert a 64 bit integer to a long format field element +; Params: +; rsi <= Pointer to the element +; Returs: +; rax <= The value +;;;;;;;;;;;;;;;;;;;;;;; +Fr_toInt: + mov rax, [rdi] + bt rax, 63 + jc Fr_long + movsx rax, eax + ret + +Fr_long: + push rbp + push rsi + push rdx + mov rbp, rsp + bt rax, 62 + jnc Fr_longNormal +Fr_longMontgomery: + + sub rsp, 40 + push rsi + mov rsi, rdi + mov rdi, rsp + call Fr_toNormal + 
pop rsi + + +Fr_longNormal: + mov rax, [rdi + 8] + mov rcx, rax + shr rcx, 31 + jnz Fr_longNeg + + mov rcx, [rdi + 16] + test rcx, rcx + jnz Fr_longNeg + + mov rcx, [rdi + 24] + test rcx, rcx + jnz Fr_longNeg + + mov rcx, [rdi + 32] + test rcx, rcx + jnz Fr_longNeg + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +Fr_longNeg: + mov rax, [rdi + 8] + sub rax, [q] + jnc Fr_longErr + + mov rcx, [rdi + 16] + sbb rcx, [q + 8] + jnc Fr_longErr + + mov rcx, [rdi + 24] + sbb rcx, [q + 16] + jnc Fr_longErr + + mov rcx, [rdi + 32] + sbb rcx, [q + 24] + jnc Fr_longErr + + mov rcx, rax + sar rcx, 31 + add rcx, 1 + jnz Fr_longErr + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +Fr_longErr: + push rdi + mov rdi, 0 + call Fr_fail + pop rdi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +Fr_rawMMul: + push r15 + push r14 + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov rdx,[rsi + 0] + mulx rax,r11,[rcx] + mulx r8,r12,[rcx +8] + adcx r12,rax + mulx rax,r13,[rcx +16] + adcx r13,r8 + mulx r8,r14,[rcx +24] + adcx r14,rax + mov r15,r10 + adcx r15,r8 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 8] + mov r15,r10 + mulx r8,rax,[rcx +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rcx +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rcx +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rcx +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 16] + mov r15,r10 + mulx 
r8,rax,[rcx +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rcx +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rcx +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rcx +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 24] + mov r15,r10 + mulx r8,rax,[rcx +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rcx +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rcx +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rcx +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq + cmp r13,[q + 16] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq + cmp r12,[q + 8] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq + cmp r11,[q + 0] + jc Fr_rawMMul_done + jnz Fr_rawMMul_sq +Fr_rawMMul_sq: + sub r11,[q +0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawMMul_done: + mov [rdi + 0],r11 + mov [rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop r14 + pop r15 + ret +Fr_rawMSquare: + push r15 + push r14 + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov rdx,[rsi + 0] + mulx rax,r11,rdx + mulx r8,r12,[rsi +8] + adcx r12,rax + mulx rax,r13,[rsi +16] + adcx r13,r8 + mulx r8,r14,[rsi +24] + adcx r14,rax + mov r15,r10 + adcx r15,r8 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx 
r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 8] + mov r15,r10 + mulx r8,rax,[rsi +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rsi +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rsi +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rsi +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 16] + mov r15,r10 + mulx r8,rax,[rsi +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rsi +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rsi +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rsi +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +; FirstLoop + mov rdx,[rsi + 24] + mov r15,r10 + mulx r8,rax,[rsi +0] + adcx r11,rax + adox r12,r8 + mulx r8,rax,[rsi +8] + adcx r12,rax + adox r13,r8 + mulx r8,rax,[rsi +16] + adcx r13,rax + adox r14,r8 + mulx r8,rax,[rsi +24] + adcx r14,rax + adox r15,r8 + adcx r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq + cmp r13,[q + 16] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq + cmp r12,[q + 8] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq + cmp 
r11,[q + 0] + jc Fr_rawMSquare_done + jnz Fr_rawMSquare_sq +Fr_rawMSquare_sq: + sub r11,[q +0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawMSquare_done: + mov [rdi + 0],r11 + mov [rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop r14 + pop r15 + ret +Fr_rawMMul1: + push r15 + push r14 + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov rdx,rcx + mulx rax,r11,[rsi] + mulx r8,r12,[rsi +8] + adcx r12,rax + mulx rax,r13,[rsi +16] + adcx r13,r8 + mulx r8,r14,[rsi +24] + adcx r14,rax + mov r15,r10 + adcx r15,r8 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq + cmp r13,[q + 16] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq + cmp r12,[q + 8] + jc Fr_rawMMul1_done + jnz Fr_rawMMul1_sq + cmp r11,[q + 0] + jc Fr_rawMMul1_done + jnz 
Fr_rawMMul1_sq +Fr_rawMMul1_sq: + sub r11,[q +0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawMMul1_done: + mov [rdi + 0],r11 + mov [rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop r14 + pop r15 + ret +Fr_rawFromMontgomery: + push r15 + push r14 + push r13 + push r12 + mov rcx,rdx + mov r9,[ np ] + xor r10,r10 + +; FirstLoop + mov r11,[rsi +0] + mov r12,[rsi +8] + mov r13,[rsi +16] + mov r14,[rsi +24] + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + + mov r15,r10 +; SecondLoop + mov rdx,r9 + mulx rax,rdx,r11 + mulx r8,rax,[q] + adcx rax,r11 + mulx rax,r11,[q +8] + adcx r11,r8 + adox r11,r12 + mulx r8,r12,[q +16] + adcx r12,rax + adox r12,r13 + mulx rax,r13,[q +24] + adcx r13,r8 + adox r13,r14 + mov r14,r10 + adcx r14,rax + adox r14,r15 + +;comparison + cmp r14,[q + 24] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq + cmp r13,[q + 16] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq + cmp r12,[q + 8] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq + cmp r11,[q + 0] + jc Fr_rawFromMontgomery_done + jnz Fr_rawFromMontgomery_sq +Fr_rawFromMontgomery_sq: + sub r11,[q 
+0] + sbb r12,[q +8] + sbb r13,[q +16] + sbb r14,[q +24] +Fr_rawFromMontgomery_done: + mov [rdi + 0],r11 + mov [rdi + 8],r12 + mov [rdi + 16],r13 + mov [rdi + 24],r14 + pop r12 + pop r13 + pop r14 + pop r15 + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawToMontgomery +;;;;;;;;;;;;;;;;;;;;;; +; Convert a raw number to Montgomery: calls Fr_rawMMul with rdx = R2 (caller's rdx is preserved) +; rdi <= Pointer destination element +; rsi <= Pointer to src element +;;;;;;;;;;;;;;;;;;;; +Fr_rawToMontgomery: + push rdx + lea rdx, [R2] + call Fr_rawMMul + pop rdx + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toMontgomery +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to Montgomery +; rdi <= Destination +; rsi <= Pointer element to convert +; Modified registers: +; r8, r9, r10, r11, rax, rcx (the short path also overwrites rsi and rdx) +;;;;;;;;;;;;;;;;;;;; +Fr_toMontgomery: + mov rax, [rsi] + bt rax, 62 ; check if montgomery + jc toMontgomery_doNothing + bt rax, 63 ; check if long + jc toMontgomeryLong + +toMontgomeryShort: + movsx rdx, eax + mov [rdi], rdx + add rdi, 8 + lea rsi, [R2] + cmp rdx, 0 + js negMontgomeryShort +posMontgomeryShort: + call Fr_rawMMul1 + sub rdi, 8 + mov r11b, 0x40 + shl r11d, 24 + mov [rdi+4], r11d ; type dword = 0x40000000: montgomery flag (bit 62) set, long flag (bit 63) clear + ret + +negMontgomeryShort: + neg rdx ; Do the multiplication positive and then negate the result. 
+ call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + mov r11b, 0x40 + shl r11d, 24 + mov [rdi+4], r11d ; type dword = 0x40000000: montgomery flag (bit 62) set, long flag (bit 63) clear + ret + + +toMontgomeryLong: + mov [rdi], rax + add rdi, 8 + add rsi, 8 + lea rdx, [R2] + call Fr_rawMMul + sub rsi, 8 + sub rdi, 8 + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d ; type dword = 0xC0000000: long and montgomery flags set + ret + + +toMontgomery_doNothing: + call Fr_copy + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toNormal +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number from Montgomery +; rdi <= Destination +; rsi <= Pointer element to convert +; Modified registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toNormal: + mov rax, [rsi] + bt rax, 62 ; check if montgomery + jnc toNormal_doNothing + bt rax, 63 ; not long (short): nothing to convert, just copy + jnc toNormal_doNothing + +toNormalLong: + add rdi, 8 + add rsi, 8 + call Fr_rawFromMontgomery + sub rsi, 8 + sub rdi, 8 + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d ; type dword = 0x80000000: long flag set, montgomery flag clear + ret + +toNormal_doNothing: + call Fr_copy + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toLongNormal +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to long normal +; rdi <= Destination +; rsi <= Pointer element to convert +; Modified registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toLongNormal: + mov rax, [rsi] + bt rax, 63 ; check if long + jnc toLongNormal_fromShort + bt rax, 62 ; check if montgomery + jc toLongNormal_fromMontgomery + call Fr_copy ; It is already long normal + ret + +toLongNormal_fromMontgomery: + add rdi, 8 + add rsi, 8 + call Fr_rawFromMontgomery + sub rsi, 8 + sub rdi, 8 + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d ; type dword = 0x80000000: long flag set, montgomery flag clear + ret + +toLongNormal_fromShort: + mov r8, rsi ; save rsi + movsx rsi, eax ; rsi = sign-extended short value passed to rawCopyS2L + call rawCopyS2L + mov rsi, r8 ; recover rsi + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + ret + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; add +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, r10, r11, rax, rcx 
+;;;;;;;;;;;;;;;;;;;;;; +Fr_add: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc add_l1 + bt rcx, 63 ; Check if is short second operand + jc add_s1l2 + +add_s1s2: ; Both operands are short + + xor rdx, rdx + mov edx, eax + add edx, ecx + jo add_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +add_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + movsx rdx, ecx + add rsi, rdx + call rawCopyS2L + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +add_l1: + bt rcx, 63 ; Check if is short second operand + jc add_l1l2 + +;;;;;;;; +add_l1s2: + bt rax, 62 ; check if montgomery first + jc add_l1ms2 +add_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rsi, 8 + movsx rdx, ecx + add rdi, 8 + cmp rdx, 0 + + jns tmp_1 + neg rdx + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_1: + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +add_l1ms2: + bt rcx, 62 ; check if montgomery second + jc add_l1ms2m +add_l1ms2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;; +add_s1l2: + bt rcx, 62 ; check if montgomery second + jc add_s1l2m +add_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + lea rsi, [rdx + 8] + movsx rdx, eax + add 
rdi, 8 + cmp rdx, 0 + + jns tmp_2 + neg rdx + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_2: + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_s1l2m: + bt rax, 62 ; check if montgomery first + jc add_s1ml2m +add_s1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;; +add_l1l2: + bt rax, 62 ; check if montgomery first + jc add_l1ml2 +add_l1nl2: + bt rcx, 62 ; check if montgomery second + jc add_l1nl2m +add_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ml2: + bt rcx, 62 ; check if montgomery seconf + jc add_l1ml2m +add_l1ml2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +add_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + 
+ add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawAddLL +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of type long +; Params: +; rsi <= Pointer to the long data of element 1 +; rdx <= Pointer to the long data of element 2 +; rdi <= Pointer to the long data of result +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawAddLL: +Fr_rawAdd: + ; Add component by component with carry + + mov rax, [rsi + 0] + add rax, [rdx + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + adc rax, [rdx + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + adc rax, [rdx + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + adc rax, [rdx + 24] + mov [rdi + 24], rax + + jc rawAddLL_sq ; if overflow, substract q + + ; Compare with q + + + cmp rax, [q + 24] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 16] + + cmp rax, [q + 16] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 8] + + cmp rax, [q + 8] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 0] + + cmp rax, [q + 0] + jc rawAddLL_done ; q is bigget so done. 
+ jnz rawAddLL_sq ; q is lower + + ; If equal subtract q +rawAddLL_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +rawAddLL_done: + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawAddLS +;;;;;;;;;;;;;;;;;;;;;; +; Adds a short (64-bit register) value to an element of type long +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1 +; rdx <= Value to be added (rdx and rax are overwritten) +;;;;;;;;;;;;;;;;;;;;;; +rawAddLS: + ; Add component by component with carry + + add rdx, [rsi] + mov [rdi] ,rdx + + mov rdx, 0 + adc rdx, [rsi + 8] + mov [rdi + 8], rdx + + mov rdx, 0 + adc rdx, [rsi + 16] + mov [rdi + 16], rdx + + mov rdx, 0 + adc rdx, [rsi + 24] + mov [rdi + 24], rdx + + jc rawAddLS_sq ; if overflow, subtract q + + ; Compare with q, most significant limb first + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc rawAddLS_done ; q is bigger so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc rawAddLS_done ; q is bigger so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc rawAddLS_done ; q is bigger so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc rawAddLS_done ; q is bigger so done. 
+ jnz rawAddLS_sq ; q is lower + + ; If equal substract q +rawAddLS_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +rawAddLS_done: + ret + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; sub +;;;;;;;;;;;;;;;;;;;;;; +; Substracts two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_sub: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is long first operand + jc sub_l1 + bt rcx, 63 ; Check if is long second operand + jc sub_s1l2 + +sub_s1s2: ; Both operands are short + + xor rdx, rdx + mov edx, eax + sub edx, ecx + jo sub_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +sub_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + movsx rdx, ecx + sub rsi, rdx + call rawCopyS2L + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +sub_l1: + bt rcx, 63 ; Check if is short second operand + jc sub_l1l2 + +;;;;;;;; +sub_l1s2: + bt rax, 62 ; check if montgomery first + jc sub_l1ms2 +sub_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rsi, 8 + movsx rdx, ecx + add rdi, 8 + cmp rdx, 0 + + jns tmp_3 + neg rdx + call rawAddLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +tmp_3: + call rawSubLS + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ms2: + bt rcx, 62 ; check if montgomery second + jc sub_l1ms2m +sub_l1ms2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + 
pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;; +sub_s1l2: + bt rcx, 62 ; check if montgomery first + jc sub_s1l2m +sub_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + cmp eax, 0 + + js tmp_4 + + ; First Operand is positive + push rsi + add rdi, 8 + movsx rsi, eax + add rdx, 8 + call rawSubSL + sub rdi, 8 + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_4: ; First operand is negative + push rsi + lea rsi, [rdx + 8] + movsx rdx, eax + add rdi, 8 + neg rdx + call rawNegLS + sub rdi, 8 + pop rsi + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_s1l2m: + bt rax, 62 ; check if montgomery second + jc sub_s1ml2m +sub_s1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;; +sub_l1l2: + bt rax, 62 ; check if montgomery first + jc sub_l1ml2 +sub_l1nl2: + bt rcx, 62 ; check if montgomery second + jc sub_l1nl2m +sub_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1nl2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call 
Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ml2: + bt rcx, 62 ; check if montgomery seconf + jc sub_l1ml2m +sub_l1ml2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +sub_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubLS +;;;;;;;;;;;;;;;;;;;;;; +; Substracts a short element from the long element +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1 where will be substracted +; rdx <= Value to be substracted +; [rdi] = [rsi] - rdx +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawSubLS: + ; Substract first digit + + mov rax, [rsi] + sub rax, rdx + mov [rdi] ,rax + mov rdx, 0 + + mov rax, [rsi + 8] + sbb rax, rdx + mov [rdi + 8], rax + + mov rax, [rsi + 16] + sbb rax, rdx + mov [rdi + 16], rax + + mov rax, [rsi + 24] + sbb rax, rdx + mov [rdi + 24], rax + + jnc rawSubLS_done ; if overflow, add q + + ; Add q +rawSubLS_aq: + + mov rax, [q + 0] + add [rdi + 0], rax + + mov rax, [q + 8] + adc [rdi + 8], rax + + mov rax, [q + 16] + adc [rdi + 16], rax + + mov rax, [q + 24] + adc [rdi + 24], rax + +rawSubLS_done: + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubSL +;;;;;;;;;;;;;;;;;;;;;; +; Substracts a long element from a short element +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Value from where will bo substracted +; rdx <= Pointer to long of the value to be substracted +; +; [rdi] = 
rsi - [rdx] +; Modified Registers: +; rax, rsi +;;;;;;;;;;;;;;;;;;;;;; +rawSubSL: + ; Subtract first digit + sub rsi, [rdx] + mov [rdi] ,rsi + + + mov rax, 0 + sbb rax, [rdx + 8] + mov [rdi + 8], rax + + mov rax, 0 + sbb rax, [rdx + 16] + mov [rdi + 16], rax + + mov rax, 0 + sbb rax, [rdx + 24] + mov [rdi + 24], rax + + jnc rawSubSL_done ; no borrow: done; on borrow fall through and add q + + ; Add q +rawSubSL_aq: + + mov rax, [q + 0] + add [rdi + 0], rax + + mov rax, [q + 8] + adc [rdi + 8], rax + + mov rax, [q + 16] + adc [rdi + 16], rax + + mov rax, [q + 24] + adc [rdi + 24], rax + +rawSubSL_done: + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubLL +;;;;;;;;;;;;;;;;;;;;;; +; Subtracts a long element from another long element +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to long from which the value is subtracted +; rdx <= Pointer to long of the value to be subtracted +; +; [rdi] = [rsi] - [rdx] +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawSubLL: +Fr_rawSub: + ; Subtract limb by limb with borrow + + mov rax, [rsi + 0] + sub rax, [rdx + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + sbb rax, [rdx + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + sbb rax, [rdx + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + sbb rax, [rdx + 24] + mov [rdi + 24], rax + + jnc rawSubLL_done ; no borrow: done; on borrow fall through and add q + + ; Add q +rawSubLL_aq: + + mov rax, [q + 0] + add [rdi + 0], rax + + mov rax, [q + 8] + adc [rdi + 8], rax + + mov rax, [q + 16] + adc [rdi + 16], rax + + mov rax, [q + 24] + adc [rdi + 24], rax + +rawSubLL_done: + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawNegLS +;;;;;;;;;;;;;;;;;;;;;; +; Subtracts a long element and a short element from 0 +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long value to be subtracted +; rdx <= short value to be subtracted too +; +; [rdi] = -[rsi] - rdx +; Modified Registers: +; rax, rdx +;;;;;;;;;;;;;;;;;;;;;; +rawNegLS: + mov rax, [q] ; first pass: [rdi] = q - rdx + sub rax, rdx + mov [rdi], rax + + mov rax, [q + 8 ] + sbb rax, 0 + mov [rdi + 8], rax + + mov rax, [q + 16 ] 
+ sbb rax, 0 + mov [rdi + 16], rax + + mov rax, [q + 24 ] + sbb rax, 0 + mov [rdi + 24], rax + + setc dl ; dl = borrow out of the first pass (q - rdx) + + + mov rax, [rdi + 0 ] + sub rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [rdi + 8 ] + sbb rax, [rsi + 8] + mov [rdi + 8], rax + + mov rax, [rdi + 16 ] + sbb rax, [rsi + 16] + mov [rdi + 16], rax + + mov rax, [rdi + 24 ] + sbb rax, [rsi + 24] + mov [rdi + 24], rax + + + setc dh ; dh = borrow out of the second pass ([rdi] - [rsi]) + or dl, dh + jz rawNegSL_done ; neither pass borrowed: done + + ; it is a negative value, so add q + + mov rax, [q + 0] + add [rdi + 0], rax + + mov rax, [q + 8] + adc [rdi + 8], rax + + mov rax, [q + 16] + adc [rdi + 16], rax + + mov rax, [q + 24] + adc [rdi + 24], rax + + +rawNegSL_done: + ret + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; neg +;;;;;;;;;;;;;;;;;;;;;; +; Negates an element of any kind +; Params: +; rsi <= Pointer to element to be negated +; rdi <= Pointer to result +; [rdi] = -[rsi] +;;;;;;;;;;;;;;;;;;;;;; +Fr_neg: + mov rax, [rsi] + bt rax, 63 ; Check if the operand is long + jc neg_l + +neg_s: ; Operand is short + + neg eax + jo neg_manageOverflow ; Check if overflow. 
(0x80000000 is the only case) + + mov [rdi], rax ; not necessary to adjust so just save and return + ret + +neg_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + neg rsi + call rawCopyS2L + pop rsi + ret + + + +neg_l: + mov [rdi], rax ; Copy the type + + add rdi, 8 + add rsi, 8 + call rawNegL + sub rdi, 8 + sub rsi, 8 + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawNeg +;;;;;;;;;;;;;;;;;;;;;; +; Negates a long value: writes 0 when the input is 0, otherwise q - [rsi] +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1 +; +; [rdi] = - [rsi] +;;;;;;;;;;;;;;;;;;;;;; +rawNegL: +Fr_rawNeg: + ; Check whether the operand is zero (rax = 0 is compared against every limb) + + xor rax, rax + + cmp [rsi + 0], rax + jnz doNegate + + cmp [rsi + 8], rax + jnz doNegate + + cmp [rsi + 16], rax + jnz doNegate + + cmp [rsi + 24], rax + jnz doNegate + + ; it's zero so just set to zero + + mov [rdi + 0], rax + + mov [rdi + 8], rax + + mov [rdi + 16], rax + + mov [rdi + 24], rax + + ret +doNegate: + + mov rax, [q + 0] + sub rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [q + 8] + sbb rax, [rsi + 8] + mov [rdi + 8], rax + + mov rax, [q + 16] + sbb rax, [rsi + 16] + mov [rdi + 16], rax + + mov rax, [q + 24] + sbb rax, [rsi + 24] + mov [rdi + 24], rax + + ret + + + + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; square +;;;;;;;;;;;;;;;;;;;;;; +; Squares a field element +; Params: +; rsi <= Pointer to element 1 +; rdi <= Pointer to result +; [rdi] = [rsi] * [rsi] +; Modified Registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_square: + mov r8, [rsi] + bt r8, 63 ; Check if the operand is long + jc square_l1 + +square_s1: ; Operand is short + + xor rax, rax + mov eax, r8d + imul eax + jo square_manageOverflow ; OF set: the product does not fit in 32 bits, redo in 64 bits + + mov [rdi], rax ; store the short result. NOTE(review): no ret follows, so execution falls through into square_manageOverflow and rawCopyS2L rewrites [rdi]; confirm this is intended + +square_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rax, r8d + imul rax + mov rsi, rax + call rawCopyS2L + pop rsi + + ret + +square_l1: + bt r8, 62 ; check if 
montgomery first + jc square_l1m +square_l1n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + call Fr_rawMSquare + sub rdi, 8 + sub rsi, 8 + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] ; second Montgomery multiply, of the result by R3, in place + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +square_l1m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + call Fr_rawMSquare + sub rdi, 8 + sub rsi, 8 + + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; mul +;;;;;;;;;;;;;;;;;;;;;; +; Multiplies two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; [rdi] = [rsi] * [rdx] +; Modified Registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_mul: + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if the first operand is long + jc mul_l1 + bt r9, 63 ; Check if the second operand is long + jc mul_s1l2 + +mul_s1s2: ; Both operands are short + + xor rax, rax + mov eax, r8d + imul r9d + jo mul_manageOverflow ; OF set: the product does not fit in 32 bits, redo in 64 bits + + mov [rdi], rax ; store the short result. NOTE(review): no ret follows, so execution falls through into mul_manageOverflow and rawCopyS2L rewrites [rdi]; confirm this is intended + +mul_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rax, r8d + movsx rcx, r9d + imul rcx + mov rsi, rax + call rawCopyS2L + pop rsi + + ret + +mul_l1: + bt r9, 63 ; Check if the second operand is long + jc mul_l1l2 + +;;;;;;;; +mul_l1s2: + bt r8, 62 ; check if montgomery first + jc mul_l1ms2 +mul_l1ns2: + bt r9, 62 ; check if montgomery second + jc mul_l1ns2m +mul_l1ns2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + add rsi, 8 + movsx rdx, r9d + add rdi, 8 + cmp rdx, 0 + + jns tmp_5 + neg rdx ; multiply by the absolute value, then negate the product + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_6 +tmp_5: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_6: + + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] ; second Montgomery multiply, of the result by R3, in place + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + + +mul_l1ns2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add 
rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + +mul_l1ms2: + bt r9, 62 ; check if montgomery second + jc mul_l1ms2m +mul_l1ms2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + add rsi, 8 + movsx rdx, r9d + add rdi, 8 + cmp rdx, 0 + + jns tmp_7 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_8 +tmp_7: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_8: + + + ret + +mul_l1ms2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + +;;;;;;;; +mul_s1l2: + bt r8, 62 ; check if montgomery first + jc mul_s1ml2 +mul_s1nl2: + bt r9, 62 ; check if montgomery first + jc mul_s1nl2m +mul_s1nl2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + lea rsi, [rdx + 8] + movsx rdx, r8d + add rdi, 8 + cmp rdx, 0 + + jns tmp_9 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_10 +tmp_9: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_10: + + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +mul_s1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + push rsi + lea rsi, [rdx + 8] + movsx rdx, r8d + add rdi, 8 + cmp rdx, 0 + + jns tmp_11 + neg rdx + call Fr_rawMMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_12 +tmp_11: + call Fr_rawMMul1 + sub rdi, 8 + pop rsi +tmp_12: + + + ret + +mul_s1ml2: + bt r9, 62 ; check if montgomery first + jc mul_s1ml2m +mul_s1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_s1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +;;;; +mul_l1l2: + bt r8, 62 ; check if montgomery first + jc mul_l1ml2 +mul_l1nl2: + bt r9, 62 ; check if 
montgomery second + jc mul_l1nl2m +mul_l1nl2n: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call Fr_rawMMul + sub rdi, 8 + pop rsi + + ret + +mul_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_l1ml2: + bt r9, 62 ; check if montgomery seconf + jc mul_l1ml2m +mul_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_l1ml2m: + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call Fr_rawMMul + sub rdi, 8 + sub rsi, 8 + + ret + + + + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; band +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_band: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc and_l1 + bt rcx, 63 ; Check if is short second operand + jc and_s1l2 + +and_s1s2: + + cmp eax, 0 + + js tmp_13 + + cmp ecx, 0 + js tmp_13 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, eax + and edx, ecx + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_13: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + and 
rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + ; If equal substract q +tmp_14: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_15: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +and_l1: + bt rcx, 63 ; Check if is short second operand + jc and_l1l2 + + +and_l1s2: + bt rax, 62 ; check if montgomery first + jc and_l1ms2 +and_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_16 + movsx rax, ecx + and rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + and rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + and rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_18 ; q is bigget so done. 
+ jnz tmp_17 ; q is lower + + ; If equal substract q +tmp_17: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_18: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_16: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_20 ; q is bigget so done. 
+ jnz tmp_19 ; q is lower + + ; If equal substract q +tmp_19: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_20: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +and_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_21 + movsx rax, ecx + and rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + and rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + and rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_23 ; q is bigget so done. 
+ jnz tmp_22 ; q is lower + + ; If equal substract q +tmp_22: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_23: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_21: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_25 ; q is bigget so done. 
+ jnz tmp_24 ; q is lower + + ; If equal substract q +tmp_24: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_25: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +and_s1l2: + bt rcx, 62 ; check if montgomery first + jc and_s1l2m +and_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_26 + and rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_28 ; q is bigget so done. 
+ jnz tmp_27 ; q is lower + + ; If equal substract q +tmp_27: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_28: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_26: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_30 ; q is bigget so done. 
+ jnz tmp_29 ; q is lower + + ; If equal substract q +tmp_29: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_30: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +and_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_31 + and rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_33 ; q is bigget so done. 
+ jnz tmp_32 ; q is lower + + ; If equal substract q +tmp_32: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_33: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_31: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_35 ; q is bigget so done. 
+ jnz tmp_34 ; q is lower + + ; If equal substract q +tmp_34: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_35: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +and_l1l2: + bt rax, 62 ; check if montgomery first + jc and_l1ml2 + bt rcx, 62 ; check if montgomery first + jc and_l1nl2m +and_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_37 ; q is bigget so done. 
+ jnz tmp_36 ; q is lower + + ; If equal substract q +tmp_36: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_37: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_39 ; q is bigget so done. 
+ jnz tmp_38 ; q is lower + + ; If equal substract q +tmp_38: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_39: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1ml2: + bt rcx, 62 ; check if montgomery first + jc and_l1ml2m +and_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_41 ; q is bigget so done. 
+ jnz tmp_40 ; q is lower + + ; If equal substract q +tmp_40: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_41: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +and_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_43 ; q is bigget so done. 
+ jnz tmp_42 ; q is lower + + ; If equal, subtract q +tmp_42: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_43: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; bor +;;;;;;;;;;;;;;;;;;;;;; +; Bitwise OR of two elements of any kind (short, long, or long Montgomery) +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bor: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Bit 63 set means the first operand is long + jc or_l1 + bt rcx, 63 ; Bit 63 set means the second operand is long + jc or_s1l2 + +or_s1s2: + + cmp eax, 0 + + js tmp_44 + + cmp ecx, 0 + js tmp_44 + xor rdx, rdx ; both ops are non-negative so do the op and return + mov edx, eax + or edx, ecx + mov [rdi], rdx ; upper dword is zero so the result is stored as a short element; no adjustment needed + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_44: ; at least one short operand is negative: widen both via Fr_toLongNormal + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d ; high dword = 0x80000000: result tagged long (bit 63), non-Montgomery (bit 62 clear) + + mov r8, rdi + sub rsp, 40 ; 40-byte stack temporary for the converted element 2 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi ; rdx -> stack temporary (relies on the callee leaving rdi unchanged) + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 ; second 40-byte temporary, for element 1 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi ; rsi -> stack temporary holding the converted element 1 + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] ; mask the top limb (presumably clears bits above the field width; confirm against the lboMask definition) + + mov [rdi + 32 ], rax + + + + + + ; Compare with q, most significant limb first + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_46 ; q is bigger so done. + jnz tmp_45 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_46 ; q is bigger so done.
+ jnz tmp_45 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_46 ; q is bigget so done. + jnz tmp_45 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_46 ; q is bigget so done. + jnz tmp_45 ; q is lower + + ; If equal substract q +tmp_45: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_46: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +or_l1: + bt rcx, 63 ; Check if is short second operand + jc or_l1l2 + + +or_l1s2: + bt rax, 62 ; check if montgomery first + jc or_l1ms2 +or_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_47 + movsx rax, ecx + or rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + or rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + or rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_49 ; q is bigget so done. 
+ jnz tmp_48 ; q is lower + + ; If equal substract q +tmp_48: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_49: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_47: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_51 ; q is bigget so done. 
+ jnz tmp_50 ; q is lower + + ; If equal substract q +tmp_50: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_51: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +or_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_52 + movsx rax, ecx + or rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + or rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + or rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_54 ; q is bigget so done. 
+ jnz tmp_53 ; q is lower + + ; If equal substract q +tmp_53: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_54: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_52: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_56 ; q is bigget so done. 
+ jnz tmp_55 ; q is lower + + ; If equal substract q +tmp_55: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_56: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +or_s1l2: + bt rcx, 62 ; check if montgomery first + jc or_s1l2m +or_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_57 + or rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_59 ; q is bigget so done. 
+ jnz tmp_58 ; q is lower + + ; If equal substract q +tmp_58: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_59: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_57: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_61 ; q is bigget so done. 
+ jnz tmp_60 ; q is lower + + ; If equal substract q +tmp_60: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_61: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +or_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_62 + or rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_64 ; q is bigget so done. 
+ jnz tmp_63 ; q is lower + + ; If equal substract q +tmp_63: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_64: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_62: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_66 ; q is bigget so done. 
+ jnz tmp_65 ; q is lower + + ; If equal substract q +tmp_65: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_66: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +or_l1l2: + bt rax, 62 ; check if montgomery first + jc or_l1ml2 + bt rcx, 62 ; check if montgomery first + jc or_l1nl2m +or_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_68 ; q is bigget so done. 
+ jnz tmp_67 ; q is lower + + ; If equal substract q +tmp_67: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_68: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_70 ; q is bigget so done. 
+ jnz tmp_69 ; q is lower + + ; If equal substract q +tmp_69: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_70: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1ml2: + bt rcx, 62 ; check if montgomery first + jc or_l1ml2m +or_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_72 ; q is bigget so done. 
+ jnz tmp_71 ; q is lower + + ; If equal substract q +tmp_71: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_72: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +or_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_74 ; q is bigget so done. 
+ jnz tmp_73 ; q is lower + + ; If equal substract q +tmp_73: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_74: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; bxor +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bxor: + push rbp + push rsi + push rdx + mov rbp, rsp + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is short first operand + jc xor_l1 + bt rcx, 63 ; Check if is short second operand + jc xor_s1l2 + +xor_s1s2: + + cmp eax, 0 + + js tmp_75 + + cmp ecx, 0 + js tmp_75 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, eax + xor edx, ecx + mov [rdi], rdx ; not necessary to adjust so just save and return + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_75: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_77 ; q is bigget so done. 
+ jnz tmp_76 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + ; If equal substract q +tmp_76: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_77: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + + +xor_l1: + bt rcx, 63 ; Check if is short second operand + jc xor_l1l2 + + +xor_l1s2: + bt rax, 62 ; check if montgomery first + jc xor_l1ms2 +xor_l1ns2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_78 + movsx rax, ecx + xor rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + xor rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + xor rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_80 ; q is bigget so done. 
+ jnz tmp_79 ; q is lower + + ; If equal substract q +tmp_79: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_80: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_78: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_82 ; q is bigget so done. 
+ jnz tmp_81 ; q is lower + + ; If equal substract q +tmp_81: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_82: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +xor_l1ms2: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov rcx, [rdx] + cmp ecx, 0 + + js tmp_83 + movsx rax, ecx + xor rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + xor rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + xor rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_85 ; q is bigget so done. 
+ jnz tmp_84 ; q is lower + + ; If equal substract q +tmp_84: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_85: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_83: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_87 ; q is bigget so done. 
+ jnz tmp_86 ; q is lower + + ; If equal substract q +tmp_86: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_87: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +xor_s1l2: + bt rcx, 62 ; check if montgomery first + jc xor_s1l2m +xor_s1l2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov eax, [rsi] + cmp eax, 0 + + js tmp_88 + xor rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_90 ; q is bigget so done. 
+ jnz tmp_89 ; q is lower + + ; If equal substract q +tmp_89: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_90: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_88: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_92 ; q is bigget so done. 
+ jnz tmp_91 ; q is lower + + ; If equal substract q +tmp_91: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_92: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +xor_s1l2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov eax, [rsi] + cmp eax, 0 + + js tmp_93 + xor rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_95 ; q is bigget so done. 
+ jnz tmp_94 ; q is lower + + ; If equal substract q +tmp_94: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_95: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +tmp_93: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_97 ; q is bigget so done. 
+ jnz tmp_96 ; q is lower + + ; If equal substract q +tmp_96: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_97: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + + +xor_l1l2: + bt rax, 62 ; check if montgomery first + jc xor_l1ml2 + bt rcx, 62 ; check if montgomery first + jc xor_l1nl2m +xor_l1nl2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_99 ; q is bigget so done. 
+ jnz tmp_98 ; q is lower + + ; If equal substract q +tmp_98: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_99: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1nl2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_101 ; q is bigget so done. 
+ jnz tmp_100 ; q is lower + + ; If equal substract q +tmp_100: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_101: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1ml2: + bt rcx, 62 ; check if montgomery first + jc xor_l1ml2m +xor_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_103 ; q is bigget so done. 
+ jnz tmp_102 ; q is lower + + ; If equal substract q +tmp_102: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_103: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +xor_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_105 ; q is bigget so done. 
+ jnz tmp_104 ; q is lower + + ; If equal substract q +tmp_104: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_105: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; bnot +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bnot: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov rax, [rsi] + bt rax, 63 ; Check if is long operand + jc bnot_l1 +bnot_s: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp bnot_l1n + +bnot_l1: + bt rax, 62 ; check if montgomery first + jnc bnot_l1n + +bnot_l1m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + +bnot_l1n: + + mov rax, [rsi + 8] + not rax + + mov [rdi + 8], rax + + mov rax, [rsi + 16] + not rax + + mov [rdi + 16], rax + + mov rax, [rsi + 24] + not rax + + mov [rdi + 24], rax + + mov rax, [rsi + 32] + not rax + + and rax, [lboMask] + + mov [rdi + 32], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_107 ; q is bigget so done. 
+ jnz tmp_106 ; q is lower + + ; If equal substract q +tmp_106: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_107: + + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawShr +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= how much is shifted +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +rawShr: + cmp rdx, 0 + je Fr_rawCopy + + cmp rdx, 254 + jae Fr_rawZero + +rawShr_nz: + mov r8, rdx + shr r8,6 + mov rcx, rdx + and rcx, 0x3F + jz rawShr_aligned + mov ch, 64 + sub ch, cl + + mov r9, 1 + rol cx, 8 + shl r9, cl + rol cx, 8 + sub r9, 1 + mov r10, r9 + not r10 + + + cmp r8, 3 + jae rawShr_if2_0 + + mov rax, [rsi + r8*8 + 0 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 8 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 0], rax + + jmp rawShr_endif_0 +rawShr_if2_0: + jne rawShr_else_0 + + mov rax, [rsi + r8*8 + 0 ] + shr rax, cl + and rax, r9 + mov [rdi + 0], rax + + jmp rawShr_endif_0 +rawShr_else_0: + xor rax, rax + mov [rdi + 0], rax +rawShr_endif_0: + + cmp r8, 2 + jae rawShr_if2_1 + + mov rax, [rsi + r8*8 + 8 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 16 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 8], rax + + jmp rawShr_endif_1 +rawShr_if2_1: + jne rawShr_else_1 + + mov rax, [rsi + r8*8 + 8 ] + shr rax, cl + and rax, r9 + mov [rdi + 8], rax + + jmp rawShr_endif_1 +rawShr_else_1: + xor rax, rax + mov [rdi + 8], rax +rawShr_endif_1: + + cmp r8, 1 + jae rawShr_if2_2 + + mov rax, [rsi + r8*8 + 16 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 24 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 16], rax + + jmp rawShr_endif_2 +rawShr_if2_2: + jne 
rawShr_else_2 + + mov rax, [rsi + r8*8 + 16 ] + shr rax, cl + and rax, r9 + mov [rdi + 16], rax + + jmp rawShr_endif_2 +rawShr_else_2: + xor rax, rax + mov [rdi + 16], rax +rawShr_endif_2: + + cmp r8, 0 + jae rawShr_if2_3 + + mov rax, [rsi + r8*8 + 24 ] + shr rax, cl + and rax, r9 + mov r11, [rsi + r8*8 + 32 ] + rol cx, 8 + shl r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + mov [rdi + 24], rax + + jmp rawShr_endif_3 +rawShr_if2_3: + jne rawShr_else_3 + + mov rax, [rsi + r8*8 + 24 ] + shr rax, cl + and rax, r9 + mov [rdi + 24], rax + + jmp rawShr_endif_3 +rawShr_else_3: + xor rax, rax + mov [rdi + 24], rax +rawShr_endif_3: + + + ret + +rawShr_aligned: + + cmp r8, 3 + ja rawShr_if3_0 + mov rax, [rsi + r8*8 + 0 ] + mov [rdi + 0], rax + jmp rawShr_endif3_0 +rawShr_if3_0: + xor rax, rax + mov [rdi + 0], rax +rawShr_endif3_0: + + cmp r8, 2 + ja rawShr_if3_1 + mov rax, [rsi + r8*8 + 8 ] + mov [rdi + 8], rax + jmp rawShr_endif3_1 +rawShr_if3_1: + xor rax, rax + mov [rdi + 8], rax +rawShr_endif3_1: + + cmp r8, 1 + ja rawShr_if3_2 + mov rax, [rsi + r8*8 + 16 ] + mov [rdi + 16], rax + jmp rawShr_endif3_2 +rawShr_if3_2: + xor rax, rax + mov [rdi + 16], rax +rawShr_endif3_2: + + cmp r8, 0 + ja rawShr_if3_3 + mov rax, [rsi + r8*8 + 24 ] + mov [rdi + 24], rax + jmp rawShr_endif3_3 +rawShr_if3_3: + xor rax, rax + mov [rdi + 24], rax +rawShr_endif3_3: + + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawShl +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= how much is shifted +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +rawShl: + cmp rdx, 0 + je Fr_rawCopy + + cmp rdx, 254 + jae Fr_rawZero + + mov r8, rdx + shr r8,6 + mov rcx, rdx + and rcx, 0x3F + jz rawShl_aligned + mov ch, 64 + sub ch, cl + + + mov r10, 1 + shl r10, cl + sub r10, 1 + mov r9, r10 + not r9 + + mov rdx, rsi + mov rax, r8 + shl rax, 3 + sub rdx, rax + + + cmp r8, 3 + jae rawShl_if2_3 + + mov rax, [rdx + 24 
] + shl rax, cl + and rax, r9 + mov r11, [rdx + 16 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + and rax, [lboMask] + + + mov [rdi + 24], rax + + jmp rawShl_endif_3 +rawShl_if2_3: + jne rawShl_else_3 + + mov rax, [rdx + 24 ] + shl rax, cl + and rax, r9 + + and rax, [lboMask] + + + mov [rdi + 24], rax + + jmp rawShl_endif_3 +rawShl_else_3: + xor rax, rax + mov [rdi + 24], rax +rawShl_endif_3: + + cmp r8, 2 + jae rawShl_if2_2 + + mov rax, [rdx + 16 ] + shl rax, cl + and rax, r9 + mov r11, [rdx + 8 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + + mov [rdi + 16], rax + + jmp rawShl_endif_2 +rawShl_if2_2: + jne rawShl_else_2 + + mov rax, [rdx + 16 ] + shl rax, cl + and rax, r9 + + + mov [rdi + 16], rax + + jmp rawShl_endif_2 +rawShl_else_2: + xor rax, rax + mov [rdi + 16], rax +rawShl_endif_2: + + cmp r8, 1 + jae rawShl_if2_1 + + mov rax, [rdx + 8 ] + shl rax, cl + and rax, r9 + mov r11, [rdx + 0 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + + mov [rdi + 8], rax + + jmp rawShl_endif_1 +rawShl_if2_1: + jne rawShl_else_1 + + mov rax, [rdx + 8 ] + shl rax, cl + and rax, r9 + + + mov [rdi + 8], rax + + jmp rawShl_endif_1 +rawShl_else_1: + xor rax, rax + mov [rdi + 8], rax +rawShl_endif_1: + + cmp r8, 0 + jae rawShl_if2_0 + + mov rax, [rdx + 0 ] + shl rax, cl + and rax, r9 + mov r11, [rdx + -8 ] + rol cx, 8 + shr r11, cl + rol cx, 8 + and r11, r10 + or rax, r11 + + + mov [rdi + 0], rax + + jmp rawShl_endif_0 +rawShl_if2_0: + jne rawShl_else_0 + + mov rax, [rdx + 0 ] + shl rax, cl + and rax, r9 + + + mov [rdi + 0], rax + + jmp rawShl_endif_0 +rawShl_else_0: + xor rax, rax + mov [rdi + 0], rax +rawShl_endif_0: + + + + + + + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc tmp_109 ; q is bigget so done. + jnz tmp_108 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc tmp_109 ; q is bigget so done. 
+ jnz tmp_108 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc tmp_109 ; q is bigget so done. + jnz tmp_108 ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc tmp_109 ; q is bigget so done. + jnz tmp_108 ; q is lower + + ; If equal substract q +tmp_108: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +tmp_109: + + ret; + +rawShl_aligned: + mov rdx, rsi + mov rax, r8 + shl rax, 3 + sub rdx, rax + + + cmp r8, 3 + ja rawShl_if3_3 + mov rax, [rdx + 24 ] + + and rax, [lboMask] + + mov [rdi + 24], rax + jmp rawShl_endif3_3 +rawShl_if3_3: + xor rax, rax + mov [rdi + 24], rax +rawShl_endif3_3: + + cmp r8, 2 + ja rawShl_if3_2 + mov rax, [rdx + 16 ] + + mov [rdi + 16], rax + jmp rawShl_endif3_2 +rawShl_if3_2: + xor rax, rax + mov [rdi + 16], rax +rawShl_endif3_2: + + cmp r8, 1 + ja rawShl_if3_1 + mov rax, [rdx + 8 ] + + mov [rdi + 8], rax + jmp rawShl_endif3_1 +rawShl_if3_1: + xor rax, rax + mov [rdi + 8], rax +rawShl_endif3_1: + + cmp r8, 0 + ja rawShl_if3_0 + mov rax, [rdx + 0 ] + + mov [rdi + 0], rax + jmp rawShl_endif3_0 +rawShl_if3_0: + xor rax, rax + mov [rdi + 0], rax +rawShl_endif3_0: + + + + + + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc tmp_111 ; q is bigget so done. + jnz tmp_110 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc tmp_111 ; q is bigget so done. + jnz tmp_110 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc tmp_111 ; q is bigget so done. + jnz tmp_110 ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc tmp_111 ; q is bigget so done. 
+ jnz tmp_110 ; q is lower + + ; If equal substract q +tmp_110: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +tmp_111: + + ret + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; shr +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_shr: + push rbp + push rsi + push rdi + push rdx + mov rbp, rsp + + + + + + + mov rcx, [rdx] + bt rcx, 63 ; Check if is short second operand + jnc tmp_112 + + ; long 2 + bt rcx, 62 ; Check if is montgomery second operand + jnc tmp_113 + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + +tmp_113: + mov rcx, [rdx + 8] + cmp rcx, 254 + jae tmp_114 + xor rax, rax + + cmp [rdx + 16], rax + jnz tmp_114 + + cmp [rdx + 24], rax + jnz tmp_114 + + cmp [rdx + 32], rax + jnz tmp_114 + + mov rdx, rcx + jmp do_shr + +tmp_114: + mov rcx, [q] + sub rcx, [rdx+8] + cmp rcx, 254 + jae setzero + mov rax, [q] + sub rax, [rdx+8] + + mov rax, [q+ 8] + sbb rax, [rdx + 16] + jnz setzero + + mov rax, [q+ 16] + sbb rax, [rdx + 24] + jnz setzero + + mov rax, [q+ 24] + sbb rax, [rdx + 32] + jnz setzero + + mov rdx, rcx + jmp do_shl + +tmp_112: + cmp ecx, 0 + jl tmp_115 + cmp ecx, 254 + jae setzero + movsx rdx, ecx + jmp do_shr +tmp_115: + neg ecx + cmp ecx, 254 + jae setzero + movsx rdx, ecx + jmp do_shl + + + + +;;;;;;;;;;;;;;;;;;;;;; +; shl +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_shl: + push rbp + push rsi + push rdi + push rdx + mov rbp, rsp + + + + + + mov rcx, [rdx] + bt rcx, 63 ; 
Check if is short second operand + jnc tmp_116 + + ; long 2 + bt rcx, 62 ; Check if is montgomery second operand + jnc tmp_117 + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + +tmp_117: + mov rcx, [rdx + 8] + cmp rcx, 254 + jae tmp_118 + xor rax, rax + + cmp [rdx + 16], rax + jnz tmp_118 + + cmp [rdx + 24], rax + jnz tmp_118 + + cmp [rdx + 32], rax + jnz tmp_118 + + mov rdx, rcx + jmp do_shl + +tmp_118: + mov rcx, [q] + sub rcx, [rdx+8] + cmp rcx, 254 + jae setzero + mov rax, [q] + sub rax, [rdx+8] + + mov rax, [q+ 8] + sbb rax, [rdx + 16] + jnz setzero + + mov rax, [q+ 16] + sbb rax, [rdx + 24] + jnz setzero + + mov rax, [q+ 24] + sbb rax, [rdx + 32] + jnz setzero + + mov rdx, rcx + jmp do_shr + +tmp_116: + cmp ecx, 0 + jl tmp_119 + cmp ecx, 254 + jae setzero + movsx rdx, ecx + jmp do_shl +tmp_119: + neg ecx + cmp ecx, 254 + jae setzero + movsx rdx, ecx + jmp do_shr + + + +;;;;;;;;;; +;;; doShl +;;;;;;;;;; +do_shl: + mov rcx, [rsi] + bt rcx, 63 ; Check if is short second operand + jc do_shll +do_shls: + + movsx rax, ecx + cmp rax, 0 + jz setzero; + jl do_shlcl + + cmp rdx, 31 + jae do_shlcl + + mov cl, dl + shl rax, cl + mov rcx, rax + shr rcx, 31 + jnz do_shlcl + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +do_shlcl: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp do_shlln + +do_shll: + bt rcx, 62 ; Check if is short second operand + jnc do_shlln + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + +do_shlln: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + add rdi, 8 + add rsi, 8 + call rawShl + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + + +;;;;;;;;;; +;;; doShr +;;;;;;;;;; +do_shr: + mov rcx, [rsi] + bt rcx, 63 ; Check if is short second operand + jc do_shrl +do_shrs: 
+ movsx rax, ecx + cmp rax, 0 + jz setzero; + jl do_shrcl + + cmp rdx, 31 + jae setzero + + mov cl, dl + shr rax, cl + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +do_shrcl: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + +do_shrl: + bt rcx, 62 ; Check if is short second operand + jnc do_shrln + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + +do_shrln: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + add rdi, 8 + add rsi, 8 + call rawShr + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + +setzero: + xor rax, rax + mov [rdi], rax + mov rsp, rbp + pop rdx + pop rdi + pop rsi + pop rbp + ret + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rgt - Raw Greater Than +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi > *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rgt: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc rgt_l1 + bt r9, 63 ; Check if is short second operand + jc rgt_s1l2 + +rgt_s1s2: ; Both operands are short + cmp r8d, r9d + jg rgt_ret1 + jmp rgt_ret0 + + +rgt_l1: + bt r9, 63 ; Check if is short second operand + jc rgt_l1l2 + +;;;;;;;; +rgt_l1s2: + bt r8, 62 ; check if montgomery first + jc rgt_l1ms2 +rgt_l1ns2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +rgt_l1ms2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + + +;;;;;;;; 
+rgt_s1l2: + bt r9, 62 ; check if montgomery second + jc rgt_s1l2m +rgt_s1l2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + +rgt_s1l2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +;;;; +rgt_l1l2: + bt r8, 62 ; check if montgomery first + jc rgt_l1ml2 +rgt_l1nl2: + bt r9, 62 ; check if montgomery second + jc rgt_l1nl2m +rgt_l1nl2n: + jmp rgtL1L2 + +rgt_l1nl2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + +rgt_l1ml2: + bt r9, 62 ; check if montgomery second + jc rgt_l1ml2m +rgt_l1ml2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rgtL1L2 + +rgt_l1ml2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rgtL1L2 + + +;;;;;; +; rgtL1L2 +;;;;;; + +rgtL1L2: + + + mov rax, [rsi + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc 
rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jmp rgtl1l2_p1 + + + +rgtl1l2_p1: + + + mov rax, [rdx + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jmp rgtRawL1L2 + + + + +rgtl1l2_n1: + + + mov rax, [rdx + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jmp rgt_ret0 + + + + + +rgtRawL1L2: + + mov rax, [rsi + 32] + cmp [rdx + 32], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + jnz rgt_ret0 + + + mov rax, [rsi + 24] + cmp [rdx + 24], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + jnz rgt_ret0 + + + mov rax, [rsi + 16] + cmp [rdx + 16], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + jnz rgt_ret0 + + + mov rax, [rsi + 8] + cmp [rdx + 8], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd + + + +rgt_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +rgt_ret1: + mov rax, 1 + mov rsp, 
rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rlt - Raw Less Than +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi > *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rlt: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc rlt_l1 + bt r9, 63 ; Check if is short second operand + jc rlt_s1l2 + +rlt_s1s2: ; Both operands are short + cmp r8d, r9d + jl rlt_ret1 + jmp rlt_ret0 + + +rlt_l1: + bt r9, 63 ; Check if is short second operand + jc rlt_l1l2 + +;;;;;;;; +rlt_l1s2: + bt r8, 62 ; check if montgomery first + jc rlt_l1ms2 +rlt_l1ns2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +rlt_l1ms2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + + +;;;;;;;; +rlt_s1l2: + bt r9, 62 ; check if montgomery second + jc rlt_s1l2m +rlt_s1l2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + +rlt_s1l2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +;;;; +rlt_l1l2: + bt r8, 62 ; check if montgomery first + jc rlt_l1ml2 +rlt_l1nl2: + bt r9, 62 ; check if montgomery second + jc rlt_l1nl2m +rlt_l1nl2n: + jmp rltL1L2 + +rlt_l1nl2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push 
r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + +rlt_l1ml2: + bt r9, 62 ; check if montgomery second + jc rlt_l1ml2m +rlt_l1ml2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp rltL1L2 + +rlt_l1ml2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toNormal + mov rsi, rdi + pop rdi + pop rdx + + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp rltL1L2 + + +;;;;;; +; rltL1L2 +;;;;;; + +rltL1L2: + + + mov rax, [rsi + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jmp rltl1l2_p1 + + + +rltl1l2_p1: + + + mov rax, [rdx + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 + + jmp rltRawL1L2 + + + + +rltl1l2_n1: + + + mov rax, [rdx 
+ 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 + + jmp rlt_ret1 + + + + + +rltRawL1L2: + + mov rax, [rsi + 32] + cmp [rdx + 32], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + mov rax, [rsi + 24] + cmp [rdx + 24], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + mov rax, [rsi + 16] + cmp [rdx + 16], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + mov rax, [rsi + 8] + cmp [rdx + 8], rax ; comare with (q-1)/2 + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 + + +rlt_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret +rlt_ret1: + mov rax, 1 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; req - Raw Eq +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi == *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_req: + push rbp + push rsi + push rdx + mov rbp, rsp + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc req_l1 + bt r9, 63 ; Check if is short second operand + jc req_s1l2 + +req_s1s2: ; Both operands are short + cmp r8d, r9d + je req_ret1 + jmp req_ret0 + + +req_l1: + bt r9, 63 ; Check if is short second operand + jc req_l1l2 + +;;;;;;;; +req_l1s2: + bt r8, 62 ; check if montgomery first + jc req_l1ms2 +req_l1ns2: + + mov r8, rdi + 
sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toLongNormal + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + +req_l1ms2: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + + +;;;;;;;; +req_s1l2: + bt r9, 62 ; check if montgomery second + jc req_s1l2m +req_s1l2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toLongNormal + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + +req_s1l2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + +;;;; +req_l1l2: + bt r8, 62 ; check if montgomery first + jc req_l1ml2 +req_l1nl2: + bt r9, 62 ; check if montgomery second + jc req_l1nl2m +req_l1nl2n: + jmp reqL1L2 + +req_l1nl2m: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rdx + push r8 + call Fr_toMontgomery + mov rsi, rdi + pop rdi + pop rdx + + jmp reqL1L2 + +req_l1ml2: + bt r9, 62 ; check if montgomery second + jc req_l1ml2m +req_l1ml2n: + + mov r8, rdi + sub rsp, 40 + mov rdi, rsp + push rsi + mov rsi, rdx + push r8 + call Fr_toMontgomery + mov rdx, rdi + pop rdi + pop rsi + + jmp reqL1L2 + +req_l1ml2m: + jmp reqL1L2 + + +;;;;;; +; eqL1L2 +;;;;;; + +reqL1L2: + + mov rax, [rsi + 8] + cmp [rdx + 8], rax + jne req_ret0 ; rsi 1st > 2nd + + mov rax, [rsi + 16] + cmp [rdx + 16], rax + jne req_ret0 ; rsi 1st > 2nd + + mov rax, [rsi + 24] + cmp [rdx + 24], rax + jne req_ret0 ; rsi 1st > 2nd + + mov rax, [rsi + 32] + cmp [rdx + 32], rax + jne req_ret0 ; rsi 1st > 2nd + + +req_ret1: + mov rax, 1 + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + +req_ret0: + xor rax, rax + mov rsp, rbp + pop rdx + pop rsi + pop rbp + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; gt +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero 
or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_gt: + call Fr_rgt + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; lt +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_lt: + call Fr_rlt + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; eq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_eq: + call Fr_req + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; neq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_neq: + call Fr_req + xor rax, 1 + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; geq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_geq: + call Fr_rlt + xor rax, 1 + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; leq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. 
+; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_leq: + call Fr_rgt + xor rax, 1 + mov [rdi], rax + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawIsEq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rdi <= Pointer to element 1 +; rsi <= Pointer to element 2 +; Returns +; ax <= 1 if are equal 0, otherwise +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rawIsEq: + + mov rax, [rsi + 0] + cmp [rdi + 0], rax + jne rawIsEq_ret0 + + mov rax, [rsi + 8] + cmp [rdi + 8], rax + jne rawIsEq_ret0 + + mov rax, [rsi + 16] + cmp [rdi + 16], rax + jne rawIsEq_ret0 + + mov rax, [rsi + 24] + cmp [rdi + 24], rax + jne rawIsEq_ret0 + +rawIsEq_ret1: + mov rax, 1 + ret + +rawIsEq_ret0: + xor rax, rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawIsZero +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rdi <= Pointer to element 1 +; Returns +; ax <= 1 if is 0, otherwise +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rawIsZero: + + cmp qword [rdi + 0], $0 + jne rawIsZero_ret0 + + cmp qword [rdi + 8], $0 + jne rawIsZero_ret0 + + cmp qword [rdi + 16], $0 + jne rawIsZero_ret0 + + cmp qword [rdi + 24], $0 + jne rawIsZero_ret0 + + +rawIsZero_ret1: + mov rax, 1 + ret + +rawIsZero_ret0: + xor rax, rax + ret + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; land +;;;;;;;;;;;;;;;;;;;;;; +; Logical and between two elements +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result zero or one +; Modified Registers: +; rax, rcx, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_land: + + + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_120 + + test eax, eax + jz retZero_122 + jmp retOne_121 + +tmp_120: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_121 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_121 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_121 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_121 + + +retZero_122: + mov qword r8, 0 + jmp done_123 + +retOne_121: + mov qword r8, 1 + 
+done_123: + + + + + + + + mov rax, [rdx] + bt rax, 63 + jc tmp_124 + + test eax, eax + jz retZero_126 + jmp retOne_125 + +tmp_124: + + mov rax, [rdx + 8] + test rax, rax + jnz retOne_125 + + mov rax, [rdx + 16] + test rax, rax + jnz retOne_125 + + mov rax, [rdx + 24] + test rax, rax + jnz retOne_125 + + mov rax, [rdx + 32] + test rax, rax + jnz retOne_125 + + +retZero_126: + mov qword rcx, 0 + jmp done_127 + +retOne_125: + mov qword rcx, 1 + +done_127: + + and rcx, r8 + mov [rdi], rcx + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; lor +;;;;;;;;;;;;;;;;;;;;;; +; Logical or between two elements +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result zero or one +; Modified Registers: +; rax, rcx, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_lor: + + + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_128 + + test eax, eax + jz retZero_130 + jmp retOne_129 + +tmp_128: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_129 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_129 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_129 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_129 + + +retZero_130: + mov qword r8, 0 + jmp done_131 + +retOne_129: + mov qword r8, 1 + +done_131: + + + + + + + + mov rax, [rdx] + bt rax, 63 + jc tmp_132 + + test eax, eax + jz retZero_134 + jmp retOne_133 + +tmp_132: + + mov rax, [rdx + 8] + test rax, rax + jnz retOne_133 + + mov rax, [rdx + 16] + test rax, rax + jnz retOne_133 + + mov rax, [rdx + 24] + test rax, rax + jnz retOne_133 + + mov rax, [rdx + 32] + test rax, rax + jnz retOne_133 + + +retZero_134: + mov qword rcx, 0 + jmp done_135 + +retOne_133: + mov qword rcx, 1 + +done_135: + + or rcx, r8 + mov [rdi], rcx + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; lnot +;;;;;;;;;;;;;;;;;;;;;; +; Do the logical not of an element +; Params: +; rsi <= Pointer to element to be tested +; rdi <= Pointer to result one if element1 is zero and zero otherwise +; Modified Registers: +; rax, rax, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_lnot: + 
+ + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_136 + + test eax, eax + jz retZero_138 + jmp retOne_137 + +tmp_136: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_137 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_137 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_137 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_137 + + +retZero_138: + mov qword rcx, 0 + jmp done_139 + +retOne_137: + mov qword rcx, 1 + +done_139: + + test rcx, rcx + + jz lnot_retOne +lnot_retZero: + mov qword [rdi], 0 + ret +lnot_retOne: + mov qword [rdi], 1 + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; isTrue +;;;;;;;;;;;;;;;;;;;;;; +; Convert a 64 bit integer to a long format field element +; Params: +; rsi <= Pointer to the element +; Returs: +; rax <= 1 if true 0 if false +;;;;;;;;;;;;;;;;;;;;;;; +Fr_isTrue: + + + + + + + mov rax, [rdi] + bt rax, 63 + jc tmp_140 + + test eax, eax + jz retZero_142 + jmp retOne_141 + +tmp_140: + + mov rax, [rdi + 8] + test rax, rax + jnz retOne_141 + + mov rax, [rdi + 16] + test rax, rax + jnz retOne_141 + + mov rax, [rdi + 24] + test rax, rax + jnz retOne_141 + + mov rax, [rdi + 32] + test rax, rax + jnz retOne_141 + + +retZero_142: + mov qword rax, 0 + jmp done_143 + +retOne_141: + mov qword rax, 1 + +done_143: + + ret + + + + + + section .data +Fr_q: + dd 0 + dd 0x80000000 +Fr_rawq: +q dq 0x43e1f593f0000001,0x2833e84879b97091,0xb85045b68181585d,0x30644e72e131a029 +half dq 0xa1f0fac9f8000000,0x9419f4243cdcb848,0xdc2822db40c0ac2e,0x183227397098d014 +R2 dq 0x1bb8e645ae216da7,0x53fe3ab1e35c59e3,0x8c49833d53bb8085,0x0216d0b17f4e44a5 +Fr_R3: + dd 0 + dd 0x80000000 +Fr_rawR3: +R3 dq 0x5e94d8e1b4bf0040,0x2a489cbe1cfbb6b8,0x893cc664a19fcfed,0x0cf8594b7fcc657c +lboMask dq 0x3fffffffffffffff +np dq 0xc2e1f593efffffff + diff --git a/examples/toy/toy_cpp/fr.cpp b/examples/toy/bn254/toy_cpp/fr.cpp similarity index 84% rename from examples/toy/toy_cpp/fr.cpp rename to examples/toy/bn254/toy_cpp/fr.cpp index 78e8567..14864de 100644 --- 
a/examples/toy/toy_cpp/fr.cpp +++ b/examples/toy/bn254/toy_cpp/fr.cpp @@ -53,7 +53,7 @@ bool Fr_init() { return true; } -void Fr_str2element(PFrElement pE, char const *s, int base) { +void Fr_str2element(PFrElement pE, char const *s, uint base) { mpz_t mr; mpz_init_set_str(mr, s, base); mpz_fdiv_r(mr, mr, q); @@ -169,8 +169,8 @@ void Fr_fail() { RawFr::RawFr() { Fr_init(); - fromString(fZero, "0"); - fromString(fOne, "1"); + set(fZero, 0); + set(fOne, 1); neg(fNegOne, fOne); } @@ -197,7 +197,27 @@ void RawFr::fromUI(Element &r, unsigned long int v) { mpz_clear(mr); } +RawFr::Element RawFr::set(int value) { + Element r; + set(r, value); + return r; +} +void RawFr::set(Element &r, int value) { + mpz_t mr; + mpz_init(mr); + mpz_set_si(mr, value); + if (value < 0) { + mpz_add(mr, mr, q); + } + + mpz_export((void *)(r.v), NULL, -1, 8, -1, 0, mr); + + for (int i=0; i +#include +#include + +#ifdef __APPLE__ +#include // typedef unsigned int uint; +#endif // __APPLE__ + +#define Fr_N64 4 +#define Fr_SHORT 0x00000000 +#define Fr_LONG 0x80000000 +#define Fr_LONGMONTGOMERY 0xC0000000 +typedef uint64_t FrRawElement[Fr_N64]; +typedef struct __attribute__((__packed__)) { + int32_t shortVal; + uint32_t type; + FrRawElement longVal; +} FrElement; +typedef FrElement *PFrElement; +extern FrElement Fr_q; +extern FrElement Fr_R3; +extern FrRawElement Fr_rawq; +extern FrRawElement Fr_rawR3; + +extern "C" void Fr_copy(PFrElement r, PFrElement a); +extern "C" void Fr_copyn(PFrElement r, PFrElement a, int n); +extern "C" void Fr_add(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_sub(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_neg(PFrElement r, PFrElement a); +extern "C" void Fr_mul(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_square(PFrElement r, PFrElement a); +extern "C" void Fr_band(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bxor(PFrElement r, 
PFrElement a, PFrElement b); +extern "C" void Fr_bnot(PFrElement r, PFrElement a); +extern "C" void Fr_shl(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_shr(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_eq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_neq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lt(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_gt(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_leq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_geq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_land(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lnot(PFrElement r, PFrElement a); +extern "C" void Fr_toNormal(PFrElement r, PFrElement a); +extern "C" void Fr_toLongNormal(PFrElement r, PFrElement a); +extern "C" void Fr_toMontgomery(PFrElement r, PFrElement a); + +extern "C" int Fr_isTrue(PFrElement pE); +extern "C" int Fr_toInt(PFrElement pE); + +extern "C" void Fr_rawCopy(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawSwap(FrRawElement pRawResult, FrRawElement pRawA); +extern "C" void Fr_rawAdd(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawSub(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawNeg(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawMMul(FrRawElement pRawResult, const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" void Fr_rawMSquare(FrRawElement pRawResult, const FrRawElement pRawA); +extern "C" void Fr_rawMMul1(FrRawElement pRawResult, const FrRawElement pRawA, uint64_t pRawB); +extern "C" void Fr_rawToMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +extern "C" void Fr_rawFromMontgomery(FrRawElement pRawResult, const FrRawElement &pRawA); +extern "C" int 
Fr_rawIsEq(const FrRawElement pRawA, const FrRawElement pRawB); +extern "C" int Fr_rawIsZero(const FrRawElement pRawB); + +extern "C" void Fr_fail(); + + +// Pending functions to convert + +void Fr_str2element(PFrElement pE, char const*s, uint base); +char *Fr_element2str(PFrElement pE); +void Fr_idiv(PFrElement r, PFrElement a, PFrElement b); +void Fr_mod(PFrElement r, PFrElement a, PFrElement b); +void Fr_inv(PFrElement r, PFrElement a); +void Fr_div(PFrElement r, PFrElement a, PFrElement b); +void Fr_pow(PFrElement r, PFrElement a, PFrElement b); + +class RawFr { + +public: + const static int N64 = Fr_N64; + const static int MaxBits = 254; + + + struct Element { + FrRawElement v; + }; + +private: + Element fZero; + Element fOne; + Element fNegOne; + +public: + + RawFr(); + ~RawFr(); + + const Element &zero() { return fZero; }; + const Element &one() { return fOne; }; + const Element &negOne() { return fNegOne; }; + Element set(int value); + void set(Element &r, int value); + + void fromString(Element &r, const std::string &n, uint32_t radix = 10); + std::string toString(const Element &a, uint32_t radix = 10); + + void inline copy(Element &r, const Element &a) { Fr_rawCopy(r.v, a.v); }; + void inline swap(Element &a, Element &b) { Fr_rawSwap(a.v, b.v); }; + void inline add(Element &r, const Element &a, const Element &b) { Fr_rawAdd(r.v, a.v, b.v); }; + void inline sub(Element &r, const Element &a, const Element &b) { Fr_rawSub(r.v, a.v, b.v); }; + void inline mul(Element &r, const Element &a, const Element &b) { Fr_rawMMul(r.v, a.v, b.v); }; + + Element inline add(const Element &a, const Element &b) { Element r; Fr_rawAdd(r.v, a.v, b.v); return r;}; + Element inline sub(const Element &a, const Element &b) { Element r; Fr_rawSub(r.v, a.v, b.v); return r;}; + Element inline mul(const Element &a, const Element &b) { Element r; Fr_rawMMul(r.v, a.v, b.v); return r;}; + + Element inline neg(const Element &a) { Element r; Fr_rawNeg(r.v, a.v); return r; }; + Element 
inline square(const Element &a) { Element r; Fr_rawMSquare(r.v, a.v); return r; }; + + Element inline add(int a, const Element &b) { return add(set(a), b);}; + Element inline sub(int a, const Element &b) { return sub(set(a), b);}; + Element inline mul(int a, const Element &b) { return mul(set(a), b);}; + + Element inline add(const Element &a, int b) { return add(a, set(b));}; + Element inline sub(const Element &a, int b) { return sub(a, set(b));}; + Element inline mul(const Element &a, int b) { return mul(a, set(b));}; + + void inline mul1(Element &r, const Element &a, uint64_t b) { Fr_rawMMul1(r.v, a.v, b); }; + void inline neg(Element &r, const Element &a) { Fr_rawNeg(r.v, a.v); }; + void inline square(Element &r, const Element &a) { Fr_rawMSquare(r.v, a.v); }; + void inv(Element &r, const Element &a); + void div(Element &r, const Element &a, const Element &b); + void exp(Element &r, const Element &base, uint8_t* scalar, unsigned int scalarSize); + + void inline toMontgomery(Element &r, const Element &a) { Fr_rawToMontgomery(r.v, a.v); }; + void inline fromMontgomery(Element &r, const Element &a) { Fr_rawFromMontgomery(r.v, a.v); }; + int inline eq(const Element &a, const Element &b) { return Fr_rawIsEq(a.v, b.v); }; + int inline isZero(const Element &a) { return Fr_rawIsZero(a.v); }; + + void toMpz(mpz_t r, const Element &a); + void fromMpz(Element &a, const mpz_t r); + + int toRprBE(const Element &element, uint8_t *data, int bytes); + int fromRprBE(Element &element, const uint8_t *data, int bytes); + + int bytes ( void ) { return Fr_N64 * 8; }; + + void fromUI(Element &r, unsigned long int v); + + static RawFr field; + +}; + + +#endif // __FR_H + + + diff --git a/examples/toy/bn254/toy_cpp/fr.o b/examples/toy/bn254/toy_cpp/fr.o new file mode 100644 index 0000000..d6fc248 Binary files /dev/null and b/examples/toy/bn254/toy_cpp/fr.o differ diff --git a/examples/toy/bn254/toy_cpp/fr_asm.o b/examples/toy/bn254/toy_cpp/fr_asm.o new file mode 100644 index 
0000000..f1e71ad Binary files /dev/null and b/examples/toy/bn254/toy_cpp/fr_asm.o differ diff --git a/examples/toy/toy_cpp/main.cpp b/examples/toy/bn254/toy_cpp/main.cpp similarity index 100% rename from examples/toy/toy_cpp/main.cpp rename to examples/toy/bn254/toy_cpp/main.cpp diff --git a/examples/toy/bn254/toy_cpp/main.o b/examples/toy/bn254/toy_cpp/main.o new file mode 100644 index 0000000..c95e528 Binary files /dev/null and b/examples/toy/bn254/toy_cpp/main.o differ diff --git a/examples/toy/bn254/toy_cpp/toy b/examples/toy/bn254/toy_cpp/toy new file mode 100755 index 0000000..4b01078 Binary files /dev/null and b/examples/toy/bn254/toy_cpp/toy differ diff --git a/examples/toy/toy_cpp/toy.cpp b/examples/toy/bn254/toy_cpp/toy.cpp similarity index 84% rename from examples/toy/toy_cpp/toy.cpp rename to examples/toy/bn254/toy_cpp/toy.cpp index 4146f63..4fc4df5 100644 --- a/examples/toy/toy_cpp/toy.cpp +++ b/examples/toy/bn254/toy_cpp/toy.cpp @@ -29,17 +29,23 @@ void release_memory_component(Circom_CalcWit* ctx, uint pos) {{ if (pos != 0){{ -delete ctx->componentMemory[pos].subcomponents; +if(ctx->componentMemory[pos].subcomponents) +delete []ctx->componentMemory[pos].subcomponents; -delete ctx->componentMemory[pos].subcomponentsParallel; +if(ctx->componentMemory[pos].subcomponentsParallel) +delete []ctx->componentMemory[pos].subcomponentsParallel; -delete ctx->componentMemory[pos].outputIsSet; +if(ctx->componentMemory[pos].outputIsSet) +delete []ctx->componentMemory[pos].outputIsSet; -delete ctx->componentMemory[pos].mutexes; +if(ctx->componentMemory[pos].mutexes) +delete []ctx->componentMemory[pos].mutexes; -delete ctx->componentMemory[pos].cvs; +if(ctx->componentMemory[pos].cvs) +delete []ctx->componentMemory[pos].cvs; -delete ctx->componentMemory[pos].sbct; +if(ctx->componentMemory[pos].sbct) +delete []ctx->componentMemory[pos].sbct; }} diff --git a/examples/toy/bn254/toy_cpp/toy.dat b/examples/toy/bn254/toy_cpp/toy.dat new file mode 100644 index 
0000000..3bf0e7e Binary files /dev/null and b/examples/toy/bn254/toy_cpp/toy.dat differ diff --git a/examples/toy/bn254/toy_cpp/toy.o b/examples/toy/bn254/toy_cpp/toy.o new file mode 100644 index 0000000..12a727f Binary files /dev/null and b/examples/toy/bn254/toy_cpp/toy.o differ diff --git a/examples/toy/toy_js/generate_witness.js b/examples/toy/bn254/toy_js/generate_witness.js similarity index 100% rename from examples/toy/toy_js/generate_witness.js rename to examples/toy/bn254/toy_js/generate_witness.js diff --git a/examples/toy/bn254/toy_js/toy.wasm b/examples/toy/bn254/toy_js/toy.wasm new file mode 100644 index 0000000..9a9fc78 Binary files /dev/null and b/examples/toy/bn254/toy_js/toy.wasm differ diff --git a/examples/toy/toy_js/witness_calculator.js b/examples/toy/bn254/toy_js/witness_calculator.js similarity index 100% rename from examples/toy/toy_js/witness_calculator.js rename to examples/toy/bn254/toy_js/witness_calculator.js diff --git a/examples/toy/compile.sh b/examples/toy/compile.sh index 9d525df..56f86ed 100644 --- a/examples/toy/compile.sh +++ b/examples/toy/compile.sh @@ -1,4 +1,8 @@ #!/bin/bash -circom ./examples/toy/toy.circom --r1cs --wasm --sym --c --output ./examples/toy/ --prime vesta -cd examples/toy/toy_cpp && make \ No newline at end of file +circom ./examples/toy/toy.circom --r1cs --wasm --sym --c --output ./examples/toy/pasta/ --prime vesta +cd examples/toy/pasta/toy_cpp && make +cd - + +circom ./examples/toy/toy.circom --r1cs --wasm --sym --c --output ./examples/toy/bn254/ --prime bn128 +cd examples/toy/bn254/toy_cpp && make \ No newline at end of file diff --git a/examples/toy/toy.r1cs b/examples/toy/pasta/toy.r1cs similarity index 100% rename from examples/toy/toy.r1cs rename to examples/toy/pasta/toy.r1cs diff --git a/examples/toy/pasta/toy.sym b/examples/toy/pasta/toy.sym new file mode 100644 index 0000000..e346cd4 --- /dev/null +++ b/examples/toy/pasta/toy.sym @@ -0,0 +1,5 @@ +1,1,0,main.step_out[0] +2,2,0,main.step_out[1] 
+3,3,0,main.step_in[0] +4,4,0,main.step_in[1] +5,-1,0,main.adder diff --git a/examples/toy/pasta/toy_cpp/Makefile b/examples/toy/pasta/toy_cpp/Makefile new file mode 100644 index 0000000..de1a5e7 --- /dev/null +++ b/examples/toy/pasta/toy_cpp/Makefile @@ -0,0 +1,22 @@ +CC=g++ +CFLAGS=-std=c++11 -O3 -I. +DEPS_HPP = circom.hpp calcwit.hpp fr.hpp +DEPS_O = main.o calcwit.o fr.o fr_asm.o + +ifeq ($(shell uname),Darwin) + NASM=nasm -fmacho64 --prefix _ +endif +ifeq ($(shell uname),Linux) + NASM=nasm -felf64 +endif + +all: toy + +%.o: %.cpp $(DEPS_HPP) + $(CC) -c $< $(CFLAGS) + +fr_asm.o: fr.asm + $(NASM) fr.asm -o fr_asm.o + +toy: $(DEPS_O) toy.o + $(CC) -o toy *.o -lgmp diff --git a/examples/toy/pasta/toy_cpp/calcwit.cpp b/examples/toy/pasta/toy_cpp/calcwit.cpp new file mode 100644 index 0000000..949fea4 --- /dev/null +++ b/examples/toy/pasta/toy_cpp/calcwit.cpp @@ -0,0 +1,126 @@ +#include +#include +#include +#include "calcwit.hpp" + +extern void run(Circom_CalcWit* ctx); + +std::string int_to_hex( u64 i ) +{ + std::stringstream stream; + stream << "0x" + << std::setfill ('0') << std::setw(16) + << std::hex << i; + return stream.str(); +} + +u64 fnv1a(std::string s) { + u64 hash = 0xCBF29CE484222325LL; + for(char& c : s) { + hash ^= u64(c); + hash *= 0x100000001B3LL; + } + return hash; +} + +Circom_CalcWit::Circom_CalcWit (Circom_Circuit *aCircuit, uint maxTh) { + circuit = aCircuit; + inputSignalAssignedCounter = get_main_input_signal_no(); + inputSignalAssigned = new bool[inputSignalAssignedCounter]; + for (int i = 0; i< inputSignalAssignedCounter; i++) { + inputSignalAssigned[i] = false; + } + signalValues = new FrElement[get_total_signal_no()]; + Fr_str2element(&signalValues[0], "1", 10); + componentMemory = new Circom_Component[get_number_of_components()]; + circuitConstants = circuit ->circuitConstants; + templateInsId2IOSignalInfo = circuit -> templateInsId2IOSignalInfo; + + maxThread = maxTh; + + // parallelism + numThread = 0; + +} + 
+Circom_CalcWit::~Circom_CalcWit() { + // ... +} + +uint Circom_CalcWit::getInputSignalHashPosition(u64 h) { + uint n = get_size_of_input_hashmap(); + uint pos = (uint)(h % (u64)n); + if (circuit->InputHashMap[pos].hash!=h){ + uint inipos = pos; + pos++; + while (pos != inipos) { + if (circuit->InputHashMap[pos].hash==h) return pos; + if (circuit->InputHashMap[pos].hash==0) { + fprintf(stderr, "Signal not found\n"); + assert(false); + } + pos = (pos+1)%n; + } + fprintf(stderr, "Signals not found\n"); + assert(false); + } + return pos; +} + +void Circom_CalcWit::tryRunCircuit(){ + if (inputSignalAssignedCounter == 0) { + run(this); + } +} + +void Circom_CalcWit::setInputSignal(u64 h, uint i, FrElement & val){ + if (inputSignalAssignedCounter == 0) { + fprintf(stderr, "No more signals to be assigned\n"); + assert(false); + } + uint pos = getInputSignalHashPosition(h); + if (i >= circuit->InputHashMap[pos].signalsize) { + fprintf(stderr, "Input signal array access exceeds the size\n"); + assert(false); + } + + uint si = circuit->InputHashMap[pos].signalid+i; + if (inputSignalAssigned[si-get_main_input_signal_start()]) { + fprintf(stderr, "Signal assigned twice: %d\n", si); + assert(false); + } + signalValues[si] = val; + inputSignalAssigned[si-get_main_input_signal_start()] = true; + inputSignalAssignedCounter--; + tryRunCircuit(); +} + +u64 Circom_CalcWit::getInputSignalSize(u64 h) { + uint pos = getInputSignalHashPosition(h); + return circuit->InputHashMap[pos].signalsize; +} + +std::string Circom_CalcWit::getTrace(u64 id_cmp){ + if (id_cmp == 0) return componentMemory[id_cmp].componentName; + else{ + u64 id_father = componentMemory[id_cmp].idFather; + std::string my_name = componentMemory[id_cmp].componentName; + + return Circom_CalcWit::getTrace(id_father) + "." 
+ my_name; + } + + +} + +std::string Circom_CalcWit::generate_position_array(uint* dimensions, uint size_dimensions, uint index){ + std::string positions = ""; + + for (uint i = 0 ; i < size_dimensions; i++){ + uint last_pos = index % dimensions[size_dimensions -1 - i]; + index = index / dimensions[size_dimensions -1 - i]; + std::string new_pos = "[" + std::to_string(last_pos) + "]"; + positions = new_pos + positions; + } + return positions; +} + diff --git a/examples/toy/pasta/toy_cpp/calcwit.hpp b/examples/toy/pasta/toy_cpp/calcwit.hpp new file mode 100644 index 0000000..363de21 --- /dev/null +++ b/examples/toy/pasta/toy_cpp/calcwit.hpp @@ -0,0 +1,69 @@ +#ifndef CIRCOM_CALCWIT_H +#define CIRCOM_CALCWIT_H + +#include +#include +#include +#include +#include + +#include "circom.hpp" +#include "fr.hpp" + +#define NMUTEXES 32 //512 + +u64 fnv1a(std::string s); + +class Circom_CalcWit { + + bool *inputSignalAssigned; + uint inputSignalAssignedCounter; + + Circom_Circuit *circuit; + +public: + + FrElement *signalValues; + Circom_Component* componentMemory; + FrElement* circuitConstants; + std::map templateInsId2IOSignalInfo; + std::string* listOfTemplateMessages; + + // parallelism + std::mutex numThreadMutex; + std::condition_variable ntcvs; + int numThread; + + int maxThread; + + // Functions called by the circuit + Circom_CalcWit(Circom_Circuit *aCircuit, uint numTh = NMUTEXES); + ~Circom_CalcWit(); + + // Public functions + void setInputSignal(u64 h, uint i, FrElement &val); + void tryRunCircuit(); + + u64 getInputSignalSize(u64 h); + + inline uint getRemaingInputsToBeSet() { + return inputSignalAssignedCounter; + } + + inline void getWitness(uint idx, PFrElement val) { + Fr_copy(val, &signalValues[circuit->witness2SignalList[idx]]); + } + + std::string getTrace(u64 id_cmp); + + std::string generate_position_array(uint* dimensions, uint size_dimensions, uint index); + +private: + + uint getInputSignalHashPosition(u64 h); + +}; + +typedef void 
(*Circom_TemplateFunction)(uint __cIdx, Circom_CalcWit* __ctx); + +#endif // CIRCOM_CALCWIT_H diff --git a/examples/toy/pasta/toy_cpp/calcwit.o b/examples/toy/pasta/toy_cpp/calcwit.o new file mode 100644 index 0000000..28d1cc3 Binary files /dev/null and b/examples/toy/pasta/toy_cpp/calcwit.o differ diff --git a/examples/toy/pasta/toy_cpp/circom.hpp b/examples/toy/pasta/toy_cpp/circom.hpp new file mode 100644 index 0000000..eabc686 --- /dev/null +++ b/examples/toy/pasta/toy_cpp/circom.hpp @@ -0,0 +1,85 @@ +#ifndef __CIRCOM_H +#define __CIRCOM_H + +#include +#include +#include +#include +#include + +#include "fr.hpp" + +typedef unsigned long long u64; +typedef uint32_t u32; +typedef uint8_t u8; + +//only for the main inputs +struct __attribute__((__packed__)) HashSignalInfo { + u64 hash; + u64 signalid; + u64 signalsize; +}; + +struct IODef { + u32 offset; + u32 len; + u32 *lengths; +}; + +struct IODefPair { + u32 len; + IODef* defs; +}; + +struct Circom_Circuit { + // const char *P; + HashSignalInfo* InputHashMap; + u64* witness2SignalList; + FrElement* circuitConstants; + std::map templateInsId2IOSignalInfo; +}; + + +struct Circom_Component { + u32 templateId; + u64 signalStart; + u32 inputCounter; + std::string templateName; + std::string componentName; + u64 idFather; + u32* subcomponents = NULL; + bool* subcomponentsParallel = NULL; + bool *outputIsSet = NULL; //one for each output + std::mutex *mutexes = NULL; //one for each output + std::condition_variable *cvs = NULL; + std::thread *sbct = NULL;//subcomponent threads +}; + +/* +For every template instantiation create two functions: +- name_create +- name_run + +//PFrElement: pointer to FrElement + +Every name_run or circom_function has: +===================================== + +//array of PFrElements for auxiliars in expression computation (known size); +PFrElements expaux[]; + +//array of PFrElements for local vars (known size) +PFrElements lvar[]; + +*/ + +uint get_main_input_signal_start(); +uint 
get_main_input_signal_no(); +uint get_total_signal_no(); +uint get_number_of_components(); +uint get_size_of_input_hashmap(); +uint get_size_of_witness(); +uint get_size_of_constants(); +uint get_size_of_io_map(); + +#endif // __CIRCOM_H diff --git a/examples/toy/toy_cpp/fr.asm b/examples/toy/pasta/toy_cpp/fr.asm similarity index 99% rename from examples/toy/toy_cpp/fr.asm rename to examples/toy/pasta/toy_cpp/fr.asm index d539042..0e180a5 100644 --- a/examples/toy/toy_cpp/fr.asm +++ b/examples/toy/pasta/toy_cpp/fr.asm @@ -65,6 +65,10 @@ + + + + ;;;;;;;;;;;;;;;;;;;;;; ; copy ;;;;;;;;;;;;;;;;;;;;;; @@ -284,15 +288,12 @@ Fr_long: jnc Fr_longNormal Fr_longMontgomery: - mov r8, rdi sub rsp, 40 + push rsi + mov rsi, rdi mov rdi, rsp - push rdx - push r8 call Fr_toNormal - mov rsi, rdi - pop rdi - pop rdx + pop rsi Fr_longNormal: diff --git a/examples/toy/pasta/toy_cpp/fr.cpp b/examples/toy/pasta/toy_cpp/fr.cpp new file mode 100644 index 0000000..14864de --- /dev/null +++ b/examples/toy/pasta/toy_cpp/fr.cpp @@ -0,0 +1,321 @@ +#include "fr.hpp" +#include +#include +#include +#include +#include + + +static mpz_t q; +static mpz_t zero; +static mpz_t one; +static mpz_t mask; +static size_t nBits; +static bool initialized = false; + + +void Fr_toMpz(mpz_t r, PFrElement pE) { + FrElement tmp; + Fr_toNormal(&tmp, pE); + if (!(tmp.type & Fr_LONG)) { + mpz_set_si(r, tmp.shortVal); + if (tmp.shortVal<0) { + mpz_add(r, r, q); + } + } else { + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)tmp.longVal); + } +} + +void Fr_fromMpz(PFrElement pE, mpz_t v) { + if (mpz_fits_sint_p(v)) { + pE->type = Fr_SHORT; + pE->shortVal = mpz_get_si(v); + } else { + pE->type = Fr_LONG; + for (int i=0; ilongVal[i] = 0; + mpz_export((void *)(pE->longVal), NULL, -1, 8, -1, 0, v); + } +} + + +bool Fr_init() { + if (initialized) return false; + initialized = true; + mpz_init(q); + mpz_import(q, Fr_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); + mpz_init_set_ui(zero, 0); + mpz_init_set_ui(one, 1); + 
nBits = mpz_sizeinbase (q, 2); + mpz_init(mask); + mpz_mul_2exp(mask, one, nBits); + mpz_sub(mask, mask, one); + return true; +} + +void Fr_str2element(PFrElement pE, char const *s, uint base) { + mpz_t mr; + mpz_init_set_str(mr, s, base); + mpz_fdiv_r(mr, mr, q); + Fr_fromMpz(pE, mr); + mpz_clear(mr); +} + +char *Fr_element2str(PFrElement pE) { + FrElement tmp; + mpz_t r; + if (!(pE->type & Fr_LONG)) { + if (pE->shortVal>=0) { + char *r = new char[32]; + sprintf(r, "%d", pE->shortVal); + return r; + } else { + mpz_init_set_si(r, pE->shortVal); + mpz_add(r, r, q); + } + } else { + Fr_toNormal(&tmp, pE); + mpz_init(r); + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)tmp.longVal); + } + char *res = mpz_get_str (0, 10, r); + mpz_clear(r); + return res; +} + +void Fr_idiv(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + // char *s1 = mpz_get_str (0, 10, ma); + // printf("s1 %s\n", s1); + Fr_toMpz(mb, b); + // char *s2 = mpz_get_str (0, 10, mb); + // printf("s2 %s\n", s2); + mpz_fdiv_q(mr, ma, mb); + // char *sr = mpz_get_str (0, 10, mr); + // printf("r %s\n", sr); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_mod(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + mpz_fdiv_r(mr, ma, mb); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_pow(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + mpz_powm(mr, ma, mb, q); + Fr_fromMpz(r, mr); + + mpz_clear(ma); + mpz_clear(mb); + mpz_clear(mr); +} + +void Fr_inv(PFrElement r, PFrElement a) { + mpz_t ma; + mpz_t mr; + mpz_init(ma); + mpz_init(mr); + + Fr_toMpz(ma, a); + mpz_invert(mr, ma, q); + Fr_fromMpz(r, 
mr); + mpz_clear(ma); + mpz_clear(mr); +} + +void Fr_div(PFrElement r, PFrElement a, PFrElement b) { + FrElement tmp; + Fr_inv(&tmp, b); + Fr_mul(r, a, &tmp); +} + +void Fr_fail() { + assert(false); +} + + +RawFr::RawFr() { + Fr_init(); + set(fZero, 0); + set(fOne, 1); + neg(fNegOne, fOne); +} + +RawFr::~RawFr() { +} + +void RawFr::fromString(Element &r, const std::string &s, uint32_t radix) { + mpz_t mr; + mpz_init_set_str(mr, s.c_str(), radix); + mpz_fdiv_r(mr, mr, q); + for (int i=0; i>3] & (1 << (p & 0x7))) +void RawFr::exp(Element &r, const Element &base, uint8_t* scalar, unsigned int scalarSize) { + bool oneFound = false; + Element copyBase; + copy(copyBase, base); + for (int i=scalarSize*8-1; i>=0; i--) { + if (!oneFound) { + if ( !BIT_IS_SET(scalar, i) ) continue; + copy(r, copyBase); + oneFound = true; + continue; + } + square(r, r); + if ( BIT_IS_SET(scalar, i) ) { + mul(r, r, copyBase); + } + } + if (!oneFound) { + copy(r, fOne); + } +} + +void RawFr::toMpz(mpz_t r, const Element &a) { + Element tmp; + Fr_rawFromMontgomery(tmp.v, a.v); + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)tmp.v); +} + +void RawFr::fromMpz(Element &r, const mpz_t a) { + for (int i=0; i #include +#ifdef __APPLE__ +#include // typedef unsigned int uint; +#endif // __APPLE__ + #define Fr_N64 4 #define Fr_SHORT 0x00000000 #define Fr_LONG 0x80000000 @@ -68,7 +72,7 @@ extern "C" void Fr_fail(); // Pending functions to convert -void Fr_str2element(PFrElement pE, char const*s, int base); +void Fr_str2element(PFrElement pE, char const*s, uint base); char *Fr_element2str(PFrElement pE); void Fr_idiv(PFrElement r, PFrElement a, PFrElement b); void Fr_mod(PFrElement r, PFrElement a, PFrElement b); @@ -97,9 +101,11 @@ class RawFr { RawFr(); ~RawFr(); - Element &zero() { return fZero; }; - Element &one() { return fOne; }; - Element &negOne() { return fNegOne; }; + const Element &zero() { return fZero; }; + const Element &one() { return fOne; }; + const Element &negOne() { return fNegOne; 
}; + Element set(int value); + void set(Element &r, int value); void fromString(Element &r, const std::string &n, uint32_t radix = 10); std::string toString(const Element &a, uint32_t radix = 10); @@ -109,6 +115,22 @@ class RawFr { void inline add(Element &r, const Element &a, const Element &b) { Fr_rawAdd(r.v, a.v, b.v); }; void inline sub(Element &r, const Element &a, const Element &b) { Fr_rawSub(r.v, a.v, b.v); }; void inline mul(Element &r, const Element &a, const Element &b) { Fr_rawMMul(r.v, a.v, b.v); }; + + Element inline add(const Element &a, const Element &b) { Element r; Fr_rawAdd(r.v, a.v, b.v); return r;}; + Element inline sub(const Element &a, const Element &b) { Element r; Fr_rawSub(r.v, a.v, b.v); return r;}; + Element inline mul(const Element &a, const Element &b) { Element r; Fr_rawMMul(r.v, a.v, b.v); return r;}; + + Element inline neg(const Element &a) { Element r; Fr_rawNeg(r.v, a.v); return r; }; + Element inline square(const Element &a) { Element r; Fr_rawMSquare(r.v, a.v); return r; }; + + Element inline add(int a, const Element &b) { return add(set(a), b);}; + Element inline sub(int a, const Element &b) { return sub(set(a), b);}; + Element inline mul(int a, const Element &b) { return mul(set(a), b);}; + + Element inline add(const Element &a, int b) { return add(a, set(b));}; + Element inline sub(const Element &a, int b) { return sub(a, set(b));}; + Element inline mul(const Element &a, int b) { return mul(a, set(b));}; + void inline mul1(Element &r, const Element &a, uint64_t b) { Fr_rawMMul1(r.v, a.v, b); }; void inline neg(Element &r, const Element &a) { Fr_rawNeg(r.v, a.v); }; void inline square(Element &r, const Element &a) { Fr_rawMSquare(r.v, a.v); }; @@ -124,6 +146,11 @@ class RawFr { void toMpz(mpz_t r, const Element &a); void fromMpz(Element &a, const mpz_t r); + int toRprBE(const Element &element, uint8_t *data, int bytes); + int fromRprBE(Element &element, const uint8_t *data, int bytes); + + int bytes ( void ) { return Fr_N64 * 
8; }; + void fromUI(Element &r, unsigned long int v); static RawFr field; diff --git a/examples/toy/pasta/toy_cpp/fr.o b/examples/toy/pasta/toy_cpp/fr.o new file mode 100644 index 0000000..d6fc248 Binary files /dev/null and b/examples/toy/pasta/toy_cpp/fr.o differ diff --git a/examples/toy/toy_cpp/fr_asm.o b/examples/toy/pasta/toy_cpp/fr_asm.o similarity index 56% rename from examples/toy/toy_cpp/fr_asm.o rename to examples/toy/pasta/toy_cpp/fr_asm.o index 6c96ae4..b68efe9 100644 Binary files a/examples/toy/toy_cpp/fr_asm.o and b/examples/toy/pasta/toy_cpp/fr_asm.o differ diff --git a/examples/toy/pasta/toy_cpp/main.cpp b/examples/toy/pasta/toy_cpp/main.cpp new file mode 100644 index 0000000..18d846c --- /dev/null +++ b/examples/toy/pasta/toy_cpp/main.cpp @@ -0,0 +1,288 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using json = nlohmann::json; + +#include "calcwit.hpp" +#include "circom.hpp" + + +#define handle_error(msg) \ + do { perror(msg); exit(EXIT_FAILURE); } while (0) + +Circom_Circuit* loadCircuit(std::string const &datFileName) { + Circom_Circuit *circuit = new Circom_Circuit; + + int fd; + struct stat sb; + + fd = open(datFileName.c_str(), O_RDONLY); + if (fd == -1) { + std::cout << ".dat file not found: " << datFileName << "\n"; + throw std::system_error(errno, std::generic_category(), "open"); + } + + if (fstat(fd, &sb) == -1) { /* To obtain file size */ + throw std::system_error(errno, std::generic_category(), "fstat"); + } + + u8* bdata = (u8*)mmap(NULL, sb.st_size, PROT_READ , MAP_PRIVATE, fd, 0); + close(fd); + + circuit->InputHashMap = new HashSignalInfo[get_size_of_input_hashmap()]; + uint dsize = get_size_of_input_hashmap()*sizeof(HashSignalInfo); + memcpy((void *)(circuit->InputHashMap), (void *)bdata, dsize); + + circuit->witness2SignalList = new u64[get_size_of_witness()]; + uint inisize = dsize; + dsize = get_size_of_witness()*sizeof(u64); + memcpy((void 
*)(circuit->witness2SignalList), (void *)(bdata+inisize), dsize); + + circuit->circuitConstants = new FrElement[get_size_of_constants()]; + if (get_size_of_constants()>0) { + inisize += dsize; + dsize = get_size_of_constants()*sizeof(FrElement); + memcpy((void *)(circuit->circuitConstants), (void *)(bdata+inisize), dsize); + } + + std::map templateInsId2IOSignalInfo1; + if (get_size_of_io_map()>0) { + u32 index[get_size_of_io_map()]; + inisize += dsize; + dsize = get_size_of_io_map()*sizeof(u32); + memcpy((void *)index, (void *)(bdata+inisize), dsize); + inisize += dsize; + assert(inisize % sizeof(u32) == 0); + assert(sb.st_size % sizeof(u32) == 0); + u32 dataiomap[(sb.st_size-inisize)/sizeof(u32)]; + memcpy((void *)dataiomap, (void *)(bdata+inisize), sb.st_size-inisize); + u32* pu32 = dataiomap; + + for (int i = 0; i < get_size_of_io_map(); i++) { + u32 n = *pu32; + IODefPair p; + p.len = n; + IODef defs[n]; + pu32 += 1; + for (u32 j = 0; j templateInsId2IOSignalInfo = move(templateInsId2IOSignalInfo1); + + munmap(bdata, sb.st_size); + + return circuit; +} + +bool check_valid_number(std::string & s, uint base){ + bool is_valid = true; + if (base == 16){ + for (uint i = 0; i < s.size(); i++){ + is_valid &= ( + ('0' <= s[i] && s[i] <= '9') || + ('a' <= s[i] && s[i] <= 'f') || + ('A' <= s[i] && s[i] <= 'F') + ); + } + } else{ + for (uint i = 0; i < s.size(); i++){ + is_valid &= ('0' <= s[i] && s[i] < char(int('0') + base)); + } + } + return is_valid; +} + +void json2FrElements (json val, std::vector & vval){ + if (!val.is_array()) { + FrElement v; + std::string s_aux, s; + uint base; + if (val.is_string()) { + s_aux = val.get(); + std::string possible_prefix = s_aux.substr(0, 2); + if (possible_prefix == "0b" || possible_prefix == "0B"){ + s = s_aux.substr(2, s_aux.size() - 2); + base = 2; + } else if (possible_prefix == "0o" || possible_prefix == "0O"){ + s = s_aux.substr(2, s_aux.size() - 2); + base = 8; + } else if (possible_prefix == "0x" || possible_prefix == 
"0X"){ + s = s_aux.substr(2, s_aux.size() - 2); + base = 16; + } else{ + s = s_aux; + base = 10; + } + if (!check_valid_number(s, base)){ + std::ostringstream errStrStream; + errStrStream << "Invalid number in JSON input: " << s_aux << "\n"; + throw std::runtime_error(errStrStream.str() ); + } + } else if (val.is_number()) { + double vd = val.get(); + std::stringstream stream; + stream << std::fixed << std::setprecision(0) << vd; + s = stream.str(); + base = 10; + } else { + std::ostringstream errStrStream; + errStrStream << "Invalid JSON type\n"; + throw std::runtime_error(errStrStream.str() ); + } + Fr_str2element (&v, s.c_str(), base); + vval.push_back(v); + } else { + for (uint i = 0; i < val.size(); i++) { + json2FrElements (val[i], vval); + } + } +} + + +void loadJson(Circom_CalcWit *ctx, std::string filename) { + std::ifstream inStream(filename); + json j; + inStream >> j; + + u64 nItems = j.size(); + // printf("Items : %llu\n",nItems); + if (nItems == 0){ + ctx->tryRunCircuit(); + } + for (json::iterator it = j.begin(); it != j.end(); ++it) { + // std::cout << it.key() << " => " << it.value() << '\n'; + u64 h = fnv1a(it.key()); + std::vector v; + json2FrElements(it.value(),v); + uint signalSize = ctx->getInputSignalSize(h); + if (v.size() < signalSize) { + std::ostringstream errStrStream; + errStrStream << "Error loading signal " << it.key() << ": Not enough values\n"; + throw std::runtime_error(errStrStream.str() ); + } + if (v.size() > signalSize) { + std::ostringstream errStrStream; + errStrStream << "Error loading signal " << it.key() << ": Too many values\n"; + throw std::runtime_error(errStrStream.str() ); + } + for (uint i = 0; i " << Fr_element2str(&(v[i])) << '\n'; + ctx->setInputSignal(h,i,v[i]); + } catch (std::runtime_error e) { + std::ostringstream errStrStream; + errStrStream << "Error setting signal: " << it.key() << "\n" << e.what(); + throw std::runtime_error(errStrStream.str() ); + } + } + } +} + +void writeBinWitness(Circom_CalcWit *ctx, 
std::string wtnsFileName) { + FILE *write_ptr; + + write_ptr = fopen(wtnsFileName.c_str(),"wb"); + + fwrite("wtns", 4, 1, write_ptr); + + u32 version = 2; + fwrite(&version, 4, 1, write_ptr); + + u32 nSections = 2; + fwrite(&nSections, 4, 1, write_ptr); + + // Header + u32 idSection1 = 1; + fwrite(&idSection1, 4, 1, write_ptr); + + u32 n8 = Fr_N64*8; + + u64 idSection1length = 8 + n8; + fwrite(&idSection1length, 8, 1, write_ptr); + + fwrite(&n8, 4, 1, write_ptr); + + fwrite(Fr_q.longVal, Fr_N64*8, 1, write_ptr); + + uint Nwtns = get_size_of_witness(); + + u32 nVars = (u32)Nwtns; + fwrite(&nVars, 4, 1, write_ptr); + + // Data + u32 idSection2 = 2; + fwrite(&idSection2, 4, 1, write_ptr); + + u64 idSection2length = (u64)n8*(u64)Nwtns; + fwrite(&idSection2length, 8, 1, write_ptr); + + FrElement v; + + for (int i=0;igetWitness(i, &v); + Fr_toLongNormal(&v, &v); + fwrite(v.longVal, Fr_N64*8, 1, write_ptr); + } + fclose(write_ptr); +} + +int main (int argc, char *argv[]) { + std::string cl(argv[0]); + if (argc!=3) { + std::cout << "Usage: " << cl << " \n"; + } else { + std::string datfile = cl + ".dat"; + std::string jsonfile(argv[1]); + std::string wtnsfile(argv[2]); + + // auto t_start = std::chrono::high_resolution_clock::now(); + + Circom_Circuit *circuit = loadCircuit(datfile); + + Circom_CalcWit *ctx = new Circom_CalcWit(circuit); + + loadJson(ctx, jsonfile); + if (ctx->getRemaingInputsToBeSet()!=0) { + std::cerr << "Not all inputs have been set. 
Only " << get_main_input_signal_no()-ctx->getRemaingInputsToBeSet() << " out of " << get_main_input_signal_no() << std::endl; + assert(false); + } + /* + for (uint i = 0; igetWitness(i, &x); + std::cout << i << ": " << Fr_element2str(&x) << std::endl; + } + */ + + //auto t_mid = std::chrono::high_resolution_clock::now(); + //std::cout << std::chrono::duration(t_mid-t_start).count()<(t_end-t_mid).count()< +#include +#include +#include "circom.hpp" +#include "calcwit.hpp" +void Example_0_create(uint soffset,uint coffset,Circom_CalcWit* ctx,std::string componentName,uint componentFather); +void Example_0_run(uint ctx_index,Circom_CalcWit* ctx); +Circom_TemplateFunction _functionTable[1] = { +Example_0_run }; +Circom_TemplateFunction _functionTableParallel[1] = { +NULL }; +uint get_main_input_signal_start() {return 3;} + +uint get_main_input_signal_no() {return 3;} + +uint get_total_signal_no() {return 6;} + +uint get_number_of_components() {return 1;} + +uint get_size_of_input_hashmap() {return 256;} + +uint get_size_of_witness() {return 5;} + +uint get_size_of_constants() {return 2;} + +uint get_size_of_io_map() {return 0;} + +void release_memory_component(Circom_CalcWit* ctx, uint pos) {{ + +if (pos != 0){{ + +if(ctx->componentMemory[pos].subcomponents) +delete []ctx->componentMemory[pos].subcomponents; + +if(ctx->componentMemory[pos].subcomponentsParallel) +delete []ctx->componentMemory[pos].subcomponentsParallel; + +if(ctx->componentMemory[pos].outputIsSet) +delete []ctx->componentMemory[pos].outputIsSet; + +if(ctx->componentMemory[pos].mutexes) +delete []ctx->componentMemory[pos].mutexes; + +if(ctx->componentMemory[pos].cvs) +delete []ctx->componentMemory[pos].cvs; + +if(ctx->componentMemory[pos].sbct) +delete []ctx->componentMemory[pos].sbct; + +}} + + +}} + + +// function declarations +// template declarations +void Example_0_create(uint soffset,uint coffset,Circom_CalcWit* ctx,std::string componentName,uint componentFather){ 
+ctx->componentMemory[coffset].templateId = 0; +ctx->componentMemory[coffset].templateName = "Example"; +ctx->componentMemory[coffset].signalStart = soffset; +ctx->componentMemory[coffset].inputCounter = 3; +ctx->componentMemory[coffset].componentName = componentName; +ctx->componentMemory[coffset].idFather = componentFather; +ctx->componentMemory[coffset].subcomponents = new uint[0]; +} + +void Example_0_run(uint ctx_index,Circom_CalcWit* ctx){ +FrElement* signalValues = ctx->signalValues; +u64 mySignalStart = ctx->componentMemory[ctx_index].signalStart; +std::string myTemplateName = ctx->componentMemory[ctx_index].templateName; +std::string myComponentName = ctx->componentMemory[ctx_index].componentName; +u64 myFather = ctx->componentMemory[ctx_index].idFather; +u64 myId = ctx_index; +u32* mySubcomponents = ctx->componentMemory[ctx_index].subcomponents; +bool* mySubcomponentsParallel = ctx->componentMemory[ctx_index].subcomponentsParallel; +FrElement* circuitConstants = ctx->circuitConstants; +std::string* listOfTemplateMessages = ctx->listOfTemplateMessages; +FrElement expaux[3]; +FrElement lvar[0]; +uint sub_component_aux; +uint index_multiple_eq; +{ +PFrElement aux_dest = &signalValues[mySignalStart + 0]; +// load src +Fr_add(&expaux[0],&signalValues[mySignalStart + 2],&signalValues[mySignalStart + 4]); // line circom 12 +// end load src +Fr_copy(aux_dest,&expaux[0]); +} +{ +PFrElement aux_dest = &signalValues[mySignalStart + 1]; +// load src +Fr_add(&expaux[0],&signalValues[mySignalStart + 2],&signalValues[mySignalStart + 3]); // line circom 13 +// end load src +Fr_copy(aux_dest,&expaux[0]); +} +for (uint i = 0; i < 0; i++){ +uint index_subc = ctx->componentMemory[ctx_index].subcomponents[i]; +if (index_subc != 0)release_memory_component(ctx,index_subc); +} +} + +void run(Circom_CalcWit* ctx){ +Example_0_create(1,0,ctx,"main",0); +Example_0_run(0,ctx); +} + diff --git a/examples/toy/toy_cpp/toy.dat b/examples/toy/pasta/toy_cpp/toy.dat similarity index 100% 
rename from examples/toy/toy_cpp/toy.dat rename to examples/toy/pasta/toy_cpp/toy.dat diff --git a/examples/toy/pasta/toy_cpp/toy.o b/examples/toy/pasta/toy_cpp/toy.o new file mode 100644 index 0000000..12a727f Binary files /dev/null and b/examples/toy/pasta/toy_cpp/toy.o differ diff --git a/examples/toy/pasta/toy_js/generate_witness.js b/examples/toy/pasta/toy_js/generate_witness.js new file mode 100644 index 0000000..eabb86e --- /dev/null +++ b/examples/toy/pasta/toy_js/generate_witness.js @@ -0,0 +1,20 @@ +const wc = require("./witness_calculator.js"); +const { readFileSync, writeFile } = require("fs"); + +if (process.argv.length != 5) { + console.log("Usage: node generate_witness.js "); +} else { + const input = JSON.parse(readFileSync(process.argv[3], "utf8")); + + const buffer = readFileSync(process.argv[2]); + wc(buffer).then(async witnessCalculator => { + // const w= await witnessCalculator.calculateWitness(input,0); + // for (let i=0; i< w.length; i++){ + // console.log(w[i]); + // } + const buff= await witnessCalculator.calculateWTNSBin(input,0); + writeFile(process.argv[4], buff, function(err) { + if (err) throw err; + }); + }); +} diff --git a/examples/toy/toy_js/toy.wasm b/examples/toy/pasta/toy_js/toy.wasm similarity index 98% rename from examples/toy/toy_js/toy.wasm rename to examples/toy/pasta/toy_js/toy.wasm index 9e69b53..95405bf 100644 Binary files a/examples/toy/toy_js/toy.wasm and b/examples/toy/pasta/toy_js/toy.wasm differ diff --git a/examples/toy/pasta/toy_js/witness_calculator.js b/examples/toy/pasta/toy_js/witness_calculator.js new file mode 100644 index 0000000..20e6e20 --- /dev/null +++ b/examples/toy/pasta/toy_js/witness_calculator.js @@ -0,0 +1,337 @@ +module.exports = async function builder(code, options) { + + options = options || {}; + + let wasmModule; + try { + wasmModule = await WebAssembly.compile(code); + } catch (err) { + console.log(err); + console.log("\nTry to run circom --c in order to generate c++ code instead\n"); + 
throw new Error(err); + } + + let wc; + + let errStr = ""; + let msgStr = ""; + + const instance = await WebAssembly.instantiate(wasmModule, { + runtime: { + exceptionHandler : function(code) { + let err; + if (code == 1) { + err = "Signal not found.\n"; + } else if (code == 2) { + err = "Too many signals set.\n"; + } else if (code == 3) { + err = "Signal already set.\n"; + } else if (code == 4) { + err = "Assert Failed.\n"; + } else if (code == 5) { + err = "Not enough memory.\n"; + } else if (code == 6) { + err = "Input signal array access exceeds the size.\n"; + } else { + err = "Unknown error.\n"; + } + throw new Error(err + errStr); + }, + printErrorMessage : function() { + errStr += getMessage() + "\n"; + // console.error(getMessage()); + }, + writeBufferMessage : function() { + const msg = getMessage(); + // Any calls to `log()` will always end with a `\n`, so that's when we print and reset + if (msg === "\n") { + console.log(msgStr); + msgStr = ""; + } else { + // If we've buffered other content, put a space in between the items + if (msgStr !== "") { + msgStr += " " + } + // Then append the message to the message we are creating + msgStr += msg; + } + }, + showSharedRWMemory : function() { + printSharedRWMemory (); + } + + } + }); + + const sanityCheck = + options +// options && +// ( +// options.sanityCheck || +// options.logGetSignal || +// options.logSetSignal || +// options.logStartComponent || +// options.logFinishComponent +// ); + + + wc = new WitnessCalculator(instance, sanityCheck); + return wc; + + function getMessage() { + var message = ""; + var c = instance.exports.getMessageChar(); + while ( c != 0 ) { + message += String.fromCharCode(c); + c = instance.exports.getMessageChar(); + } + return message; + } + + function printSharedRWMemory () { + const shared_rw_memory_size = instance.exports.getFieldNumLen32(); + const arr = new Uint32Array(shared_rw_memory_size); + for (let j=0; j { + const h = fnvHash(k); + const hMSB = parseInt(h.slice(0,8), 
16); + const hLSB = parseInt(h.slice(8,16), 16); + const fArr = flatArray(input[k]); + let signalSize = this.instance.exports.getInputSignalSize(hMSB, hLSB); + if (signalSize < 0){ + throw new Error(`Signal ${k} not found\n`); + } + if (fArr.length < signalSize) { + throw new Error(`Not enough values for input signal ${k}\n`); + } + if (fArr.length > signalSize) { + throw new Error(`Too many values for input signal ${k}\n`); + } + for (let i=0; i0) { + res.unshift(0); + i--; + } + } + return res; +} + +function fromArray32(arr) { //returns a BigInt + var res = BigInt(0); + const radix = BigInt(0x100000000); + for (let i = 0; i() + ); let iteration_count = 5; let root = current_dir().unwrap(); - let circuit_file = root.join("examples/toy/toy.r1cs"); - let r1cs = load_r1cs(&FileLocation::PathBuf(circuit_file)); - let witness_generator_wasm = root.join("examples/toy/toy_js/toy.wasm"); + let circuit_file = root.join(circuit_filepath); + let r1cs = load_r1cs::(&FileLocation::PathBuf(circuit_file)); + let witness_generator_file = root.join(witness_gen_filepath); let mut private_inputs = Vec::new(); for i in 0..iteration_count { @@ -22,9 +34,9 @@ fn main() { private_inputs.push(private_input); } - let start_public_input = vec![F1::from(10), F1::from(10)]; + let start_public_input = [F::::from(10), F::::from(10)]; - let pp = create_public_params(r1cs.clone()); + let pp: PublicParams = create_public_params(r1cs.clone()); println!( "Number of constraints per step (primary circuit): {}", @@ -47,27 +59,22 @@ fn main() { println!("Creating a RecursiveSNARK..."); let start = Instant::now(); let recursive_snark = create_recursive_circuit( - FileLocation::PathBuf(witness_generator_wasm), + FileLocation::PathBuf(witness_generator_file), r1cs, private_inputs, - start_public_input.clone(), + start_public_input.to_vec(), &pp, ) .unwrap(); println!("RecursiveSNARK creation took {:?}", start.elapsed()); // TODO: empty? 
- let z0_secondary = vec![::Scalar::zero()]; + let z0_secondary = [F::::from(0)]; // verify the recursive SNARK println!("Verifying a RecursiveSNARK..."); let start = Instant::now(); - let res = recursive_snark.verify( - &pp, - iteration_count, - start_public_input.clone(), - z0_secondary.clone(), - ); + let res = recursive_snark.verify(&pp, iteration_count, &start_public_input, &z0_secondary); println!( "RecursiveSNARK::verify: {:?}, took {:?}", res, @@ -78,8 +85,8 @@ fn main() { // produce a compressed SNARK println!("Generating a CompressedSNARK using Spartan with IPA-PC..."); let start = Instant::now(); - let (pk, vk) = CompressedSNARK::<_, _, _, _, S1, S2>::setup(&pp).unwrap(); - let res = CompressedSNARK::<_, _, _, _, S1, S2>::prove(&pp, &pk, &recursive_snark); + let (pk, vk) = CompressedSNARK::<_, _, _, _, S, S>::setup(&pp).unwrap(); + let res = CompressedSNARK::<_, _, _, _, S, S>::prove(&pp, &pk, &recursive_snark); println!( "CompressedSNARK::prove: {:?}, took {:?}", res.is_ok(), @@ -94,8 +101,8 @@ fn main() { let res = compressed_snark.verify( &vk, iteration_count, - start_public_input.clone(), - z0_secondary, + start_public_input.to_vec(), + z0_secondary.to_vec(), ); println!( "CompressedSNARK::verify: {:?}, took {:?}", @@ -104,3 +111,15 @@ fn main() { ); assert!(res.is_ok()); } + +fn main() { + let group_name = "bn254"; + + let circuit_filepath = format!("examples/toy/{}/toy.r1cs", group_name); + for witness_gen_filepath in [ + format!("examples/toy/{}/toy_cpp/toy", group_name), + format!("examples/toy/{}/toy_js/toy.wasm", group_name), + ] { + run_test(circuit_filepath.clone(), witness_gen_filepath); + } +} diff --git a/examples/toy.rs b/examples/toy_pasta.rs similarity index 58% rename from examples/toy.rs rename to examples/toy_pasta.rs index d2236ce..f6e842d 100644 --- a/examples/toy.rs +++ b/examples/toy_pasta.rs @@ -1,19 +1,30 @@ use std::{collections::HashMap, env::current_dir, time::Instant}; use nova_scotia::{ - circom::reader::load_r1cs, 
create_public_params, create_recursive_circuit, FileLocation, F1, - G2, S1, S2, + circom::reader::load_r1cs, create_public_params, create_recursive_circuit, FileLocation, F, S, +}; +use nova_snark::{ + provider, + traits::{circuit::StepCircuit, Group}, + CompressedSNARK, PublicParams, }; -use nova_snark::{traits::Group, CompressedSNARK}; use serde_json::json; -fn main() { +fn run_test(circuit_filepath: String, witness_gen_filepath: String) { + type G1 = pasta_curves::pallas::Point; + type G2 = pasta_curves::vesta::Point; + + println!( + "Running test with witness generator: {} and group: {}", + witness_gen_filepath, + std::any::type_name::() + ); let iteration_count = 5; let root = current_dir().unwrap(); - let circuit_file = root.join("examples/toy/toy.r1cs"); - let r1cs = load_r1cs(&FileLocation::PathBuf(circuit_file)); - let witness_generator_file = root.join("examples/toy/toy_cpp/toy"); + let circuit_file = root.join(circuit_filepath); + let r1cs = load_r1cs::(&FileLocation::PathBuf(circuit_file)); + let witness_generator_file = root.join(witness_gen_filepath); let mut private_inputs = Vec::new(); for i in 0..iteration_count { @@ -22,9 +33,9 @@ fn main() { private_inputs.push(private_input); } - let start_public_input = vec![F1::from(10), F1::from(10)]; + let start_public_input = [F::::from(10), F::::from(10)]; - let pp = create_public_params(r1cs.clone()); + let pp: PublicParams = create_public_params(r1cs.clone()); println!( "Number of constraints per step (primary circuit): {}", @@ -50,24 +61,19 @@ fn main() { FileLocation::PathBuf(witness_generator_file), r1cs, private_inputs, - start_public_input.clone(), + start_public_input.to_vec(), &pp, ) .unwrap(); println!("RecursiveSNARK creation took {:?}", start.elapsed()); // TODO: empty? 
- let z0_secondary = vec![::Scalar::zero()]; + let z0_secondary = [F::::from(0)]; // verify the recursive SNARK println!("Verifying a RecursiveSNARK..."); let start = Instant::now(); - let res = recursive_snark.verify( - &pp, - iteration_count, - start_public_input.clone(), - z0_secondary.clone(), - ); + let res = recursive_snark.verify(&pp, iteration_count, &start_public_input, &z0_secondary); println!( "RecursiveSNARK::verify: {:?}, took {:?}", res, @@ -78,8 +84,9 @@ fn main() { // produce a compressed SNARK println!("Generating a CompressedSNARK using Spartan with IPA-PC..."); let start = Instant::now(); - let (pk, vk) = CompressedSNARK::<_, _, _, _, S1, S2>::setup(&pp).unwrap(); - let res = CompressedSNARK::<_, _, _, _, S1, S2>::prove(&pp, &pk, &recursive_snark); + + let (pk, vk) = CompressedSNARK::<_, _, _, _, S, S>::setup(&pp).unwrap(); + let res = CompressedSNARK::<_, _, _, _, S, S>::prove(&pp, &pk, &recursive_snark); println!( "CompressedSNARK::prove: {:?}, took {:?}", res.is_ok(), @@ -94,8 +101,8 @@ fn main() { let res = compressed_snark.verify( &vk, iteration_count, - start_public_input.clone(), - z0_secondary, + start_public_input.to_vec(), + z0_secondary.to_vec(), ); println!( "CompressedSNARK::verify: {:?}, took {:?}", @@ -104,3 +111,15 @@ fn main() { ); assert!(res.is_ok()); } + +fn main() { + let group_name = "pasta"; + + let circuit_filepath = format!("examples/toy/{}/toy.r1cs", group_name); + for witness_gen_filepath in [ + format!("examples/toy/{}/toy_cpp/toy", group_name), + format!("examples/toy/{}/toy_js/toy.wasm", group_name), + ] { + run_test(circuit_filepath.clone(), witness_gen_filepath); + } +} diff --git a/src/circom/file.rs b/src/circom/file.rs index 8fad95f..564f3b7 100644 --- a/src/circom/file.rs +++ b/src/circom/file.rs @@ -4,15 +4,13 @@ use crate::circom::circuit::Constraint; use byteorder::{LittleEndian, ReadBytesExt}; use ff::PrimeField; -use pasta_curves::group::Group; +use hex_literal::hex; +use nova_snark::traits::Group; use 
std::{ collections::HashMap, io::{Error, ErrorKind, Read, Result, Seek, SeekFrom}, }; -type G1 = pasta_curves::pallas::Point; -type G2 = pasta_curves::vesta::Point; - // R1CSFile's header #[derive(Debug, Default)] pub struct Header { @@ -120,7 +118,11 @@ fn read_map(mut reader: R, size: u64, header: &Header) -> Result(mut reader: R) -> Result::Scalar>> { +pub fn from_reader(mut reader: R) -> Result::Scalar>> +where + G1: Group::Scalar>, + G2: Group::Scalar>, +{ let mut magic = [0u8; 4]; reader.read_exact(&mut magic)?; if magic != [0x72, 0x31, 0x63, 0x73] { @@ -161,8 +163,14 @@ pub fn from_reader(mut reader: R) -> Result(reader).unwrap(); assert_eq!(file.version, 1); assert_eq!(file.header.field_size, 32); diff --git a/src/circom/reader.rs b/src/circom/reader.rs index 2a445f4..171e343 100644 --- a/src/circom/reader.rs +++ b/src/circom/reader.rs @@ -14,9 +14,7 @@ use crate::circom::circuit::{CircuitJson, R1CS}; use crate::circom::file::{from_reader, read_field}; use crate::FileLocation; use ff::PrimeField; -use pasta_curves::group::Group; - -type G1 = pasta_curves::pallas::Point; +use nova_snark::traits::Group; pub fn generate_witness_from_bin( witness_bin: &Path, @@ -172,7 +170,11 @@ pub(crate) fn load_witness_from_bin_reader( #[cfg(not(target_family = "wasm"))] /// load r1cs file by filename with autodetect encoding (bin or json) -pub fn load_r1cs(filename: &FileLocation) -> R1CS<::Scalar> { +pub fn load_r1cs(filename: &FileLocation) -> R1CS<::Scalar> +where + G1: Group::Scalar>, + G2: Group::Scalar>, +{ let filename = match filename { FileLocation::PathBuf(filename) => filename, FileLocation::URL(_) => panic!("unreachable"), @@ -180,7 +182,7 @@ pub fn load_r1cs(filename: &FileLocation) -> R1CS<::Scalar> { if filename.ends_with("json") { load_r1cs_from_json_file(filename) } else { - load_r1cs_from_bin_file(filename) + load_r1cs_from_bin_file::(filename) } } @@ -230,17 +232,25 @@ fn load_r1cs_from_json(reader: R) -> R1CS { } /// load r1cs from bin file by filename 
-fn load_r1cs_from_bin_file(filename: &Path) -> R1CS<::Scalar> { +fn load_r1cs_from_bin_file(filename: &Path) -> R1CS<::Scalar> +where + G1: Group::Scalar>, + G2: Group::Scalar>, +{ let reader = OpenOptions::new() .read(true) .open(filename) .expect("unable to open."); - load_r1cs_from_bin(BufReader::new(reader)) + load_r1cs_from_bin::<_, G1, G2>(BufReader::new(reader)) } /// load r1cs from bin by a reader -pub(crate) fn load_r1cs_from_bin(reader: R) -> R1CS<::Scalar> { - let file = from_reader(reader).expect("unable to read."); +pub(crate) fn load_r1cs_from_bin(reader: R) -> R1CS<::Scalar> +where + G1: Group::Scalar>, + G2: Group::Scalar>, +{ + let file = from_reader::<_, G1, G2>(reader).expect("unable to read."); let num_inputs = (1 + file.header.n_pub_in + file.header.n_pub_out) as usize; let num_variables = file.header.n_wires as usize; let num_aux = num_variables - num_inputs; diff --git a/src/circom/wasm.rs b/src/circom/wasm.rs index c09f41e..3e98207 100644 --- a/src/circom/wasm.rs +++ b/src/circom/wasm.rs @@ -1,4 +1,4 @@ -use crate::{FileLocation, G1, R1CS}; +use crate::{FileLocation, R1CS}; use crate::circom::reader::{load_r1cs_from_bin, load_witness_from_bin_reader}; use ff::PrimeField; @@ -43,14 +43,18 @@ pub async fn generate_witness_browser(input_json_string: &str, wasm_file: &str) #[cfg(target_family = "wasm")] /// load r1cs file by filename with autodetect encoding (bin or json) -pub async fn load_r1cs(filename: &FileLocation) -> R1CS<::Scalar> { +pub async fn load_r1cs(filename: &FileLocation) -> R1CS<::Scalar> +where + G1: Group::Scalar>, + G2: Group::Scalar>, +{ let filename = match filename { FileLocation::PathBuf(_) => panic!("unreachable"), FileLocation::URL(path) => path, }; let r1cs_ser = read_file(filename).await.to_vec(); let r1cs_cursor = Cursor::new(r1cs_ser); - load_r1cs_from_bin(r1cs_cursor) + load_r1cs_from_bin::<_, G1, G2>(r1cs_cursor) } #[cfg(target_family = "wasm")] diff --git a/src/lib.rs b/src/lib.rs index 5f514bd..2b30bca 100644 
--- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,9 @@ use std::{ path::{Path, PathBuf}, }; +use crate::circom::reader::generate_witness_from_bin; use circom::circuit::{CircomCircuit, R1CS}; +use ff::Field; use nova_snark::{ traits::{circuit::TrivialTestCircuit, Group}, PublicParams, RecursiveSNARK, @@ -15,8 +17,6 @@ use num_traits::Num; use serde::{Deserialize, Serialize}; use serde_json::Value; -use crate::circom::reader::generate_witness_from_bin; - #[cfg(not(target_family = "wasm"))] use crate::circom::reader::generate_witness_from_wasm; @@ -25,24 +25,23 @@ use crate::circom::wasm::generate_witness_from_wasm; pub mod circom; -pub type G1 = pasta_curves::pallas::Point; -pub type F1 = ::Scalar; -pub type EE1 = nova_snark::provider::ipa_pc::EvaluationEngine; -pub type S1 = nova_snark::spartan::RelaxedR1CSSNARK; -pub type G2 = pasta_curves::vesta::Point; -pub type F2 = ::Scalar; -pub type EE2 = nova_snark::provider::ipa_pc::EvaluationEngine; -pub type S2 = nova_snark::spartan::RelaxedR1CSSNARK; - -pub type C1 = CircomCircuit; -pub type C2 = TrivialTestCircuit; +pub type F = ::Scalar; +pub type EE = nova_snark::provider::ipa_pc::EvaluationEngine; +pub type S = nova_snark::spartan::RelaxedR1CSSNARK>; +pub type C1 = CircomCircuit<::Scalar>; +pub type C2 = TrivialTestCircuit<::Scalar>; +#[derive(Clone)] pub enum FileLocation { PathBuf(PathBuf), URL(String), } -pub fn create_public_params(r1cs: R1CS) -> PublicParams { +pub fn create_public_params(r1cs: R1CS>) -> PublicParams, C2> +where + G1: Group::Scalar>, + G2: Group::Scalar>, +{ let circuit_primary = CircomCircuit { r1cs, witness: None, @@ -61,13 +60,110 @@ struct CircomInput { } #[cfg(not(target_family = "wasm"))] -pub fn create_recursive_circuit( +fn compute_witness( + current_public_input: Vec, + private_input: HashMap, + witness_generator_file: FileLocation, + witness_generator_output: &Path, +) -> Vec<::Scalar> +where + G1: Group::Scalar>, + G2: Group::Scalar>, +{ + let decimal_stringified_input: Vec = 
current_public_input + .iter() + .map(|x| BigInt::from_str_radix(x, 16).unwrap().to_str_radix(10)) + .collect(); + + let input = CircomInput { + step_in: decimal_stringified_input.clone(), + extra: private_input.clone(), + }; + + let is_wasm = match &witness_generator_file { + FileLocation::PathBuf(path) => path.extension().unwrap_or_default() == "wasm", + FileLocation::URL(_) => true, + }; + let input_json = serde_json::to_string(&input).unwrap(); + + if is_wasm { + generate_witness_from_wasm::>( + &witness_generator_file, + &input_json, + &witness_generator_output, + ) + } else { + let witness_generator_file = match &witness_generator_file { + FileLocation::PathBuf(path) => path, + FileLocation::URL(_) => panic!("unreachable"), + }; + generate_witness_from_bin::>( + &witness_generator_file, + &input_json, + &witness_generator_output, + ) + } +} + +#[cfg(target_family = "wasm")] +async fn compute_witness( + current_public_input: Vec, + private_input: HashMap, witness_generator_file: FileLocation, - r1cs: R1CS, + witness_generator_output: &Path, +) -> Vec<::Scalar> +where + G1: Group::Scalar>, + G2: Group::Scalar>, +{ + let decimal_stringified_input: Vec = current_public_input + .iter() + .map(|x| BigInt::from_str_radix(x, 16).unwrap().to_str_radix(10)) + .collect(); + + let input = CircomInput { + step_in: decimal_stringified_input.clone(), + extra: private_input.clone(), + }; + + let is_wasm = match &witness_generator_file { + FileLocation::PathBuf(path) => path.extension().unwrap_or_default() == "wasm", + FileLocation::URL(_) => true, + }; + let input_json = serde_json::to_string(&input).unwrap(); + + if is_wasm { + generate_witness_from_wasm::>( + &witness_generator_file, + &input_json, + &witness_generator_output, + ) + .await + } else { + let witness_generator_file = match &witness_generator_file { + FileLocation::PathBuf(path) => path, + FileLocation::URL(_) => panic!("unreachable"), + }; + generate_witness_from_bin::>( + &witness_generator_file, + 
&input_json, + &witness_generator_output, + ) + } +} + +#[cfg(not(target_family = "wasm"))] +pub fn create_recursive_circuit( + witness_generator_file: FileLocation, + r1cs: R1CS>, private_inputs: Vec>, - start_public_input: Vec, - pp: &PublicParams, -) -> Result, std::io::Error> { + start_public_input: Vec>, + pp: &PublicParams, C2>, +) -> Result, C2>, std::io::Error> +where + G1: Group::Scalar>, + G2: Group::Scalar>, +{ let root = current_dir().unwrap(); let witness_generator_output = root.join("circom_witness.wtns"); @@ -79,45 +175,36 @@ pub fn create_recursive_circuit( .collect::>(); let mut current_public_input = start_public_input_hex.clone(); - let circuit_secondary = TrivialTestCircuit::default(); - let z0_secondary = vec![::Scalar::zero()]; - let mut recursive_snark: Option> = None; - - for i in 0..iteration_count { - let decimal_stringified_input: Vec = current_public_input - .iter() - .map(|x| BigInt::from_str_radix(x, 16).unwrap().to_str_radix(10)) - .collect(); + let witness_0 = compute_witness::( + current_public_input.clone(), + private_inputs[0].clone(), + witness_generator_file.clone(), + &witness_generator_output, + ); - let input = CircomInput { - step_in: decimal_stringified_input.clone(), - extra: private_inputs[i].clone(), - }; + let circuit_0 = CircomCircuit { + r1cs: r1cs.clone(), + witness: Some(witness_0), + }; + let circuit_secondary = TrivialTestCircuit::default(); + let z0_secondary = vec![G2::Scalar::ZERO]; - let input_json = serde_json::to_string(&input).unwrap(); + let mut recursive_snark = RecursiveSNARK::, C2>::new( + &pp, + &circuit_0, + &circuit_secondary, + start_public_input.clone(), + z0_secondary.clone(), + ); - let is_wasm = match &witness_generator_file { - FileLocation::PathBuf(path) => path.extension().unwrap_or_default() == "wasm", - FileLocation::URL(_) => true, - }; + for i in 0..iteration_count { + let witness = compute_witness::( + current_public_input.clone(), + private_inputs[i].clone(), + 
witness_generator_file.clone(), + &witness_generator_output, + ); - let witness = if is_wasm { - generate_witness_from_wasm::<::Scalar>( - &witness_generator_file, - &input_json, - &witness_generator_output, - ) - } else { - let witness_generator_file = match &witness_generator_file { - FileLocation::PathBuf(path) => path, - FileLocation::URL(_) => panic!("unreachable"), - }; - generate_witness_from_bin::<::Scalar>( - &witness_generator_file, - &input_json, - &witness_generator_output, - ) - }; let circuit = CircomCircuit { r1cs: r1cs.clone(), witness: Some(witness), @@ -129,33 +216,36 @@ pub fn create_recursive_circuit( .map(|&x| format!("{:?}", x).strip_prefix("0x").unwrap().to_string()) .collect(); - let res = RecursiveSNARK::prove_step( + let res = recursive_snark.prove_step( &pp, - recursive_snark, - circuit.clone(), - circuit_secondary.clone(), + &circuit, + &circuit_secondary, start_public_input.clone(), z0_secondary.clone(), ); assert!(res.is_ok()); - recursive_snark = Some(res.unwrap()); } fs::remove_file(witness_generator_output)?; - let recursive_snark = recursive_snark.unwrap(); Ok(recursive_snark) } #[cfg(target_family = "wasm")] -pub async fn create_recursive_circuit( +pub async fn create_recursive_circuit( witness_generator_file: FileLocation, - r1cs: R1CS, + r1cs: R1CS>, private_inputs: Vec>, - start_public_input: Vec, - pp: &PublicParams, -) -> Result, std::io::Error> { + start_public_input: Vec>, + pp: &PublicParams, C2>, +) -> Result, C2>, std::io::Error> +where + G1: Group::Scalar>, + G2: Group::Scalar>, +{ + let root = current_dir().unwrap(); + let witness_generator_output = root.join("circom_witness.wtns"); + let iteration_count = private_inputs.len(); - let mut circuit_iterations = Vec::with_capacity(iteration_count); let start_public_input_hex = start_public_input .iter() @@ -163,76 +253,59 @@ pub async fn create_recursive_circuit( .collect::>(); let mut current_public_input = start_public_input_hex.clone(); - for i in 0..iteration_count { - 
let decimal_stringified_input: Vec = current_public_input - .iter() - .map(|x| BigInt::from_str_radix(x, 16).unwrap().to_str_radix(10)) - .collect(); - - let input = CircomInput { - step_in: decimal_stringified_input.clone(), - extra: private_inputs[i].clone(), - }; + let witness_0 = compute_witness::( + current_public_input.clone(), + private_inputs[0].clone(), + witness_generator_file.clone(), + &witness_generator_output, + ) + .await; + + let circuit_0 = CircomCircuit { + r1cs: r1cs.clone(), + witness: Some(witness_0), + }; + let circuit_secondary = TrivialTestCircuit::default(); + let z0_secondary = vec![G2::Scalar::ZERO]; - let input_json = serde_json::to_string(&input).unwrap(); + let mut recursive_snark = RecursiveSNARK::, C2>::new( + &pp, + &circuit_0, + &circuit_secondary, + start_public_input.clone(), + z0_secondary.clone(), + ); - let is_wasm = match &witness_generator_file { - FileLocation::PathBuf(path) => path.extension().unwrap_or_default() == "wasm", - FileLocation::URL(_) => true, - }; + for i in 0..iteration_count { + let witness = compute_witness::( + current_public_input.clone(), + private_inputs[i].clone(), + witness_generator_file.clone(), + &witness_generator_output, + ) + .await; - let witness = if is_wasm { - generate_witness_from_wasm::<::Scalar>( - &witness_generator_file, - &input_json, - Path::new(""), - ) - .await - } else { - let witness_generator_file = match &witness_generator_file { - FileLocation::PathBuf(path) => path, - FileLocation::URL(_) => panic!("unreachable"), - }; - generate_witness_from_bin::<::Scalar>( - &witness_generator_file, - &input_json, - Path::new(""), - ) - }; let circuit = CircomCircuit { r1cs: r1cs.clone(), witness: Some(witness), }; - let current_public_output = circuit.get_public_outputs(); - circuit_iterations.push(circuit); + let current_public_output = circuit.get_public_outputs(); current_public_input = current_public_output .iter() .map(|&x| format!("{:?}", x).strip_prefix("0x").unwrap().to_string()) 
.collect(); - } - let circuit_secondary = TrivialTestCircuit::default(); - - let mut recursive_snark: Option> = None; - - let z0_secondary = vec![::Scalar::zero()]; - - for i in 0..iteration_count { - let res = RecursiveSNARK::prove_step( + let res = recursive_snark.prove_step( &pp, - recursive_snark, - circuit_iterations[i].clone(), - circuit_secondary.clone(), + &circuit, + &circuit_secondary, start_public_input.clone(), z0_secondary.clone(), ); - assert!(res.is_ok()); - recursive_snark = Some(res.unwrap()); } - - let recursive_snark = recursive_snark.unwrap(); + fs::remove_file(witness_generator_output)?; Ok(recursive_snark) }