diff --git a/benchmarks/bench_elliptic_parallel_template.nim b/benchmarks/bench_elliptic_parallel_template.nim index d9015406..0bb2c57f 100644 --- a/benchmarks/bench_elliptic_parallel_template.nim +++ b/benchmarks/bench_elliptic_parallel_template.nim @@ -66,6 +66,7 @@ proc createBenchMsmContext*(EC: typedesc, inputSizes: openArray[int]): BenchMsmC const bits = EC.getScalarField().bits() type ECaff = affine(EC) + result.numInputs = maxNumInputs result.points = newSeq[ECaff](maxNumInputs) result.coefs = newSeq[BigInt[bits]](maxNumInputs) @@ -81,7 +82,7 @@ proc createBenchMsmContext*(EC: typedesc, inputSizes: openArray[int]): BenchMsmC var tmp = threadRng.random_unsafe(EC) tmp.clearCofactor() points[i].affine(tmp) - coefs[i] = rng.random_unsafe(BigInt[bits]) + coefs[i] = threadRng.random_unsafe(BigInt[bits]) let chunks = balancedChunksPrioNumber(0, maxNumInputs, result.tp.numThreads) diff --git a/benchmarks/zkalc.nim b/benchmarks/zkalc.nim new file mode 100644 index 00000000..cd045858 --- /dev/null +++ b/benchmarks/zkalc.nim @@ -0,0 +1,432 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. 
+ +# ############################################################ +# +# Benchmark results for zka.lc +# +# ############################################################ + +# https://zka.lc/ +# https://github.com/mmaker/zkalc + +import + constantine/hashes, + constantine/lowlevel_fields, + constantine/lowlevel_elliptic_curves, + constantine/lowlevel_elliptic_curves_parallel, + constantine/threadpool, + # Helpers + helpers/prng_unsafe, + # Standard library + std/[stats, monotimes, times, strformat, strutils, cmdline], + # Third-party + jsony, cliche + +type + ZkalcBenchDetails = object + `range`: seq[int] + results: seq[float64] + stddev: seq[float64] + + ZkalcBenchResult = object + add_ff, mul_ff, invert: ZkalcBenchDetails + ip_ff: ZkalcBenchDetails # ip: inner-product + fft: ZkalcBenchDetails + + add_G1, mul_G1, msm_G1: ZkalcBenchDetails + is_in_sub_G1: ZkalcBenchDetails + hash_G1: ZkalcBenchDetails + + add_G2, mul_G2, msm_G2: ZkalcBenchDetails + is_in_sub_G2: ZkalcBenchDetails + hash_G2: ZkalcBenchDetails + + mul_Gt: ZkalcBenchDetails + multiexp_Gt: ZkalcBenchDetails + + pairing: ZkalcBenchDetails + multipairing: ZkalcBenchDetails + +type AggStats = tuple[rs: RunningStat, batchSize: int] + +# Utilities +# ------------------------------------------------------------------------------------- + +template bench(body: untyped): AggStats = + const warmupMs = 100 + const batchMs = 10 + const benchMs = 5000 + + block: + var stats: RunningStat + stats.clear() + + proc warmup(warmupMs: int): tuple[num_iters: int, elapsedNs: int64] = + ## Warmup for the specified time and returns the number of iterations and time used + let start = getMonotime().ticks() + let stop = start + 1_000_000'i64*int64(warmupMs) + + var num_iters = 0 + + while true: + body + + let cur = getMonotime().ticks() + num_iters += 1 + + if cur >= stop: + return (num_iters, cur - start) + + # Warmup and measure how many iterations are done during warmup + let (candidateIters, elapsedNs) = warmup(warmupMs) + + 
# Deduce batch size for bench iterations so that each batch is at least 10ms to amortize clock overhead + let batchSize = max(1, int(candidateIters.float64 * batchMs.float64 / warmupMs.float64)) + # Compute the number of iterations for ~5s of benchmarks + let iters = int( + (candidateIters.float64 / batchSize.float64) * # Divide the computed number of iterations by the size of the batch + max(1, benchMs.float64 / (elapsedNs.float64 * 1e-6)) # Scale by the ratio of bench time / warmup time + ) + + for _ in 0 ..< iters: + let start = getMonotime() + + for _ in 0 ..< batchSize: + body + + let stop = getMonotime() + let elapsedNs = (stop.ticks() - start.ticks()) div batchSize + + # We can store integers up to 2⁵³ in a float64 without loss of precision (see also ulp) + # 1 billion is ~ 2³⁰, so you would need 2²³ seconds = 8388608s = 13 weeks 6 days 2 hours 10 minutes 8 seconds + stats.push(elapsedNs) + + (stats, batchSize) + +proc report(op: string, curve: Algebra, aggStats: AggStats) = + let avg = aggStats.rs.mean() + let stddev = aggStats.rs.standardDeviationS() # Sample standard deviation (and not population) + let coefvar = stddev / avg * 100 # coefficient of variation + let throughput = 1e9 / float64(avg) + let iters = aggStats.rs.n + let batchSize = aggStats.batchSize + echo &"{op:<50} {$curve:<10} {throughput:>15.3f} ops/s {avg:>15.1f} ns/op (avg) ±{coefvar:>4.1f}% (coef var) {iters:>4} iterations of {batchSize:>6} operations" + +proc separator(length: int) = + echo "-".repeat(length) + +proc separator() = separator(174) + +proc toZkalc(stats: AggStats, size = 1): ZkalcBenchDetails = + ZkalcBenchDetails( + `range`: @[size], + results: @[stats.rs.mean()], + stddev: @[stats.rs.standardDeviationS()] # Sample standard deviation (and not population) + ) + +proc append(details: var ZkalcBenchDetails, stats: AggStats, size: int) = + details.`range`.add size + details.results.add stats.rs.mean() + details.stddev.add stats.rs.standardDeviationS() # Sample standard
deviation (and not population) + +# Prevent compiler optimizing benchmark away +# ------------------------------------------------------------------------------------- +# This doesn't always work unfortunately ... + +proc volatilize(x: ptr byte) {.codegenDecl: "$# $#(char const volatile *x)", inline.} = + discard + +template preventOptimAway*[T](x: var T) = + volatilize(cast[ptr byte](addr x)) + +template preventOptimAway*[T](x: T) = + volatilize(cast[ptr byte](unsafeAddr x)) + +# Field benches +# ------------------------------------------------------------------------------------- + +proc benchFrAdd(rng: var RngState, curve: static Algebra): ZkalcBenchDetails = + var x = rng.random_unsafe(Fr[curve]) + let y = rng.random_unsafe(Fr[curve]) + + preventOptimAway(x) + preventOptimAway(y) + + let stats = bench(): + x += y + + report("𝔽r Addition", curve, stats) + stats.toZkalc() + +proc benchFrMul(rng: var RngState, curve: static Algebra): ZkalcBenchDetails = + var x = rng.random_unsafe(Fr[curve]) + let y = rng.random_unsafe(Fr[curve]) + + preventOptimAway(x) + preventOptimAway(y) + + let stats = bench(): + x *= y + + report("𝔽r Multiplication", curve, stats) + stats.toZkalc() + +proc benchFrInv(rng: var RngState, curve: static Algebra, useVartime: bool): ZkalcBenchDetails = + var x = rng.random_unsafe(Fr[curve]) + + if useVartime: + let stats = bench(): + x.inv_vartime() + + report("𝔽r Inversion " & align("| vartime", 28), curve, stats) + stats.toZkalc() + else: + let stats = bench(): + x.inv() + + report("𝔽r Inversion " & align("| constant-time", 28), curve, stats) + stats.toZkalc() + +proc benchFrIP(rng: var RngState, curve: static Algebra): ZkalcBenchDetails = + + var r: Fr[curve] + let a = rng.random_unsafe(Fr[curve]) + let b = rng.random_unsafe(Fr[curve]) + let u = rng.random_unsafe(Fr[curve]) + let v = rng.random_unsafe(Fr[curve]) + + preventOptimAway(r) + preventOptimAway(a) + preventOptimAway(b) + preventOptimAway(u) + preventOptimAway(v) + + let stats = 
bench(): + r.sumprod([a, b], [u, v]) + + report("𝔽r Sum of products of size 2", curve, stats) + stats.toZkalc(2) + +# EC benches +# ------------------------------------------------------------------------------------- + +proc benchEcAdd(rng: var RngState, EC: type, useVartime: bool): ZkalcBenchDetails = + const G = + when EC.G == G1: "𝔾1" + else: "𝔾2" + const curve = EC.F.Name + + var r {.noInit.}: EC + let P = rng.random_unsafe(EC) + let Q = rng.random_unsafe(EC) + + preventOptimAway(r) + preventOptimAway(P) + preventOptimAway(Q) + + if useVartime: + let stats = bench(): + r.sum_vartime(P, Q) + + report(G & " Addition " & align("| vartime", 29), curve, stats) + stats.toZkalc() + else: + let stats = bench(): + r.sum(P, Q) + + report(G & " Addition " & align("| constant-time", 29), curve, stats) + stats.toZkalc() + +proc benchEcMul(rng: var RngState, EC: type, useVartime: bool): ZkalcBenchDetails = + const G = + when EC.G == G1: "𝔾1" + else: "𝔾2" + const curve = EC.F.Name + + var r {.noInit.}: EC + var P = rng.random_unsafe(EC) + P.clearCofactor() + let k = rng.random_unsafe(Fr[curve].getBigInt()) + + preventOptimAway(r) + preventOptimAway(P) + + if useVartime: + let stats = bench(): + r.scalarMul_vartime(k, P) + + report(G & " Scalar Multiplication " & align("| vartime", 16), curve, stats) + stats.toZkalc() + else: + let stats = bench(): + r.scalarMul(k, P) + + report(G & " Scalar Multiplication " & align("| constant-time", 16), curve, stats) + stats.toZkalc() + +# EC Msm benches +# ------------------------------------------------------------------------------------- + +type BenchMsmContext*[EC] = object + numInputs: int + coefs: seq[getBigInt(EC.F.Name, kScalarField)] + points: seq[affine(EC)] + +proc createBenchMsmContext*(rng: var RngState, EC: typedesc, maxNumInputs: int): BenchMsmContext[EC] = + let tp = Threadpool.new() + + type Big = EC.F.Name.getBigInt(kScalarField) + type ECaff = affine(EC) + + result.numInputs = maxNumInputs + result.points = 
newSeq[ECaff](maxNumInputs) + result.coefs = newSeq[Big](maxNumInputs) + + proc genCoefPointPairsChunk[EC, ECaff](rngSeed: uint64, start, len: int, points: ptr ECaff, coefs: ptr Big) {.nimcall.} = + let points = cast[ptr UncheckedArray[ECaff]](points) + let coefs = cast[ptr UncheckedArray[Big]](coefs) + + # RNGs are not threadsafe, create a threadlocal one seeded from the global RNG + var threadRng: RngState + threadRng.seed(rngSeed) + + for i in start ..< start + len: + var tmp = threadRng.random_unsafe(EC) + tmp.clearCofactor() + points[i].affine(tmp) + coefs[i] = threadRng.random_unsafe(Big) + + let chunks = balancedChunksPrioNumber(0, maxNumInputs, tp.numThreads) + + stdout.write &"Generating {maxNumInputs} (coefs, points) pairs ... " + stdout.flushFile() + + let start = getMonotime() + + syncScope: + for (id, start, size) in items(chunks): + tp.spawn genCoefPointPairsChunk[EC, ECaff](rng.next(), start, size, result.points[0].addr, result.coefs[0].addr) + + # Even if child threads are sleeping, it seems like perf is lower when there are threads around + # maybe because the kernel has more overhead or time quantum to keep track of, so shut them down.
+ tp.shutdown() + + let stop = getMonotime() + stdout.write &"in {float64(inNanoSeconds(stop-start)) / 1e6:6.3f} ms\n" + +proc benchEcMsm[EC](ctx: BenchMsmContext[EC]): ZkalcBenchDetails = + const G = + when EC.G == G1: "𝔾1" + else: "𝔾2" + const curve = EC.F.Name + + let tp = Threadpool.new() + var size = 2 + while size <= ctx.numInputs: + var r{.noInit.}: EC + template coefs: untyped = ctx.coefs.toOpenArray(0, size-1) + template points: untyped = ctx.points.toOpenArray(0, size-1) + + let stats = bench(): + tp.multiScalarMul_vartime_parallel(r, coefs, points) + + report(G & " MSM " & align($size, 9) & ", " & align($tp.numThreads & " threads", 11) & align("| vartime", 12), curve, stats) + result.append(stats, size) + + size *= 2 + + tp.shutdown() + +# EC Misc benches +# ------------------------------------------------------------------------------------- + +proc benchEcIsInSubgroup(rng: var RngState, EC: type): ZkalcBenchDetails = + const G = + when EC.G == G1: "𝔾1" + else: "𝔾2" + const curve = EC.F.Name + + var r {.noInit.}: EC + var P = rng.random_unsafe(EC) + P.clearCofactor() + preventOptimAway(P) + + let stats = bench(): + discard P.isInSubgroup() + + report(G & " Subgroup Check", curve, stats) + stats.toZkalc() + +proc benchEcHashToCurve(rng: var RngState, EC: type): ZkalcBenchDetails = + const G = + when EC.G == G1: "𝔾1" + else: "𝔾2" + const curve = EC.F.Name + + const dst = "Constantine_Zkalc_Bench_HashToCurve" + # Gnark uses a message of size 54, probably to not spill over padding with SHA256 + let msg = "Privacy is necessary for an open society [...]" + + type EC = EC_ShortW_Jac[Fp[curve], G1] + var P {.noInit.}: EC + + let stats = bench(): + sha256.hashToCurve( + k = 128, + output = P, + augmentation = "", + message = msg, + domainSepTag = dst + ) + + report(G & " Hash-to-Curve", curve, stats) + stats.toZkalc() + +# Run benches +# ------------------------------------------------------------------------------------- + +proc runBenches(curve: static 
Algebra, useVartime: bool) = + var rng: RngState + rng.seed(42) + + var zkalc: ZkalcBenchResult + + type EcG1 = EC_ShortW_Jac[Fp[curve], G1] + separator() + zkalc.add_ff = rng.benchFrAdd(curve) + zkalc.mul_ff = rng.benchFrMul(curve) + zkalc.invert = rng.benchFrInv(curve, useVartime) + zkalc.ip_ff = rng.benchFrIP(curve) + separator() + zkalc.add_g1 = rng.benchEcAdd(EcG1, useVartime) + zkalc.mul_g1 = rng.benchEcMul(EcG1, useVartime) + separator() + let ctxG1 = rng.createBenchMsmContext(EcG1, maxNumInputs = 2097152) + separator() + zkalc.msm_g1 = benchEcMsm(ctxG1) + separator() + zkalc.is_in_sub_G1 = rng.benchEcIsInSubgroup(EcG1) + when curve in {BN254_Snarks, BLS12_381}: + zkalc.hash_G1 = rng.benchEcHashToCurve(EcG1) + separator() + +proc main() = + let cmd = commandLineParams() + cmd.getOpt (curve: BN254_Snarks, vartime: true) + + case curve + of BN254_Snarks: BN254_Snarks.runBenches(vartime) + of Pallas: Pallas .runBenches(vartime) + of Vesta: Vesta .runBenches(vartime) + of BLS12_377: BLS12_377 .runBenches(vartime) + of BLS12_381: BLS12_381 .runBenches(vartime) + else: + echo "This curve '" & $curve & "' is not configured for benchmarking at the moment." 
+ +main() diff --git a/constantine/lowlevel_bigints.nim b/constantine/lowlevel_bigints.nim index fd67dfb8..50e58eaa 100644 --- a/constantine/lowlevel_bigints.nim +++ b/constantine/lowlevel_bigints.nim @@ -27,7 +27,10 @@ import # Base types # ------------------------------------------------------------ -export abstractions +export + abstractions.SecretBool, + abstractions.SecretWord, + abstractions.BigInt # BigInt # ------------------------------------------------------------ diff --git a/constantine/lowlevel_elliptic_curves.nim b/constantine/lowlevel_elliptic_curves.nim index d1430498..be518235 100644 --- a/constantine/lowlevel_elliptic_curves.nim +++ b/constantine/lowlevel_elliptic_curves.nim @@ -14,7 +14,6 @@ import ./math/elliptic/[ ec_scalar_mul_vartime, ec_multi_scalar_mul], - ./math/io/io_ec, ./hash_to_curve/hash_to_curve # ############################################################ @@ -35,8 +34,15 @@ import # ------------------------------------------------------------ export - abstractions, - algebras.Algebra + abstractions.SecretBool, + abstractions.SecretWord, + abstractions.BigInt, + algebras.Algebra, + algebras.getBigInt, + algebras.FieldKind + +# Generic sandwich +export abstractions # Elliptic curve # ------------------------------------------------------------ diff --git a/constantine/lowlevel_extension_fields.nim b/constantine/lowlevel_extension_fields.nim index 23edabe5..de9f1b00 100644 --- a/constantine/lowlevel_extension_fields.nim +++ b/constantine/lowlevel_extension_fields.nim @@ -31,8 +31,10 @@ import # ------------------------------------------------------------ export - abstractions, - algebras.Algebra + abstractions.SecretBool, + abstractions.SecretWord, + algebras.Algebra, + algebras.getBigInt # Extension fields # ------------------------------------------------------------ @@ -86,4 +88,3 @@ export extension_fields.sqrt_if_square export extension_fields.sqrt export frobenius.frobenius_map - diff --git a/constantine/lowlevel_fields.nim 
b/constantine/lowlevel_fields.nim index c5f87fc8..14dc1c6a 100644 --- a/constantine/lowlevel_fields.nim +++ b/constantine/lowlevel_fields.nim @@ -30,8 +30,11 @@ import # ------------------------------------------------------------ export - abstractions, - algebras.Algebra + abstractions.SecretBool, + abstractions.SecretWord, + abstractions.BigInt, + algebras.Algebra, + algebras.getBigInt # Scalar field Fr and Prime Field Fp # ------------------------------------------------------------ @@ -88,6 +91,7 @@ export arithmetic.prod export arithmetic.`*=` export arithmetic.square export arithmetic.square_repeated +export arithmetic.sumprod export arithmetic.csetZero export arithmetic.csetOne @@ -97,6 +101,7 @@ export arithmetic.csub export arithmetic.div2 export arithmetic.inv +export arithmetic.inv_vartime export arithmetic.isSquare export arithmetic.invsqrt diff --git a/constantine/lowlevel_pairing_curves.nim b/constantine/lowlevel_pairing_curves.nim index d4ef869c..abbcc2aa 100644 --- a/constantine/lowlevel_pairing_curves.nim +++ b/constantine/lowlevel_pairing_curves.nim @@ -33,8 +33,10 @@ import # ------------------------------------------------------------ export - abstractions, - algebras.Algebra + abstractions.SecretBool, + abstractions.SecretWord, + algebras.Algebra, + algebras.getBigInt # Pairings # ------------------------------------------------------------ diff --git a/constantine/math/elliptic/ec_scalar_mul_vartime.nim b/constantine/math/elliptic/ec_scalar_mul_vartime.nim index 89357975..65df66af 100644 --- a/constantine/math/elliptic/ec_scalar_mul_vartime.nim +++ b/constantine/math/elliptic/ec_scalar_mul_vartime.nim @@ -21,7 +21,8 @@ import constantine/math/io/io_bigints, constantine/platforms/abstractions, constantine/math_arbitrary_precision/arithmetic/limbs_views, - constantine/named/zoo_endomorphisms + constantine/named/zoo_endomorphisms, + constantine/named/algebras {.push raises: [].} # No exceptions allowed in core cryptographic operations {.push 
checks: off.} # No defects due to array bound checking or signed integer overflow allowed @@ -336,8 +337,6 @@ func scalarMul_vartime*[scalBits; EC](P: var EC, scalar: BigInt[scalBits]) {.met else: {.error: "Unconfigured".} - const L = scalBits.ceilDiv_vartime(M) + 1 - let usedBits = scalar.limbs.getBits_LE_vartime() when EC.F.Name.hasEndomorphismAcceleration(): @@ -352,8 +351,8 @@ func scalarMul_vartime*[scalBits; EC](P: var EC, scalar: BigInt[scalBits]) {.met return if 64 < usedBits: - # With a window of 5, we precompute 2^3 = 8 points - P.scalarMul_minHammingWeight_windowed_vartime(scalar, window = 5) + # With a window of 4, we precompute 2^2 = 4 points + P.scalarMul_minHammingWeight_windowed_vartime(scalar, window = 4) elif 16 < usedBits: # With a window of 3, we precompute 2^1 = 2 points P.scalarMul_minHammingWeight_windowed_vartime(scalar, window = 3) diff --git a/constantine/math/elliptic/ec_shortweierstrass_batch_ops.nim b/constantine/math/elliptic/ec_shortweierstrass_batch_ops.nim index 8ca885c1..9475e965 100644 --- a/constantine/math/elliptic/ec_shortweierstrass_batch_ops.nim +++ b/constantine/math/elliptic/ec_shortweierstrass_batch_ops.nim @@ -7,6 +7,7 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms.
import + constantine/named/algebras, constantine/platforms/abstractions, constantine/math/arithmetic, constantine/math/extension_fields, diff --git a/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim b/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim index 19936e0c..51ef60bd 100644 --- a/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim +++ b/constantine/math/elliptic/ec_shortweierstrass_jacobian.nim @@ -213,6 +213,8 @@ template sumImpl[F; G: static Subgroup]( # | Y₃ = R*(V-X₃)-S₁*HHH | Y₃ = M*(S-X₃)-YY*YY | | | # | Z₃ = Z₁*Z₂*H | Z₃ = Y₁*Z₁ | | | + bind mulCheckSparse + # "when" static evaluation doesn't shortcut booleans :/ # which causes issues when CoefA isn't an int but Fp or Fp2 when CoefA is int: @@ -1028,7 +1030,7 @@ func `~-`*(a: EC_ShortW_Jac, b: EC_ShortW_Aff): EC_ShortW_Jac {.noInit, inline.} ## This MUST NOT be used with secret data. ## ## This is highly VULNERABLE to timing attacks and power analysis attacks.] - ## + ## ## Out-of-place functions SHOULD NOT be used in performance-critical subroutines as compilers ## tend to generate useless memory moves or have difficulties to minimize stack allocation ## and our types might be large (Fp12 ...) 
diff --git a/constantine/named/properties_curves.nim b/constantine/named/properties_curves.nim index 0ad8621c..54bb1653 100644 --- a/constantine/named/properties_curves.nim +++ b/constantine/named/properties_curves.nim @@ -10,7 +10,8 @@ import std/macros, # Internal ./config_fields_and_curves, - ./deriv/parser_curves + ./deriv/parser_curves, + ./properties_fields export Algebra, CurveFamily, SexticTwist diff --git a/constantine/named/zoo_subgroups.nim b/constantine/named/zoo_subgroups.nim index 1be5a22d..e7c5f702 100644 --- a/constantine/named/zoo_subgroups.nim +++ b/constantine/named/zoo_subgroups.nim @@ -26,6 +26,8 @@ export bn254_nogami_subgroups, bn254_snarks_subgroups, bw6_761_subgroups, + pallas_subgroups, + vesta_subgroups, secp256k1_subgroups func clearCofactor*[ECP](P: var ECP) {.inline.} =