From 92685022c43c9d113126d0bfcb130a773e2c4216 Mon Sep 17 00:00:00 2001 From: Mamy Ratsimbazafy Date: Fri, 19 Jul 2024 07:32:41 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9D=94=BE=E2=82=9C=20multi-exponentiations?= =?UTF-8?q?=20(#436)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(gt-multiexp): add baseline multi-exponentiation on 𝔾ₜ based on BDLO12 * feat(gt-multiexp): add optimized multi-exponentiation in 𝔾ₜ * feat(gt-multiexp): add parallel multi-exponentiation in 𝔾ₜ * feat(gt-multiexp): enable parallel tests in nimble * magic workaround for https://github.com/nim-lang/Nim/issues/23853 --- ...mcfg => bench_ec_msm_bandersnatch.nim.cfg} | 0 .../bench_elliptic_parallel_template.nim | 4 +- benchmarks/bench_gt.nim | 1 - benchmarks/bench_gt_multiexp_bls12_381.nim | 46 ++ .../bench_gt_multiexp_bls12_381.nim.cfg | 1 + benchmarks/bench_gt_parallel_template.nim | 189 +++++++ benchmarks/bench_gt_template.nim | 1 - constantine.nimble | 9 + .../math/elliptic/ec_multi_scalar_mul.nim | 89 ++-- .../elliptic/ec_multi_scalar_mul_parallel.nim | 58 +-- .../ec_multi_scalar_mul_scheduler.nim | 1 + .../math/pairings/cyclotomic_subgroups.nim | 14 +- constantine/math/pairings/gt_multiexp.nim | 489 ++++++++++++++++++ .../math/pairings/gt_multiexp_parallel.nim | 258 +++++++++ .../math/pairings/pairings_generic.nim | 9 +- .../t_pairing_bls12_377_optate.nim | 9 +- .../t_pairing_bls12_381_gt_exp.nim | 4 +- .../t_pairing_bls12_381_gt_multiexp.nim | 18 + .../t_pairing_bls12_381_gt_subgroup.nim | 10 +- .../t_pairing_bls12_381_optate.nim | 9 +- .../t_pairing_bn254_nogami_gt_subgroup.nim | 10 +- .../t_pairing_bn254_nogami_optate.nim | 9 +- .../t_pairing_bn254_snarks_gt_exp.nim | 4 +- .../t_pairing_bn254_snarks_gt_multiexp.nim | 18 + .../t_pairing_bn254_snarks_gt_subgroup.nim | 10 +- .../t_pairing_bn254_snarks_optate.nim | 9 +- .../t_pairing_bw6_761_gt_subgroup.nim | 10 +- .../t_pairing_bw6_761_optate.nim | 9 +- tests/math_pairings/t_pairing_template.nim | 108 ++-- tests/parallel/t_ec_template_parallel.nim | 2 +- ...pairing_bls12_381_gt_multiexp_parallel.nim | 18 + .../parallel/t_pairing_template_parallel.nim | 88 ++++ 32 files changed, 1323 insertions(+), 191 deletions(-) rename benchmarks/{bench_ec_msm_bandersnatch.nimcfg => bench_ec_msm_bandersnatch.nim.cfg} (100%) create mode 100644 benchmarks/bench_gt_multiexp_bls12_381.nim create mode 100644 benchmarks/bench_gt_multiexp_bls12_381.nim.cfg create mode 100644 benchmarks/bench_gt_parallel_template.nim create mode 100644 constantine/math/pairings/gt_multiexp.nim create mode 100644 constantine/math/pairings/gt_multiexp_parallel.nim create mode 100644 tests/math_pairings/t_pairing_bls12_381_gt_multiexp.nim create mode 100644 tests/math_pairings/t_pairing_bn254_snarks_gt_multiexp.nim create mode 100644 tests/parallel/t_pairing_bls12_381_gt_multiexp_parallel.nim create mode 100644 tests/parallel/t_pairing_template_parallel.nim diff --git a/benchmarks/bench_ec_msm_bandersnatch.nimcfg b/benchmarks/bench_ec_msm_bandersnatch.nim.cfg similarity index 100% rename from benchmarks/bench_ec_msm_bandersnatch.nimcfg rename to benchmarks/bench_ec_msm_bandersnatch.nim.cfg diff --git a/benchmarks/bench_elliptic_parallel_template.nim b/benchmarks/bench_elliptic_parallel_template.nim index afc0a649c..647a9f81b 100644 --- a/benchmarks/bench_elliptic_parallel_template.nim +++ b/benchmarks/bench_elliptic_parallel_template.nim @@ -115,7 +115,7 @@ proc msmParallelBench*[EC](ctx: var BenchMsmContext[EC], numInputs: int, iters: var startNaive, stopNaive, startMSMbaseline, stopMSMbaseline, startMSMopt, stopMSMopt, startMSMpara, stopMSMpara: MonoTime if numInputs <= 100000: - startNaive = getMonotime() + # startNaive = getMonotime() bench("EC scalar muls " & align($numInputs, 10) & " (" & $bits & "-bit coefs, points)", EC, iters): var tmp: EC r.setNeutral() @@ -123,7 +123,7 @@ proc msmParallelBench*[EC](ctx: var BenchMsmContext[EC], numInputs: int, iters: tmp.fromAffine(points[i]) tmp.scalarMul(coefs[i]) r += tmp - stopNaive = getMonotime() + # stopNaive = getMonotime() if numInputs <= 100000: startNaive = getMonotime() diff --git a/benchmarks/bench_gt.nim b/benchmarks/bench_gt.nim index cc10108ef..a3599266c 100644 --- a/benchmarks/bench_gt.nim +++ b/benchmarks/bench_gt.nim @@ -33,7 +33,6 @@ proc main() = separator() staticFor i, 0, AvailableCurves.len: const curve = AvailableCurves[i] - const bits = Fr[curve].bits() separator() mulBench(Fp12[curve], Iters) sqrBench(Fp12[curve], Iters) diff --git a/benchmarks/bench_gt_multiexp_bls12_381.nim b/benchmarks/bench_gt_multiexp_bls12_381.nim new file mode 100644 index 000000000..ef60000af --- /dev/null +++ b/benchmarks/bench_gt_multiexp_bls12_381.nim @@ -0,0 +1,46 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Internals + constantine/named/algebras, + constantine/math/extension_fields, + # Helpers + ./bench_gt_parallel_template + +# ############################################################ +# +# Benchmark of the 𝔾ₜ group of +# Pairing Friendly curves +# +# ############################################################ + +const Iters = 10000 +const AvailableCurves = [ + # BN254_Nogami, + # BN254_Snarks, + # BLS12_377, + BLS12_381, +] + +const testNumPoints = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] + +proc main() = + separator() + staticFor i, 0, AvailableCurves.len: + const curve = AvailableCurves[i] + var ctx = createBenchMultiExpContext(Fp12[curve], testNumPoints) + separator() + for numPoints in testNumPoints: + let batchIters = max(1, Iters div numPoints) + ctx.multiExpParallelBench(numPoints, batchIters) + separator() + separator() + +main() +notes() diff --git a/benchmarks/bench_gt_multiexp_bls12_381.nim.cfg b/benchmarks/bench_gt_multiexp_bls12_381.nim.cfg new file mode 100644 index 000000000..aed303eef --- /dev/null +++ b/benchmarks/bench_gt_multiexp_bls12_381.nim.cfg @@ -0,0 +1 @@ +--threads:on diff --git a/benchmarks/bench_gt_parallel_template.nim b/benchmarks/bench_gt_parallel_template.nim new file mode 100644 index 000000000..0e8044001 --- /dev/null +++ b/benchmarks/bench_gt_parallel_template.nim @@ -0,0 +1,189 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +# ############################################################ +# +# Summary of the performance of a curve +# +# ############################################################ + +import + # Standard library + std/[monotimes, times], + # Internals + constantine/platforms/abstractions, + constantine/named/algebras, + constantine/math/[arithmetic, extension_fields], + constantine/math/pairings/[ + pairings_generic, + gt_exponentiations, + gt_exponentiations_vartime, + gt_multiexp, gt_multiexp_parallel, + ], + constantine/threadpool, + # Helpers + helpers/prng_unsafe, + ./bench_blueprint + +export times, monotimes +export notes +export abstractions +proc separator*() = separator(168) + +proc report(op, domain: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) = + let ns = inNanoseconds((stop-start) div iters) + let throughput = 1e9 / float64(ns) + when SupportsGetTicks: + echo &"{op:<68} {domain:<20} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)" + else: + echo &"{op:<68} {domain:<20} {throughput:>15.3f} ops/s {ns:>9} ns/op" + +macro fixFieldDisplay(T: typedesc): untyped = + # At compile-time, enums are integers and their display is buggy + # we get the Curve ID instead of the curve name. + let instantiated = T.getTypeInst() + var name = $instantiated[1][0] # Fp + name.add "[" & $Algebra(instantiated[1][1].intVal) & "]" + result = newLit name + +func fixDisplay(T: typedesc): string = + when T is (Fp or Fp2 or Fp4 or Fp6 or Fp12): + fixFieldDisplay(T) + else: + $T + +func fixDisplay(T: Algebra): string = + $T + +template bench(op: string, T: typed, iters: int, body: untyped): untyped = + measure(iters, startTime, stopTime, startClk, stopClk, body) + report(op, fixDisplay(T), startTime, stopTime, startClk, stopClk, iters) + +func random_gt*(rng: var RngState, F: typedesc): F {.inline, noInit.} = + result = rng.random_unsafe(F) + result.finalExp() + +# Multi-exponentiations +# --------------------------------------------------------------------------- + +type BenchMultiexpContext*[GT] = object + tp: Threadpool + numInputs: int + exponents: seq[getBigInt(GT.Name(), kScalarField)] + elems: seq[GT] + +proc createBenchMultiExpContext*(GT: typedesc, inputSizes: openArray[int]): BenchMultiexpContext[GT] = + result.tp = Threadpool.new() + let maxNumInputs = inputSizes.max() + + const bits = Fr[GT.Name].bits() + + result.numInputs = maxNumInputs + result.elems = newSeq[GT](maxNumInputs) + result.exponents = newSeq[BigInt[bits]](maxNumInputs) + + proc genElemExponentPairsChunk[GT](rngSeed: uint64, start, len: int, elems: ptr GT, exponents: ptr BigInt[bits]) {.nimcall.} = + let elems = cast[ptr UncheckedArray[GT]](elems) + let exponents = cast[ptr UncheckedArray[BigInt[bits]]](exponents) + + # RNGs are not threadsafe, create a threadlocal one seeded from the global RNG + var threadRng: RngState + threadRng.seed(rngSeed) + + for i in start ..< start + len: + elems[i] = threadRng.random_gt(GT) + exponents[i] = threadRng.random_unsafe(BigInt[bits]) + + let chunks = balancedChunksPrioNumber(0, maxNumInputs, result.tp.numThreads) + + stdout.write &"Generating {maxNumInputs} (elems, exponents) pairs ... " + stdout.flushFile() + + let start = getMonotime() + + syncScope: + for (id, start, size) in items(chunks): + result.tp.spawn genElemExponentPairsChunk(rng.next(), start, size, result.elems[0].addr, result.exponents[0].addr) + + # Even if child threads are sleeping, it seems like perf is lower when there are threads around + # maybe because the kernel has more overhead or time quantum to keep track off so shut them down. + result.tp.shutdown() + + let stop = getMonotime() + stdout.write &"in {float64(inNanoSeconds(stop-start)) / 1e6:6.3f} ms\n" + +proc multiExpParallelBench*[GT](ctx: var BenchMultiExpContext[GT], numInputs: int, iters: int) = + const bits = Fr[GT.Name].bits() + + template elems: untyped = ctx.elems.toOpenArray(0, numInputs-1) + template exponents: untyped = ctx.exponents.toOpenArray(0, numInputs-1) + + + var r{.noInit.}: GT + var startNaive, stopNaive, startMultiExpBaseline, stopMultiExpBaseline: MonoTime + var startMultiExpOpt, stopMultiExpOpt, startMultiExpPara, stopMultiExpPara: MonoTime + + if numInputs <= 100000: + # startNaive = getMonotime() + bench("𝔾ₜ exponentiations " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + var tmp: GT + r.setOne() + for i in 0 ..< elems.len: + tmp.gtExp(elems[i], exponents[i]) + r *= tmp + # stopNaive = getMonotime() + + if numInputs <= 100000: + startNaive = getMonotime() + bench("𝔾ₜ exponentiations vartime " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + var tmp: GT + r.setOne() + for i in 0 ..< elems.len: + tmp.gtExp_vartime(elems[i], exponents[i]) + r *= tmp + stopNaive = getMonotime() + + if numInputs <= 100000: + startMultiExpBaseline = getMonotime() + bench("𝔾ₜ multi-exponentiations baseline " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + r.multiExp_reference_vartime(elems, exponents) + stopMultiExpBaseline = getMonotime() + + block: + startMultiExpOpt = getMonotime() + bench("𝔾ₜ multi-exponentiations optimized " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + r.multiExp_vartime(elems, exponents) + stopMultiExpOpt = getMonotime() + + block: + ctx.tp = Threadpool.new() + + startMultiExpPara = getMonotime() + bench("𝔾ₜ multi-exponentiations" & align($ctx.tp.numThreads & " threads", 11) & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + ctx.tp.multiExp_vartime_parallel(r, elems, exponents) + stopMultiExpPara = getMonotime() + + ctx.tp.shutdown() + + let perfNaive = inNanoseconds((stopNaive-startNaive) div iters) + let perfMultiExpBaseline = inNanoseconds((stopMultiExpBaseline-startMultiExpBaseline) div iters) + let perfMultiExpOpt = inNanoseconds((stopMultiExpOpt-startMultiExpOpt) div iters) + let perfMultiExpPara = inNanoseconds((stopMultiExpPara-startMultiExpPara) div iters) + + if numInputs <= 100000: + let speedupBaseline = float(perfNaive) / float(perfMultiExpBaseline) + echo &"Speedup ratio baseline over naive linear combination: {speedupBaseline:>6.3f}x" + + let speedupOpt = float(perfNaive) / float(perfMultiExpOpt) + echo &"Speedup ratio optimized over naive linear combination: {speedupOpt:>6.3f}x" + + let speedupOptBaseline = float(perfMultiExpBaseline) / float(perfMultiExpOpt) + echo &"Speedup ratio optimized over baseline linear combination: {speedupOptBaseline:>6.3f}x" + + let speedupParaOpt = float(perfMultiExpOpt) / float(perfMultiExpPara) + echo &"Speedup ratio parallel over optimized linear combination: {speedupParaOpt:>6.3f}x" diff --git a/benchmarks/bench_gt_template.nim b/benchmarks/bench_gt_template.nim index 4e41b5f1f..a4d8d9e0f 100644 --- a/benchmarks/bench_gt_template.nim +++ b/benchmarks/bench_gt_template.nim @@ -27,7 +27,6 @@ import helpers/prng_unsafe, ./bench_blueprint - export notes export abstractions proc separator*() = separator(168) diff --git a/constantine.nimble b/constantine.nimble index 93c3a02d6..89ed69a04 100644 --- a/constantine.nimble +++ b/constantine.nimble @@ -637,6 +637,8 @@ const testDescMultithreadedCrypto: seq[string] = @[ "tests/parallel/t_ec_shortw_prj_g1_batch_add_parallel.nim", "tests/parallel/t_ec_shortw_jac_g1_msm_parallel.nim", "tests/parallel/t_ec_shortw_prj_g1_msm_parallel.nim", + "tests/parallel/t_ec_twedwards_prj_msm_parallel.nim", + "tests/parallel/t_pairing_bls12_381_gt_multiexp_parallel.nim", ] const benchDesc = [ @@ -661,6 +663,7 @@ const benchDesc = [ "bench_pairing_bn254_nogami", "bench_pairing_bn254_snarks", "bench_gt", + "bench_gt_multiexp_bls12_381", "bench_summary_bls12_377", "bench_summary_bls12_381", "bench_summary_bn254_nogami", @@ -1043,6 +1046,12 @@ task bench_ec_g2_scalar_mul, "Run benchmark on Elliptic Curve group 𝔾2 (Multi task bench_gt, "Run 𝔾ₜ benchmarks - CC compiler": runBench("bench_gt") +# 𝔾ₜ - multi-exponentiation +# ------------------------------------------ + +task bench_gt_multiexp_bls12_381, "Run 𝔾ₜ multiexponentiation benchmarks for BLS12-381 - CC compiler": + runBench("bench_gt_multiexp_bls12_381") + # Pairings # ------------------------------------------ diff --git a/constantine/math/elliptic/ec_multi_scalar_mul.nim b/constantine/math/elliptic/ec_multi_scalar_mul.nim index 51f9e9573..25c1fc754 100644 --- a/constantine/math/elliptic/ec_multi_scalar_mul.nim +++ b/constantine/math/elliptic/ec_multi_scalar_mul.nim @@ -253,7 +253,7 @@ func miniMSM[bits: static int, EC, ECaff]( for _ in 0 ..< c: r.double() -func multiScalarMul_vartime*[bits: static int, EC, ECaff]( +func msmImpl_vartime[bits: static int, EC, ECaff]( r: var EC, coefs: ptr UncheckedArray[BigInt[bits]], points: ptr UncheckedArray[ECaff], N: int, c: static int) {.tags:[VarTime, HeapAlloc], meter.} = @@ -295,6 +295,9 @@ func multiScalarMul_vartime*[bits: static int, EC, ECaff]( # ------- buckets.freeHeap() +# Multi scalar multiplication with batched affine additions +# ----------------------------------------------------------------------------------------------------------------------- + func schedAccumulate*[NumBuckets, QueueLen, F, G; bits: static int]( sched: ptr Scheduler[NumBuckets, QueueLen, F, G], bitIndex: int, miniMsmKind: static MiniMsmKind, c: static int, @@ -344,7 +347,7 @@ func miniMSM_affine[NumBuckets, QueueLen, EC, ECaff; bits: static int]( for _ in 0 ..< c: r.double() -func multiScalarMulAffine_vartime[bits: static int, EC, ECaff]( +func msmAffineImpl_vartime[bits: static int, EC, ECaff]( r: var EC, coefs: ptr UncheckedArray[BigInt[bits]], points: ptr UncheckedArray[ECaff], N: int, c: static int) {.tags:[VarTime, Alloca, HeapAlloc], meter.} = @@ -389,6 +392,9 @@ func multiScalarMulAffine_vartime[bits: static int, EC, ECaff]( sched.freeHeap() buckets.freeHeap() +# Endomorphism acceleration +# ----------------------------------------------------------------------------------------------------------------------- + proc applyEndomorphism[bits: static int, ECaff]( coefs: ptr UncheckedArray[BigInt[bits]], points: ptr UncheckedArray[ECaff], @@ -403,7 +409,7 @@ proc applyEndomorphism[bits: static int, ECaff]( const G = when ECaff isnot EC_ShortW_Aff: G1 else: ECaff.G - const L = ECaff.getScalarField().bits().ceilDiv_vartime(M) + 1 + const L = ECaff.getScalarField().bits().computeEndoRecodedLength(M) let splitCoefs = allocHeapArray(array[M, BigInt[L]], N) let endoBasis = allocHeapArray(array[M, ECaff], N) @@ -447,7 +453,10 @@ template withEndo[coefsBits: static int, EC, ECaff]( else: msmProc(r, coefs, points, N, c) -func multiScalarMul_dispatch_vartime[bits: static int, F, G]( +# Algorithm selection +# ----------------------------------------------------------------------------------------------------------------------- + +func msm_dispatch_vartime[bits: static int, F, G]( r: var (EC_ShortW_Jac[F, G] or EC_ShortW_Prj[F, G]), coefs: ptr UncheckedArray[BigInt[bits]], points: ptr UncheckedArray[EC_ShortW_Aff[F, G]], N: int) = @@ -460,27 +469,27 @@ func multiScalarMul_dispatch_vartime[bits: static int, F, G]( # but it has no significant impact on performance case c - of 2: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 2) - of 3: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 3) - of 4: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 4) - of 5: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 5) - of 6: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 6) - of 7: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 7) - of 8: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 8) - - of 9: withEndo(multiScalarMulAffine_vartime, r, coefs, points, N, c = 9) - of 10: withEndo(multiScalarMulAffine_vartime, r, coefs, points, N, c = 10) - of 11: withEndo(multiScalarMulAffine_vartime, r, coefs, points, N, c = 11) - of 12: withEndo(multiScalarMulAffine_vartime, r, coefs, points, N, c = 12) - of 13: withEndo(multiScalarMulAffine_vartime, r, coefs, points, N, c = 13) - of 14: multiScalarMulAffine_vartime(r, coefs, points, N, c = 14) - of 15: multiScalarMulAffine_vartime(r, coefs, points, N, c = 15) - - of 16..17: multiScalarMulAffine_vartime(r, coefs, points, N, c = 16) + of 2: withEndo(msmImpl_vartime, r, coefs, points, N, c = 2) + of 3: withEndo(msmImpl_vartime, r, coefs, points, N, c = 3) + of 4: withEndo(msmImpl_vartime, r, coefs, points, N, c = 4) + of 5: withEndo(msmImpl_vartime, r, coefs, points, N, c = 5) + of 6: withEndo(msmImpl_vartime, r, coefs, points, N, c = 6) + of 7: withEndo(msmImpl_vartime, r, coefs, points, N, c = 7) + of 8: withEndo(msmImpl_vartime, r, coefs, points, N, c = 8) + + of 9: withEndo(msmAffineImpl_vartime, r, coefs, points, N, c = 9) + of 10: withEndo(msmAffineImpl_vartime, r, coefs, points, N, c = 10) + of 11: withEndo(msmAffineImpl_vartime, r, coefs, points, N, c = 11) + of 12: withEndo(msmAffineImpl_vartime, r, coefs, points, N, c = 12) + of 13: withEndo(msmAffineImpl_vartime, r, coefs, points, N, c = 13) + of 14: msmAffineImpl_vartime(r, coefs, points, N, c = 14) + of 15: msmAffineImpl_vartime(r, coefs, points, N, c = 15) + + of 16..17: msmAffineImpl_vartime(r, coefs, points, N, c = 16) else: unreachable() -func multiScalarMul_dispatch_vartime[bits: static int, F]( +func msm_dispatch_vartime[bits: static int, F]( r: var EC_TwEdw_Prj[F], coefs: ptr UncheckedArray[BigInt[bits]], points: ptr UncheckedArray[EC_TwEdw_Aff[F]], N: int) = ## Multiscalar multiplication: @@ -494,22 +503,22 @@ func multiScalarMul_dispatch_vartime[bits: static int, F]( # but it has no significant impact on performance case c - of 2: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 2) - of 3: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 3) - of 4: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 4) - of 5: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 5) - of 6: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 6) - of 7: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 7) - of 8: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 8) - of 9: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 9) - of 10: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 10) - of 11: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 11) - of 12: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 12) - of 13: withEndo(multiScalarMul_vartime, r, coefs, points, N, c = 13) - of 14: multiScalarMul_vartime(r, coefs, points, N, c = 14) - of 15: multiScalarMul_vartime(r, coefs, points, N, c = 15) - - of 16..17: multiScalarMul_vartime(r, coefs, points, N, c = 16) + of 2: withEndo(msmImpl_vartime, r, coefs, points, N, c = 2) + of 3: withEndo(msmImpl_vartime, r, coefs, points, N, c = 3) + of 4: withEndo(msmImpl_vartime, r, coefs, points, N, c = 4) + of 5: withEndo(msmImpl_vartime, r, coefs, points, N, c = 5) + of 6: withEndo(msmImpl_vartime, r, coefs, points, N, c = 6) + of 7: withEndo(msmImpl_vartime, r, coefs, points, N, c = 7) + of 8: withEndo(msmImpl_vartime, r, coefs, points, N, c = 8) + of 9: withEndo(msmImpl_vartime, r, coefs, points, N, c = 9) + of 10: withEndo(msmImpl_vartime, r, coefs, points, N, c = 10) + of 11: withEndo(msmImpl_vartime, r, coefs, points, N, c = 11) + of 12: withEndo(msmImpl_vartime, r, coefs, points, N, c = 12) + of 13: withEndo(msmImpl_vartime, r, coefs, points, N, c = 13) + of 14: msmImpl_vartime(r, coefs, points, N, c = 14) + of 15: msmImpl_vartime(r, coefs, points, N, c = 15) + + of 16..17: msmImpl_vartime(r, coefs, points, N, c = 16) else: unreachable() @@ -521,7 +530,7 @@ func multiScalarMul_vartime*[bits: static int, EC, ECaff]( ## Multiscalar multiplication: ## r <- [a₀]P₀ + [a₁]P₁ + ... + [aₙ₋₁]Pₙ₋₁ - multiScalarMul_dispatch_vartime(r, coefs, points, len) + msm_dispatch_vartime(r, coefs, points, len) func multiScalarMul_vartime*[bits: static int, EC, ECaff]( r: var EC, @@ -531,7 +540,7 @@ func multiScalarMul_vartime*[bits: static int, EC, ECaff]( ## r <- [a₀]P₀ + [a₁]P₁ + ... + [aₙ₋₁]Pₙ₋₁ debug: doAssert coefs.len == points.len let N = points.len - multiScalarMul_dispatch_vartime(r, coefs.asUnchecked(), points.asUnchecked(), N) + msm_dispatch_vartime(r, coefs.asUnchecked(), points.asUnchecked(), N) func multiScalarMul_vartime*[F, EC, ECaff]( r: var EC, diff --git a/constantine/math/elliptic/ec_multi_scalar_mul_parallel.nim b/constantine/math/elliptic/ec_multi_scalar_mul_parallel.nim index be5a5c327..0f6d6111a 100644 --- a/constantine/math/elliptic/ec_multi_scalar_mul_parallel.nim +++ b/constantine/math/elliptic/ec_multi_scalar_mul_parallel.nim @@ -12,7 +12,7 @@ import constantine/named/algebras, constantine/math/endomorphisms/split_scalars, constantine/math/extension_fields, constantine/named/zoo_endomorphisms, - ../../threadpool/[threadpool, partitioners] + constantine/threadpool/[threadpool, partitioners] export bestBucketBitSize # No exceptions allowed in core cryptographic operations @@ -145,7 +145,7 @@ proc bucketAccumReduce_withInit[bits: static int, EC, ECaff]( buckets[i].setNeutral() bucketAccumReduce(windowSum[], buckets, bitIndex, miniMsmKind, c, coefs, points, N) -proc msm_vartime_parallel[bits: static int, EC, ECaff]( +proc msmImpl_vartime_parallel[bits: static int, EC, ECaff]( tp: Threadpool, r: ptr EC, coefs: ptr UncheckedArray[BigInt[bits]], points: ptr UncheckedArray[EC_aff], @@ -465,7 +465,7 @@ proc applyEndomorphism_parallel[bits: static int, ECaff]( const G = when ECaff isnot EC_ShortW_Aff: G1 else: ECaff.G - const L = ECaff.getScalarField().bits().ceilDiv_vartime(M) + 1 + const L = ECaff.getScalarField().bits().computeEndoRecodedLength(M) let splitCoefs = allocHeapArray(array[M, BigInt[L]], N) let endoBasis = allocHeapArray(array[M, ECaff], N) @@ -544,14 +544,14 @@ proc multiScalarMul_dispatch_vartime_parallel[bits: static int, F, G]( # but it has no significant impact on performance case c - of 2: withEndo(msm_vartime_parallel, tp, r, coefs, points, N, c = 2) - of 3: withEndo(msm_vartime_parallel, tp, r, coefs, points, N, c = 3) - of 4: withEndo(msm_vartime_parallel, tp, r, coefs, points, N, c = 4) - of 5: withEndo(msm_vartime_parallel, tp, r, coefs, points, N, c = 5) - of 6: withEndo(msm_vartime_parallel, tp, r, coefs, points, N, c = 6) + of 2: withEndo(msmImpl_vartime_parallel, tp, r, coefs, points, N, c = 2) + of 3: withEndo(msmImpl_vartime_parallel, tp, r, coefs, points, N, c = 3) + of 4: withEndo(msmImpl_vartime_parallel, tp, r, coefs, points, N, c = 4) + of 5: withEndo(msmImpl_vartime_parallel, tp, r, coefs, points, N, c = 5) + of 6: withEndo(msmImpl_vartime_parallel, tp, r, coefs, points, N, c = 6) - of 7: msm_vartime_parallel(tp, r, coefs, points, N, c = 7) - of 8: msm_vartime_parallel(tp, r, coefs, points, N, c = 8) + of 7: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 7) + of 8: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 8) of 9: withEndo(msmAffine_vartime_parallel_split, tp, r, coefs, points, N, c = 9, useParallelBuckets = true) of 10: withEndo(msmAffine_vartime_parallel_split, tp, r, coefs, points, N, c = 10, useParallelBuckets = true) @@ -579,23 +579,23 @@ proc multiScalarMul_dispatch_vartime_parallel[bits: static int, F]( # but it has no significant impact on performance case c - of 2: withEndo(msm_vartime_parallel, tp, r, coefs, points, N, c = 2) - of 3: withEndo(msm_vartime_parallel, tp, r, coefs, points, N, c = 3) - of 4: withEndo(msm_vartime_parallel, tp, r, coefs, points, N, c = 4) - of 5: withEndo(msm_vartime_parallel, tp, r, coefs, points, N, c = 5) - of 6: withEndo(msm_vartime_parallel, tp, r, coefs, points, N, c = 6) - - of 7: msm_vartime_parallel(tp, r, coefs, points, N, c = 7) - of 8: msm_vartime_parallel(tp, r, coefs, points, N, c = 8) - of 9: msm_vartime_parallel(tp, r, coefs, points, N, c = 9) - of 10: msm_vartime_parallel(tp, r, coefs, points, N, c = 10) - of 11: msm_vartime_parallel(tp, r, coefs, points, N, c = 11) - of 12: msm_vartime_parallel(tp, r, coefs, points, N, c = 12) - of 13: msm_vartime_parallel(tp, r, coefs, points, N, c = 13) - of 14: msm_vartime_parallel(tp, r, coefs, points, N, c = 14) - of 15: msm_vartime_parallel(tp, r, coefs, points, N, c = 16) - - of 16..17: msm_vartime_parallel(tp, r, coefs, points, N, c = 16) + of 2: withEndo(msmImpl_vartime_parallel, tp, r, coefs, points, N, c = 2) + of 3: withEndo(msmImpl_vartime_parallel, tp, r, coefs, points, N, c = 3) + of 4: withEndo(msmImpl_vartime_parallel, tp, r, coefs, points, N, c = 4) + of 5: withEndo(msmImpl_vartime_parallel, tp, r, coefs, points, N, c = 5) + of 6: withEndo(msmImpl_vartime_parallel, tp, r, coefs, points, N, c = 6) + + of 7: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 7) + of 8: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 8) + of 9: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 9) + of 10: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 10) + of 11: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 11) + of 12: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 12) + of 13: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 13) + of 14: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 14) + of 15: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 16) + + of 16..17: msmImpl_vartime_parallel(tp, r, coefs, points, N, c = 16) else: unreachable() @@ -620,7 +620,6 @@ proc multiScalarMul_vartime_parallel*[bits: static int, EC, ECaff]( ## This function can be nested in another parallel function debug: doAssert coefs.len == points.len let N = points.len - tp.multiScalarMul_dispatch_vartime_parallel(r.addr, coefs.asUnchecked(), points.asUnchecked(), N) proc multiScalarMul_vartime_parallel*[F, EC, ECaff]( @@ -631,7 +630,6 @@ proc multiScalarMul_vartime_parallel*[F, EC, ECaff]( len: int) {.meter.} = ## Multiscalar multiplication: ## r <- [a₀]P₀ + [a₁]P₁ + ... + [aₙ₋₁]Pₙ₋₁ - let n = cast[int](len) let coefs_big = allocHeapArrayAligned(F.getBigInt(), n, alignment = 64) @@ -650,8 +648,6 @@ proc multiScalarMul_vartime_parallel*[EC, ECaff]( points: openArray[ECaff]) {.inline.} = ## Multiscalar multiplication: ## r <- [a₀]P₀ + [a₁]P₁ + ... + [aₙ₋₁]Pₙ₋₁ - debug: doAssert coefs.len == points.len let N = points.len - tp.multiScalarMul_vartime_parallel(r.addr, coefs.asUnchecked(), points.asUnchecked(), N) diff --git a/constantine/math/elliptic/ec_multi_scalar_mul_scheduler.nim b/constantine/math/elliptic/ec_multi_scalar_mul_scheduler.nim index f18af92ac..043b326b2 100644 --- a/constantine/math/elliptic/ec_multi_scalar_mul_scheduler.nim +++ b/constantine/math/elliptic/ec_multi_scalar_mul_scheduler.nim @@ -213,6 +213,7 @@ func bestBucketBitSize*(inputSize: int, scalarBitwidth: static int, useSignedBuc # L1, L2 caches, TLB and 64 aliasing conflict # are not taken into account in previous formula. # Each increase in c doubles memory used. + # TODO: use the element size for thresholds. when useManualTuning: if 14 <= result: result -= 1 diff --git a/constantine/math/pairings/cyclotomic_subgroups.nim b/constantine/math/pairings/cyclotomic_subgroups.nim index f04714c93..9bb54e401 100644 --- a/constantine/math/pairings/cyclotomic_subgroups.nim +++ b/constantine/math/pairings/cyclotomic_subgroups.nim @@ -225,9 +225,9 @@ func cyclotomic_square_cube_over_quad(r: var CubicExt, a: CubicExt) = # https://eprint.iacr.org/2009/565.pdf # Cubic extension field - # A = 3a² − 2 ̄a - # B = 3 √i c² + 2 ̄b - # C = 3b² − 2 ̄c + # A = 3a² − 2a̅ + # B = 3 √i c² + 2b̅ + # C = 3b² − 2c̅ var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: typeof(a.c0) template a0: untyped = a.c0.c0 @@ -261,7 +261,7 @@ func cyclotomic_square_cube_over_quad(r: var CubicExt, a: CubicExt) = r.c2.c1.double() r.c2.c1 += v1.c1 - # Now B = 3 √i c² + 2 ̄b + # Now B = 3 √i c² + 2b̅ # beware of mul by non residue: √i v₂ = ξv₂₁ + v₂₀√i # 3 (√i c²)₀ + 2a₂ @@ -291,9 +291,9 @@ func cyclotomic_square_quad_over_cube[F](r: var QuadraticExt[F], a: QuadraticExt # c₅ <=> a₅ <=> b₅ # # Hence, this formula for a cubic extension field - # A = 3a² − 2 ̄a - # B = 3 √i c² + 2 ̄b - # C = 3b² − 2 ̄c + # A = 3a² − 2a̅ + # B = 3 √i c² + 2b̅ + # C = 3b² − 2c̅ # # becomes # A = (b₀, b₄) = 3(b₀, b₄)² - 2(b₀,-b₄) diff --git a/constantine/math/pairings/gt_multiexp.nim b/constantine/math/pairings/gt_multiexp.nim new file mode 100644 index 000000000..0e78fce5f --- /dev/null +++ b/constantine/math/pairings/gt_multiexp.nim @@ -0,0 +1,489 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import constantine/named/algebras, + constantine/math/endomorphisms/split_scalars, + constantine/math/extension_fields, + constantine/math/arithmetic/bigints, + constantine/named/zoo_endomorphisms, + constantine/platforms/abstractions, + ./cyclotomic_subgroups + +# No exceptions allowed in core cryptographic operations +{.push raises: [].} +{.push checks: off.} + +# ########################################################### # +# # +# Multi-Exponentiation in 𝔾ₜ # +# # +# ########################################################### # + +# General utilities +# ------------------------------------------------------------- + +func bestBucketBitSize(inputSize: int, scalarBitwidth: static int, useSignedBuckets, useManualTuning: static bool): int {.inline.} = + ## Evaluate the best bucket bit-size for the input size. + ## That bucket size minimize group operations. + ## This ignore cache effect. Computation can become memory-bound, especially with large buckets + ## that don't fit in L1 cache, trigger the 64K aliasing conflict or worse (overflowing L2 cache or TLB). + ## Especially, scalars are expected to be indistinguishable from random so buckets accessed during accumulation + ## will be in a random pattern, triggering cache misses. + + # Raw operation cost is approximately + # 1. Bucket accumulation + # n - (2ᶜ-1) mul for b/c windows or n - (2ᶜ⁻¹-1) if using signed buckets + # 2. Bucket reduction + # 2x(2ᶜ-2) mul for b/c windows or 2*(2ᶜ⁻¹-2) + # 3. Final reduction + # (b/c - 1) x (c cyclotomic squarings + 1 multiplication) + # Total + # b/c (n + 2ᶜ - 2) A + (b/c - 1) * (c*D + A) + # https://www.youtube.com/watch?v=Bl5mQA7UL2I + + # A cyclotomic square costs ~50% of a 𝔾ₜ multiplication with Granger-Scott formula + + const M = 5300'f32 # Mul cost (in cycles) + const S = 2100'f32 # Cyclotomic square cost (in cycles) + + const s = int useSignedBuckets + let n = inputSize + let b = float32(scalarBitwidth) + var minCost = float32(Inf) + for c in 2 .. 20: # cap return value at 17 after manual tuning + let b_over_c = b/c.float32 + + let bucket_accumulate_reduce = b_over_c * float32(n + (1 shl (c-s)) - 2) * M + let final_reduction = (b_over_c - 1'f32) * (c.float32*S + M) + let cost = bucket_accumulate_reduce + final_reduction + if cost < minCost: + minCost = cost + result = c + + # Manual tuning, memory bandwidth / cache boundaries of + # L1, L2 caches, TLB and 64 aliasing conflict + # are not taken into account in previous formula. + # Each increase in c doubles memory used. + # Compared to 𝔾₁, 𝔾ₜ elements are 6x bigger so we shift by 3 + when useManualTuning: + if 11 <= result: + result -= 1 + if 12 <= result: + result -= 1 + if 13 <= result: + result -= 1 + +func `~*=`[Gt: ExtensionField](a: var Gt, b: Gt) {.inline.} = + # TODO: Analyze the inputs to see if there is avalue in more complex shortcuts (-1, or partial 0 coordinates) + if a.isOne().bool(): + a = b + elif b.isOne().bool(): + discard + else: + a *= b + +func `~/=`[Gt: ExtensionField](a: var Gt, b: Gt) {.inline.} = + ## Cyclotomic division + var t {.noInit.}: Gt + t.cyclotomic_inv(b) + a ~*= t + +func setNeutral[Gt: ExtensionField](a: var Gt) {.inline.} = + a.setOne() + +# Reference multi-exponentiation +# ------------------------------------------------------------- + +func multiExpImpl_reference_vartime[bits: static int, Gt]( + r: var Gt, + elems: ptr UncheckedArray[Gt], + expos: ptr UncheckedArray[BigInt[bits]], + N: int, c: static int) {.tags:[VarTime, HeapAlloc].} = + ## Inner implementation of MEXP, for static dispatch over c, the bucket bit length + ## This is a straightforward simple translation of BDLO12, section 4 + + # Prologue + # -------- + const numBuckets = 1 shl c - 1 # bucket 0 is unused + const numWindows = bits.ceilDiv_vartime(c) + + let miniEXPs = allocHeapArray(Gt, numWindows) + let buckets = allocHeapArray(Gt, numBuckets) + + # Algorithm + # --------- + for w in 0 ..< numWindows: + # Place our elements in a bucket corresponding to + # how many times their bit pattern in the current window of size c + for i in 0 ..< numBuckets: + buckets[i].setNeutral() + + # 1. Bucket accumulation. Cost: n - (2ᶜ-1) => n elems in 2ᶜ-1 elems, first elem per bucket is just copied + for j in 0 ..< N: + let b = cast[int](expos[j].getWindowAt(w*c, c)) + if b == 0: # bucket 0 is unused, no need to add aⱼ⁰ + continue + else: + buckets[b-1] ~*= elems[j] + + # 2. Bucket reduction. Cost: 2x(2ᶜ-2) => 2 additions per 2ᶜ-1 bucket, last bucket is just copied + # We have ordered subset sums in each bucket, we now need to compute the mini-exponentiation + # S₁¹ + S₂² + S₃³ + ... + (S₂c₋₁)^(2ᶜ-1) + var accumBuckets{.noInit.}, miniEXP{.noInit.}: Gt + accumBuckets = buckets[numBuckets-1] + miniEXP = buckets[numBuckets-1] + + # Example with c = 3, 2³ = 8 + for k in countdown(numBuckets-2, 0): + accumBuckets ~*= buckets[k] # Stores S₈ then S₈ +S₇ then S₈ +S₇ +S₆ then ... + miniEXP ~*= accumBuckets # Stores S₈ then S₈²+S₇ then S₈³+S₇²+S₆ then ... + + miniEXPs[w] = miniEXP + + # 3. Final reduction. Cost: (b/c - 1)x(c+1) => b/c windows, first is copied, c squarings + 1 mul per window + r = miniEXPs[numWindows-1] + for w in countdown(numWindows-2, 0): + for _ in 0 ..< c: + r.cyclotomic_square() + r ~*= miniEXPs[w] + + # Cleanup + # ------- + buckets.freeHeap() + miniEXPs.freeHeap() + +func multiExp_reference_dispatch_vartime[bits: static int, Gt]( + r: var Gt, + elems: ptr UncheckedArray[Gt], + expos: ptr UncheckedArray[BigInt[bits]], + N: int) {.tags:[VarTime, HeapAlloc].} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + let c = bestBucketBitSize(N, bits, useSignedBuckets = false, useManualTuning = false) + + case c + of 2: multiExpImpl_reference_vartime(r, elems, expos, N, c = 2) + of 3: multiExpImpl_reference_vartime(r, elems, expos, N, c = 3) + of 4: multiExpImpl_reference_vartime(r, elems, expos, N, c = 4) + of 5: multiExpImpl_reference_vartime(r, elems, expos, N, c = 5) + of 6: multiExpImpl_reference_vartime(r, elems, expos, N, c = 6) + of 7: multiExpImpl_reference_vartime(r, elems, expos, N, c = 7) + of 8: multiExpImpl_reference_vartime(r, elems, expos, N, c = 8) + of 9: multiExpImpl_reference_vartime(r, elems, expos, N, c = 9) + of 10: multiExpImpl_reference_vartime(r, elems, expos, N, c = 10) + of 11: multiExpImpl_reference_vartime(r, elems, expos, N, c = 11) + of 12: multiExpImpl_reference_vartime(r, elems, expos, N, c = 12) + of 13: multiExpImpl_reference_vartime(r, elems, expos, N, c = 13) + of 14: multiExpImpl_reference_vartime(r, elems, expos, N, c = 14) + of 15: multiExpImpl_reference_vartime(r, elems, expos, N, c = 15) + + of 16..20: multiExpImpl_reference_vartime(r, elems, expos, N, c = 16) + else: + unreachable() + +func multiExp_reference_vartime*[bits: static int, Gt]( + r: var Gt, + elems: ptr UncheckedArray[Gt], + expos: ptr UncheckedArray[BigInt[bits]], + N: int) {.tags:[VarTime, HeapAlloc].} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + multiExp_reference_dispatch_vartime(r, elems, expos, N) + +func multiExp_reference_vartime*[Gt](r: var Gt, elems: openArray[Gt], expos: openArray[BigInt]) {.tags:[VarTime, HeapAlloc].} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + debug: doAssert expos.len == elems.len + let N = elems.len + multiExp_reference_dispatch_vartime(r, elems.asUnchecked(), expos.asUnchecked(), N) + +func multiExp_reference_vartime*[F, Gt]( + r: var Gt, + elems: ptr UncheckedArray[Gt], + expos: ptr UncheckedArray[F], + len: int) {.tags:[VarTime, Alloca, HeapAlloc], meter.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + let n = cast[int](len) + let expos_big = allocHeapArrayAligned(F.getBigInt(), n, alignment = 64) + expos_big.batchFromField(expos, n) + r.multiExp_reference_vartime(elems, expos_big, n) + + freeHeapAligned(expos_big) + +func multiExp_reference_vartime*[Gt]( + r: var Gt, + elems: openArray[Gt], + expos: openArray[Fr]) {.tags:[VarTime, Alloca, HeapAlloc], inline.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + debug: doAssert expos.len == elems.len + let N = elems.len + multiExp_reference_vartime(r, elems.asUnchecked(), expos.asUnchecked(), N) + +# ########################################################### # +# # +# Multi-exponentiations in 𝔾ₜ # +# Optimized version # +# # +# ########################################################### # + + +func accumulate[GT](buckets: ptr UncheckedArray[GT], val: SecretWord, negate: SecretBool, elem: GT) {.inline, meter.} = + let val = BaseType(val) + if val == 0: # Skip g⁰ + return + elif negate.bool: + buckets[val-1] ~/= elem + else: + buckets[val-1] ~*= elem + +func bucketReduce[GT](r: var GT, buckets: ptr UncheckedArray[GT], numBuckets: static int) {.meter.} = + # We interleave reduction with one-ing the bucket to use instruction-level parallelism + + var accumBuckets{.noInit.}: typeof(r) + accumBuckets = buckets[numBuckets-1] + r = buckets[numBuckets-1] + buckets[numBuckets-1].setNeutral() + + for k in countdown(numBuckets-2, 0): + accumBuckets ~*= buckets[k] + r ~*= accumBuckets + buckets[k].setNeutral() + +type MiniMultiExpKind* = enum + kTopWindow + kFullWindow + kBottomWindow + +func bucketAccumReduce[bits: static int, GT]( + r: var GT, + buckets: ptr UncheckedArray[GT], + bitIndex: int, miniMultiExpKind: static MiniMultiExpKind, c: static int, + elems: ptr UncheckedArray[GT], expos: ptr UncheckedArray[BigInt[bits]], N: int) = + + const excess = bits mod c + const top = bits - excess + + # 1. Bucket Accumulation + var curVal, nextVal: SecretWord + var curNeg, nextNeg: SecretBool + + template getSignedWindow(j : int): tuple[val: SecretWord, neg: SecretBool] = + when miniMultiExpKind == kBottomWindow: expos[j].getSignedBottomWindow(c) + elif miniMultiExpKind == kTopWindow: expos[j].getSignedTopWindow(top, excess) + else: expos[j].getSignedFullWindowAt(bitIndex, c) + + (curVal, curNeg) = getSignedWindow(0) + for j in 0 ..< N-1: + (nextVal, nextNeg) = getSignedWindow(j+1) + if nextVal.BaseType != 0: + # In cryptography, points are indistinguishable from random + # hence, without prefetching, accessing the next bucket is a guaranteed cache miss + prefetchLarge(buckets[nextVal.BaseType-1].addr, Write, HighTemporalLocality, maxCacheLines = 2) + buckets.accumulate(curVal, curNeg, elems[j]) + curVal = nextVal + curNeg = nextNeg + buckets.accumulate(curVal, curNeg, elems[N-1]) + + # 2. Bucket Reduction + r.bucketReduce(buckets, numBuckets = 1 shl (c-1)) + +func miniMultiExp[bits: static int, GT]( + r: var GT, + buckets: ptr UncheckedArray[GT], + bitIndex: int, miniMultiExpKind: static MiniMultiExpKind, c: static int, + elems: ptr UncheckedArray[GT], expos: ptr UncheckedArray[BigInt[bits]], N: int) {.meter.} = + ## Apply a mini-Multi-Exponentiation on [bitIndex, bitIndex+window) + ## slice of all (coef, point) pairs + + var windowProd{.noInit.}: typeof(r) + windowProd.bucketAccumReduce( + buckets, bitIndex, miniMultiExpKind, c, + elems, expos, N) + + # 3. Mini-MultiExp on the slice [bitIndex, bitIndex+window) + r ~*= windowProd + when miniMultiExpKind != kBottomWindow: + for _ in 0 ..< c: + r.cyclotomic_square() + +func multiExpImpl_vartime[bits: static int, GT]( + r: var GT, + elems: ptr UncheckedArray[GT], expos: ptr UncheckedArray[BigInt[bits]], + N: int, c: static int) {.tags:[VarTime, HeapAlloc], meter.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + + # Setup + # ----- + const numBuckets = 1 shl (c-1) + + let buckets = allocHeapArray(GT, numBuckets) + for i in 0 ..< numBuckets: + buckets[i].setNeutral() + + # Algorithm + # --------- + const excess = bits mod c + const top = bits - excess + var w = top + r.setNeutral() + + when top != 0: # Prologue + when excess != 0: + r.miniMultiExp(buckets, w, kTopWindow, c, elems, expos, N) + w -= c + else: + # If c divides bits exactly, the signed windowed recoding still needs to see an extra 0 + # Since we did r.setNeutral() earlier, this is a no-op + discard + + while w != 0: # Steady state + r.miniMultiExp(buckets, w, kFullWindow, c, elems, expos, N) + w -= c + + block: # Epilogue + r.miniMultiExp(buckets, w, kBottomWindow, c, elems, expos, N) + + # Cleanup + # ------- + buckets.freeHeap() + +# Endomorphism acceleration +# ----------------------------------------------------------------------------------------------------------------------- + +proc applyEndomorphism[bits: static int, GT]( + elems: ptr UncheckedArray[GT], + expos: ptr UncheckedArray[BigInt[bits]], + N: int): auto = + ## Decompose (elems, expos) into mini-scalars + ## Returns a new triplet (endoElems, endoExpos, N) + ## endoElems and endoExpos MUST be freed afterwards + + const M = when Gt is Fp6: 2 + elif Gt is Fp12: 4 + else: {.error: "Unconfigured".} + + const L = Fr[Gt.Name].bits().computeEndoRecodedLength(M) + let splitExpos = allocHeapArray(array[M, BigInt[L]], N) + let endoBasis = allocHeapArray(array[M, GT], N) + + for i in 0 ..< N: + var negateElems {.noinit.}: array[M, SecretBool] + splitExpos[i].decomposeEndo(negateElems, expos[i], Fr[Gt.Name].bits(), Gt.Name, G2) # 𝔾ₜ has same decomposition as 𝔾₂ + if negateElems[0].bool: + endoBasis[i][0].cyclotomic_inv(elems[i]) + else: + endoBasis[i][0] = elems[i] + + cast[ptr array[M-1, GT]](endoBasis[i][1].addr)[].computeEndomorphisms(elems[i]) + for m in 1 ..< M: + if negateElems[m].bool: + endoBasis[i][m].cyclotomic_inv() + + let endoElems = cast[ptr UncheckedArray[GT]](endoBasis) + let endoExpos = cast[ptr UncheckedArray[BigInt[L]]](splitExpos) + + return (endoElems, endoExpos, M*N) + +template withEndo[exponentsBits: static int, GT]( + multiExpProc: untyped, + r: var GT, + elems: ptr UncheckedArray[GT], + expos: ptr UncheckedArray[BigInt[exponentsBits]], + N: int, c: static int) = + when Gt.Name.hasEndomorphismAcceleration() and + EndomorphismThreshold <= exponentsBits and + exponentsBits <= Fr[Gt.Name].bits(): + let (endoElems, endoExpos, endoN) = applyEndomorphism(elems, expos, N) + # Given that bits and N changed, we are able to use a bigger `c` + # TODO: bench + multiExpProc(r, endoElems, endoExpos, endoN, c) + freeHeap(endoElems) + freeHeap(endoExpos) + else: + multiExpProc(r, elems, expos, N, c) + +# Algorithm selection +# ----------------------------------------------------------------------------------------------------------------------- + +func multiexp_dispatch_vartime[bits: static int, GT]( + r: var GT, + elems: ptr UncheckedArray[GT], + expos: ptr UncheckedArray[BigInt[bits]], N: int) = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + let c = bestBucketBitSize(N, bits, useSignedBuckets = true, useManualTuning = true) + + # Given that bits and N change after applying an endomorphism, + # we are able to use a bigger `c` + # TODO: benchmark + + case c + of 2: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 2) + of 3: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 3) + of 4: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 4) + of 5: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 5) + of 6: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 6) + of 7: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 7) + of 8: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 8) + of 9: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 9) + of 10: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 10) + of 11: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 11) + of 12: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 12) + of 13: withEndo(multiExpImpl_vartime, r, elems, expos, N, c = 13) + of 14: multiExpImpl_vartime(r, elems, expos, N, c = 14) + of 15: multiExpImpl_vartime(r, elems, expos, N, c = 15) + + of 16..17: multiExpImpl_vartime(r, elems, expos, N, c = 16) + else: + unreachable() + +func multiExp_vartime*[bits: static int, GT]( + r: var GT, + elems: ptr UncheckedArray[GT], + expos: ptr UncheckedArray[BigInt[bits]], + len: int) {.tags:[VarTime, Alloca, HeapAlloc], meter, inline.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + multiExp_dispatch_vartime(r, elems, expos, len) + +func multiExp_vartime*[bits: static int, GT]( + r: var GT, + elems: openArray[GT], + expos: openArray[BigInt[bits]]) {.tags:[VarTime, Alloca, HeapAlloc], meter, inline.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + debug: doAssert elems.len == expos.len + let N = elems.len + multiExp_dispatch_vartime(r, elems.asUnchecked(), expos.asUnchecked(), N) + +func multiExp_vartime*[F, GT]( + r: var GT, + elems: ptr UncheckedArray[GT], + expos: ptr UncheckedArray[F], + len: int) {.tags:[VarTime, Alloca, HeapAlloc], meter.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + let n = cast[int](len) + let expos_big = allocHeapArrayAligned(F.getBigInt(), n, alignment = 64) + expos_big.batchFromField(expos, n) + r.multiExp_vartime(elems, expos_big, n) + + freeHeapAligned(expos_big) + +func multiExp_vartime*[GT]( + r: var GT, + elems: openArray[GT], + expos: openArray[Fr]) {.tags:[VarTime, Alloca, HeapAlloc], inline.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + debug: doAssert elems.len == expos.len + let N = elems.len + multiExp_vartime(r, elems.asUnchecked(), expos.asUnchecked(), N) diff --git a/constantine/math/pairings/gt_multiexp_parallel.nim b/constantine/math/pairings/gt_multiexp_parallel.nim new file mode 100644 index 000000000..cb81c6138 --- /dev/null +++ b/constantine/math/pairings/gt_multiexp_parallel.nim @@ -0,0 +1,258 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import constantine/named/algebras, + constantine/math/endomorphisms/split_scalars, + constantine/math/extension_fields, + constantine/math/arithmetic, + constantine/named/zoo_endomorphisms, + constantine/platforms/abstractions, + ./cyclotomic_subgroups, + constantine/threadpool + +import ./gt_multiexp {.all.} + +# No exceptions allowed in core cryptographic operations +{.push raises: [].} +{.push checks: off.} + +# ########################################################### # +# # +# Multi-Exponentiation in 𝔾ₜ # +# # +# ########################################################### # + +proc bucketAccumReduce_withInit[bits: static int, GT]( + windowProd: ptr GT, + buckets: ptr GT or ptr UncheckedArray[GT], + bitIndex: int, miniMultiExpKind: static MiniMultiExpKind, c: static int, + elems: ptr UncheckedArray[GT], expos: ptr UncheckedArray[BigInt[bits]], N: int) = + const numBuckets = 1 shl (c-1) + let buckets = cast[ptr UncheckedArray[GT]](buckets) + for i in 0 ..< numBuckets: + buckets[i].setNeutral() + bucketAccumReduce(windowProd[], buckets, bitIndex, miniMultiExpKind, c, elems, expos, N) + +proc multiexpImpl_vartime_parallel[bits: static int, GT]( + tp: Threadpool, + r: ptr GT, + elems: ptr UncheckedArray[GT], expos: ptr UncheckedArray[BigInt[bits]], + N: int, c: static int) = + + # Prologue + # -------- + const numBuckets = 1 shl (c-1) + const numFullWindows = bits div c + const numWindows = numFullWindows + 1 # Even if `bits div c` is exact, the signed recoding needs to see an extra 0 after the MSB + + # Instead of storing the result in futures, risking them being scattered in memory + # we store them in a contiguous array, and the synchronizing future just returns a bool. + # top window is done on this thread + let miniMultiExpsResults = allocHeapArray(GT, numFullWindows) + let miniMultiExpsReady = allocStackArray(FlowVar[bool], numFullWindows) + + let bucketsMatrix = allocHeapArray(GT, numBuckets*numWindows) + + # Algorithm + # --------- + + block: # 1. Bucket accumulation and reduction + miniMultiExpsReady[0] = tp.spawnAwaitable bucketAccumReduce_withInit( + miniMultiExpsResults[0].addr, + bucketsMatrix[0].addr, + bitIndex = 0, kBottomWindow, c, + elems, expos, N) + + for w in 1 ..< numFullWindows: + miniMultiExpsReady[w] = tp.spawnAwaitable bucketAccumReduce_withInit( + miniMultiExpsResults[w].addr, + bucketsMatrix[w*numBuckets].addr, + bitIndex = w*c, kFullWindow, c, + elems, expos, N) + + # Last window is done sync on this thread, directly initializing r + const excess = bits mod c + const top = bits-excess + + when top != 0: + when excess != 0: + bucketAccumReduce_withInit( + r, + bucketsMatrix[numFullWindows*numBuckets].addr, + bitIndex = top, kTopWindow, c, + elems, expos, N) + else: + r[].setNeutral() + + # 3. Final reduction, r initialized to what would be miniMSMsReady[numWindows-1] + when excess != 0: + for w in countdown(numWindows-2, 0): + for _ in 0 ..< c: + r[].cyclotomic_square() + discard sync miniMultiExpsReady[w] + r[] ~*= miniMultiExpsResults[w] + elif numWindows >= 2: + discard sync miniMultiExpsReady[numWindows-2] + r[] = miniMultiExpsResults[numWindows-2] + for w in countdown(numWindows-3, 0): + for _ in 0 ..< c: + r[].cyclotomic_square() + discard sync miniMultiExpsReady[w] + r[] ~*= miniMultiExpsResults[w] + + # Cleanup + # ------- + miniMultiExpsResults.freeHeap() + bucketsMatrix.freeHeap() + +# Endomorphism acceleration +# ----------------------------------------------------------------------------------------------------------------------- + +proc applyEndomorphism_parallel[bits: static int, GT]( + tp: Threadpool, + elems: ptr UncheckedArray[GT], + expos: ptr UncheckedArray[BigInt[bits]], + N: int): auto = + ## Decompose (elems, expos) into mini-scalars + ## Returns a new triplet (endoElems, endoExpos, N) + ## endoElems and endoExpos MUST be freed afterwards + + const M = when Gt is Fp6: 2 + elif Gt is Fp12: 4 + else: {.error: "Unconfigured".} + + const L = Fr[Gt.Name].bits().computeEndoRecodedLength(M) + let splitExpos = allocHeapArray(array[M, BigInt[L]], N) + let endoBasis = allocHeapArray(array[M, GT], N) + + syncScope: + tp.parallelFor i in 0 ..< N: + captures: {elems, expos, splitExpos, endoBasis} + + var negateElems {.noinit.}: array[M, SecretBool] + splitExpos[i].decomposeEndo(negateElems, expos[i], Fr[Gt.Name].bits(), Gt.Name, G2) # 𝔾ₜ has same decomposition as 𝔾₂ + if negateElems[0].bool: + endoBasis[i][0].cyclotomic_inv(elems[i]) + else: + endoBasis[i][0] = elems[i] + + cast[ptr array[M-1, GT]](endoBasis[i][1].addr)[].computeEndomorphisms(elems[i]) + for m in 1 ..< M: + if negateElems[m].bool: + endoBasis[i][m].cyclotomic_inv() + + let endoElems = cast[ptr UncheckedArray[GT]](endoBasis) + let endoExpos = cast[ptr UncheckedArray[BigInt[L]]](splitExpos) + + return (endoElems, endoExpos, M*N) + +template withEndo[exponentsBits: static int, GT]( + multiExpProc: untyped, + tp: Threadpool, + r: ptr GT, + elems: ptr UncheckedArray[GT], + expos: ptr UncheckedArray[BigInt[exponentsBits]], + N: int, c: static int) = + when Gt.Name.hasEndomorphismAcceleration() and + EndomorphismThreshold <= exponentsBits and + exponentsBits <= Fr[Gt.Name].bits(): + let (endoElems, endoExpos, endoN) = applyEndomorphism_parallel(tp, elems, expos, N) + # Given that bits and N changed, we are able to use a bigger `c` + # TODO: bench + multiExpProc(tp, r, endoElems, endoExpos, endoN, c) + freeHeap(endoElems) + freeHeap(endoExpos) + else: + multiExpProc(tp, r, elems, expos, N, c) + +# Algorithm selection +# ----------------------------------------------------------------------------------------------------------------------- + +proc multiexp_dispatch_vartime_parallel[bits: static int, GT]( + tp: Threadpool, + r: ptr GT, + elems: ptr UncheckedArray[GT], + expos: ptr UncheckedArray[BigInt[bits]], N: int) = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + let c = bestBucketBitSize(N, bits, useSignedBuckets = true, useManualTuning = true) + + # Given that bits and N change after applying an endomorphism, + # we are able to use a bigger `c` + # TODO: benchmark + + case c + of 2: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 2) + of 3: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 3) + of 4: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 4) + of 5: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 5) + of 6: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 6) + of 7: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 7) + of 8: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 8) + of 9: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 9) + of 10: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 10) + of 11: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 11) + of 12: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 12) + of 13: withEndo(multiExpImpl_vartime_parallel, tp, r, elems, expos, N, c = 13) + of 14: multiExpImpl_vartime_parallel(tp, r, elems, expos, N, c = 14) + of 15: multiExpImpl_vartime_parallel(tp, r, elems, expos, N, c = 15) + + of 16..17: multiExpImpl_vartime_parallel(tp, r, elems, expos, N, c = 16) + else: + unreachable() + +proc multiExp_vartime_parallel*[bits: static int, GT]( + tp: Threadpool, + r: ptr GT, + elems: ptr UncheckedArray[GT], + expos: ptr UncheckedArray[BigInt[bits]], + len: int) {.meter, inline.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + tp.multiExp_dispatch_vartime_parallel(r, elems, expos, len) + +proc multiExp_vartime_parallel*[bits: static int, GT]( + tp: Threadpool, + r: var GT, + elems: openArray[GT], + expos: openArray[BigInt[bits]]) {.meter, inline.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + debug: doAssert elems.len == expos.len + let N = elems.len + tp.multiExp_dispatch_vartime_parallel(r.addr, elems.asUnchecked(), expos.asUnchecked(), N) + +proc multiExp_vartime_parallel*[F, GT]( + tp: Threadpool, + r: ptr GT, + elems: ptr UncheckedArray[GT], + expos: ptr UncheckedArray[F], + len: int) {.meter.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + let n = cast[int](len) + let expos_big = allocHeapArrayAligned(F.getBigInt(), n, alignment = 64) + + syncScope: + tp.parallelFor i in 0 ..< n: + captures: {expos, expos_big} + expos_big[i].fromField(expos[i]) + tp.multiExp_vartime_parallel(r, elems, expos_big, n) + + freeHeapAligned(expos_big) + +proc multiExp_vartime_parallel*[GT]( + tp: Threadpool, + r: var GT, + elems: openArray[GT], + expos: openArray[Fr]) {.meter, inline.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + debug: doAssert elems.len == expos.len + let N = elems.len + tp.multiExp_vartime_parallel(r.addr, elems.asUnchecked(), expos.asUnchecked(), N) diff --git a/constantine/math/pairings/pairings_generic.nim b/constantine/math/pairings/pairings_generic.nim index a8a79f696..df95b44e6 100644 --- a/constantine/math/pairings/pairings_generic.nim +++ b/constantine/math/pairings/pairings_generic.nim @@ -27,11 +27,16 @@ func millerLoop*[Name: static Algebra](gt: var Fp12[Name], Q, P: auto, n: int) { else: gt.millerLoopAddchain(Q, P, n) -func finalExp*[Name: static Algebra](gt: var Fp12[Name]){.inline.} = - gt.finalExpEasy() +export finalExpEasy + +func finalExpHard*[Name: static Algebra](gt: var Fp12[Name]) {.inline.} = when family(Name) == BarretoNaehrig: gt.finalExpHard_BN() elif family(Name) == BarretoLynnScott: gt.finalExpHard_BLS12() else: {.error: "Final Exponentiation not implemented for " & $Name.} + +func finalExp*[Name: static Algebra](gt: var Fp12[Name]){.inline.} = + gt.finalExpEasy() + gt.finalExpHard() diff --git a/tests/math_pairings/t_pairing_bls12_377_optate.nim b/tests/math_pairings/t_pairing_bls12_377_optate.nim index d6cbe09ee..87f2cbd75 100644 --- a/tests/math_pairings/t_pairing_bls12_377_optate.nim +++ b/tests/math_pairings/t_pairing_bls12_377_optate.nim @@ -6,14 +6,11 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import - constantine/math/pairings/pairings_bls12, - # Test utilities - ./t_pairing_template +import ./t_pairing_template runPairingTests( - 4, BLS12_377, + BLS12_377, G1 = EC_ShortW_Prj[Fp[BLS12_377], G1], G2 = EC_ShortW_Prj[Fp2[BLS12_377], G2], GT = Fp12[BLS12_377], - pairing_bls12) + iters = 4) diff --git a/tests/math_pairings/t_pairing_bls12_381_gt_exp.nim b/tests/math_pairings/t_pairing_bls12_381_gt_exp.nim index 2309c628c..9f65921d1 100644 --- a/tests/math_pairings/t_pairing_bls12_381_gt_exp.nim +++ b/tests/math_pairings/t_pairing_bls12_381_gt_exp.nim @@ -10,6 +10,4 @@ import # Test utilities ./t_pairing_template -runGTexponentiationTests( - Iters = 4, - GT = Fp12[BLS12_381]) +runGTexponentiationTests(GT = Fp12[BLS12_381], iters =4) diff --git a/tests/math_pairings/t_pairing_bls12_381_gt_multiexp.nim b/tests/math_pairings/t_pairing_bls12_381_gt_multiexp.nim new file mode 100644 index 000000000..3a95462c9 --- /dev/null +++ b/tests/math_pairings/t_pairing_bls12_381_gt_multiexp.nim @@ -0,0 +1,18 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Test utilities + ./t_pairing_template + +const numPoints = [1, 2, 8, 16, 128, 256, 1024] + +runGTmultiexpTests( + GT = Fp12[BLS12_381], + numPoints, + iters = 4) diff --git a/tests/math_pairings/t_pairing_bls12_381_gt_subgroup.nim b/tests/math_pairings/t_pairing_bls12_381_gt_subgroup.nim index 371ef6226..31ea0f87d 100644 --- a/tests/math_pairings/t_pairing_bls12_381_gt_subgroup.nim +++ b/tests/math_pairings/t_pairing_bls12_381_gt_subgroup.nim @@ -6,12 +6,6 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import - constantine/math/pairings/pairings_bls12, - # Test utilities - ./t_pairing_template +import ./t_pairing_template -runGTsubgroupTests( - Iters = 4, - GT = Fp12[BLS12_381], - finalExpHard_BLS12) +runGTsubgroupTests(GT = Fp12[BLS12_381], iters = 4) diff --git a/tests/math_pairings/t_pairing_bls12_381_optate.nim b/tests/math_pairings/t_pairing_bls12_381_optate.nim index eda18b2e9..b75fa67bc 100644 --- a/tests/math_pairings/t_pairing_bls12_381_optate.nim +++ b/tests/math_pairings/t_pairing_bls12_381_optate.nim @@ -6,14 +6,11 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import - constantine/math/pairings/pairings_bls12, - # Test utilities - ./t_pairing_template +import ./t_pairing_template runPairingTests( - 4, BLS12_381, + BLS12_381, G1 = EC_ShortW_Prj[Fp[BLS12_381], G1], G2 = EC_ShortW_Prj[Fp2[BLS12_381], G2], GT = Fp12[BLS12_381], - pairing_bls12) + iters = 4) diff --git a/tests/math_pairings/t_pairing_bn254_nogami_gt_subgroup.nim b/tests/math_pairings/t_pairing_bn254_nogami_gt_subgroup.nim index 0838afd0f..71b9a8ad2 100644 --- a/tests/math_pairings/t_pairing_bn254_nogami_gt_subgroup.nim +++ b/tests/math_pairings/t_pairing_bn254_nogami_gt_subgroup.nim @@ -6,12 +6,6 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import - constantine/math/pairings/pairings_bn, - # Test utilities - ./t_pairing_template +import ./t_pairing_template -runGTsubgroupTests( - Iters = 4, - GT = Fp12[BN254_Nogami], - finalExpHard_BN) +runGTsubgroupTests(GT = Fp12[BN254_Nogami], iters = 4) diff --git a/tests/math_pairings/t_pairing_bn254_nogami_optate.nim b/tests/math_pairings/t_pairing_bn254_nogami_optate.nim index 6c72eea58..d5b3fc755 100644 --- a/tests/math_pairings/t_pairing_bn254_nogami_optate.nim +++ b/tests/math_pairings/t_pairing_bn254_nogami_optate.nim @@ -6,14 +6,11 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import - constantine/math/pairings/pairings_bn, - # Test utilities - ./t_pairing_template +import ./t_pairing_template runPairingTests( - 4, BN254_Nogami, + BN254_Nogami, G1 = EC_ShortW_Prj[Fp[BN254_Nogami], G1], G2 = EC_ShortW_Prj[Fp2[BN254_Nogami], G2], GT = Fp12[BN254_Nogami], - pairing_bn) + iters = 4) diff --git a/tests/math_pairings/t_pairing_bn254_snarks_gt_exp.nim b/tests/math_pairings/t_pairing_bn254_snarks_gt_exp.nim index dea4efd8b..38d8cc3f4 100644 --- a/tests/math_pairings/t_pairing_bn254_snarks_gt_exp.nim +++ b/tests/math_pairings/t_pairing_bn254_snarks_gt_exp.nim @@ -10,6 +10,4 @@ import # Test utilities ./t_pairing_template -runGTexponentiationTests( - Iters = 4, - GT = Fp12[BN254_Nogami]) +runGTexponentiationTests(GT = Fp12[BN254_Nogami], iters = 4) diff --git a/tests/math_pairings/t_pairing_bn254_snarks_gt_multiexp.nim b/tests/math_pairings/t_pairing_bn254_snarks_gt_multiexp.nim new file mode 100644 index 000000000..d9e746a95 --- /dev/null +++ b/tests/math_pairings/t_pairing_bn254_snarks_gt_multiexp.nim @@ -0,0 +1,18 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Test utilities + ./t_pairing_template + +const numPoints = [1, 2, 8, 16, 128, 256, 1024] + +runGTmultiexpTests( + GT = Fp12[BN254_Snarks], + numPoints, + iters = 4) diff --git a/tests/math_pairings/t_pairing_bn254_snarks_gt_subgroup.nim b/tests/math_pairings/t_pairing_bn254_snarks_gt_subgroup.nim index 7a3002de8..d78797797 100644 --- a/tests/math_pairings/t_pairing_bn254_snarks_gt_subgroup.nim +++ b/tests/math_pairings/t_pairing_bn254_snarks_gt_subgroup.nim @@ -6,12 +6,6 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import - constantine/math/pairings/pairings_bn, - # Test utilities - ./t_pairing_template +import ./t_pairing_template -runGTsubgroupTests( - Iters = 4, - GT = Fp12[BN254_Snarks], - finalExpHard_BN) +runGTsubgroupTests(GT = Fp12[BN254_Snarks], iters = 4) diff --git a/tests/math_pairings/t_pairing_bn254_snarks_optate.nim b/tests/math_pairings/t_pairing_bn254_snarks_optate.nim index 59635f766..6cbd21ea0 100644 --- a/tests/math_pairings/t_pairing_bn254_snarks_optate.nim +++ b/tests/math_pairings/t_pairing_bn254_snarks_optate.nim @@ -6,14 +6,11 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import - constantine/math/pairings/pairings_bn, - # Test utilities - ./t_pairing_template +import ./t_pairing_template runPairingTests( - 4, BN254_Snarks, + BN254_Snarks, G1 = EC_ShortW_Prj[Fp[BN254_Snarks], G1], G2 = EC_ShortW_Prj[Fp2[BN254_Snarks], G2], GT = Fp12[BN254_Snarks], - pairing_bn) + iters = 4) diff --git a/tests/math_pairings/t_pairing_bw6_761_gt_subgroup.nim b/tests/math_pairings/t_pairing_bw6_761_gt_subgroup.nim index 7be0753b8..4a5e72c10 100644 --- a/tests/math_pairings/t_pairing_bw6_761_gt_subgroup.nim +++ b/tests/math_pairings/t_pairing_bw6_761_gt_subgroup.nim @@ -6,12 +6,6 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import - constantine/math/pairings/pairings_bw6_761, - # Test utilities - ./t_pairing_template +import ./t_pairing_template -runGTsubgroupTests( - Iters = 4, - GT = Fp6[BW6_761], - finalExpHard_BW6_761) +runGTsubgroupTests(GT = Fp6[BW6_761], iters = 4) diff --git a/tests/math_pairings/t_pairing_bw6_761_optate.nim b/tests/math_pairings/t_pairing_bw6_761_optate.nim index 34870bed3..7c92c115a 100644 --- a/tests/math_pairings/t_pairing_bw6_761_optate.nim +++ b/tests/math_pairings/t_pairing_bw6_761_optate.nim @@ -6,14 +6,11 @@ # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). # at your option. This file may not be copied, modified, or distributed except according to those terms. -import - constantine/math/pairings/pairings_bw6_761, - # Test utilities - ./t_pairing_template +import ./t_pairing_template runPairingTests( - 4, BW6_761, + BW6_761, G1 = EC_ShortW_Prj[Fp[BW6_761], G1], G2 = EC_ShortW_Prj[Fp[BW6_761], G2], GT = Fp6[BW6_761], - pairing_bw6_761_reference) + iters = 4) diff --git a/tests/math_pairings/t_pairing_template.nim b/tests/math_pairings/t_pairing_template.nim index 0c7382478..a19b777b4 100644 --- a/tests/math_pairings/t_pairing_template.nim +++ b/tests/math_pairings/t_pairing_template.nim @@ -14,12 +14,13 @@ import constantine/math/arithmetic, constantine/math/extension_fields, constantine/named/algebras, - constantine/named/zoo_subgroups, + constantine/named/[zoo_subgroups, zoo_pairings], constantine/math/elliptic/[ec_shortweierstrass_affine, ec_shortweierstrass_projective], constantine/math/pairings/[ cyclotomic_subgroups, gt_exponentiations, gt_exponentiations_vartime, + gt_multiexp, pairings_generic], constantine/math/io/io_extfields, @@ -27,13 +28,10 @@ import helpers/prng_unsafe export - prng_unsafe, times, unittest, + unittest, # Generic sandwich ec_shortweierstrass_affine, ec_shortweierstrass_projective, - arithmetic, extension_fields, - io_extfields, - cyclotomic_subgroups, - gt_exponentiations, gt_exponentiations_vartime, - abstractions, algebras + extension_fields, + algebras type RandomGen* = enum @@ -64,7 +62,7 @@ func random_point*(rng: var RngState, EC: typedesc, randZ: bool, gen: RandomGen) result = rng.random_long01Seq(EC) result.clearCofactor() -template runPairingTests*(Iters: static int, Name: static Algebra, G1, G2, GT: typedesc, pairing_fn: untyped): untyped {.dirty.}= +proc runPairingTests*(Name: static Algebra, G1, G2, GT: typedesc, iters: int) = bind affineType var rng: RngState @@ -73,8 +71,8 @@ template runPairingTests*(Iters: static int, Name: static Algebra, G1, G2, GT: t echo "\n------------------------------------------------------\n" echo "test_pairing_",$Name,"_optate xoshiro512** seed: ", timeseed - proc test_bilinearity_double_impl(randZ: bool, gen: RandomGen) = - for _ in 0 ..< Iters: + proc test_bilinearity_double_impl(randZ: bool, gen: RandomGen, iters: int) = + for _ in 0 ..< iters: let P = rng.random_point(G1, randZ, gen) let Q = rng.random_point(G2, randZ, gen) var P2: typeof(P) @@ -93,10 +91,10 @@ template runPairingTests*(Iters: static int, Name: static Algebra, G1, G2, GT: t Qa.affine(Q) Qa2.affine(Q2) - r.pairing_fn(Pa, Qa) + r.pairing(Pa, Qa) r.square() - r2.pairing_fn(Pa2, Qa) - r3.pairing_fn(Pa, Qa2) + r2.pairing(Pa2, Qa) + r3.pairing(Pa, Qa2) doAssert bool(not r.isZero()) doAssert bool(not r.isOne()) @@ -106,11 +104,11 @@ template runPairingTests*(Iters: static int, Name: static Algebra, G1, G2, GT: t suite "Pairing - Optimal Ate on " & $Name & " [" & $WordBitWidth & "-bit words]": test "Bilinearity e([2]P, Q) = e(P, [2]Q) = e(P, Q)^2": - test_bilinearity_double_impl(randZ = false, gen = Uniform) - test_bilinearity_double_impl(randZ = false, gen = HighHammingWeight) - test_bilinearity_double_impl(randZ = false, gen = Long01Sequence) + test_bilinearity_double_impl(randZ = false, gen = Uniform, iters) + test_bilinearity_double_impl(randZ = false, gen = HighHammingWeight, iters) + test_bilinearity_double_impl(randZ = false, gen = Long01Sequence, iters) -func random_elem(rng: var RngState, F: typedesc, gen: RandomGen): F {.inline, noInit.} = +func random_elem*(rng: var RngState, F: typedesc, gen: RandomGen): F {.noInit.} = if gen == Uniform: result = rng.random_unsafe(F) elif gen == HighHammingWeight: @@ -118,7 +116,7 @@ func random_elem(rng: var RngState, F: typedesc, gen: RandomGen): F {.inline, no else: result = rng.random_long01Seq(F) -template runGTsubgroupTests*(Iters: static int, GT: typedesc, finalExpHard_fn: untyped): untyped {.dirty.}= +proc runGTsubgroupTests*(GT: typedesc, iters: int) = bind affineType, random_elem var rng: RngState @@ -127,9 +125,9 @@ template runGTsubgroupTests*(Iters: static int, GT: typedesc, finalExpHard_fn: u echo "\n------------------------------------------------------\n" echo "test_pairing_",$GT.Name,"_gt xoshiro512** seed: ", timeseed - proc test_gt_impl(gen: RandomGen) = + proc test_gt_impl(gen: RandomGen, iters: int) = stdout.write " " - for _ in 0 ..< Iters: + for _ in 0 ..< iters: let a = rng.random_elem(GT, gen) doAssert not bool a.isInCyclotomicSubgroup(), "The odds of generating randomly such an element are too low a: " & a.toHex() var a2 = a @@ -137,7 +135,7 @@ template runGTsubgroupTests*(Iters: static int, GT: typedesc, finalExpHard_fn: u doAssert bool a2.isInCyclotomicSubgroup() doAssert not bool a2.isInPairingSubgroup(), "The odds of generating randomly such an element are too low a2: " & a.toHex() var a3 = a2 - finalExpHard_fn(a3) + finalExpHard(a3) doAssert bool a3.isInCyclotomicSubgroup() doAssert bool a3.isInPairingSubgroup() stdout.write '.' @@ -146,30 +144,24 @@ template runGTsubgroupTests*(Iters: static int, GT: typedesc, finalExpHard_fn: u suite "Pairing - 𝔾ₜ subgroup " & $GT.Name & " [" & $WordBitWidth & "-bit words]": test "Final Exponentiation and 𝔾ₜ-subgroup membership": - test_gt_impl(gen = Uniform) - test_gt_impl(gen = HighHammingWeight) - test_gt_impl(gen = Long01Sequence) - -func random_gt*(rng: var RngState, F: typedesc, gen: RandomGen): F {.inline, noInit.} = - if gen == Uniform: - result = rng.random_unsafe(F) - elif gen == HighHammingWeight: - result = rng.random_highHammingWeight(F) - else: - result = rng.random_long01Seq(F) + test_gt_impl(gen = Uniform, iters) + test_gt_impl(gen = HighHammingWeight, iters) + test_gt_impl(gen = Long01Sequence, iters) +func random_gt(rng: var RngState, F: typedesc, gen: RandomGen): F {.noInit.} = + result = rng.random_elem(F, gen) result.finalExp() -template runGTexponentiationTests*(Iters: static int, GT: typedesc): untyped {.dirty.} = +proc runGTexponentiationTests*(GT: typedesc, iters: int) = var rng: RngState let timeseed = uint32(toUnix(getTime()) and (1'i64 shl 32 - 1)) # unixTime mod 2^32 seed(rng, timeseed) echo "\n------------------------------------------------------\n" echo "test_pairing_",$GT.Name,"_gt_exponentiation xoshiro512** seed: ", timeseed - proc test_gt_exponentiation_impl(gen: RandomGen) = + proc test_gt_exponentiation_impl(gen: RandomGen, iters: int) = stdout.write " " - for _ in 0 ..< Iters: + for _ in 0 ..< iters: let a = rng.random_gt(GT, gen) let kUnred = rng.random_long01seq(GT.Name.getBigInt(kScalarField)) var k {.noInit.}: GT.Name.getBigInt(kScalarField) @@ -219,6 +211,46 @@ template runGTexponentiationTests*(Iters: static int, GT: typedesc): untyped {.d suite "Pairing - Exponentiation for 𝔾ₜ " & $GT.Name & " [" & $WordBitWidth & "-bit words]": test "𝔾ₜ exponentiation consistency": - test_gt_exponentiation_impl(gen = Uniform) - test_gt_exponentiation_impl(gen = HighHammingWeight) - test_gt_exponentiation_impl(gen = Long01Sequence) + test_gt_exponentiation_impl(gen = Uniform, iters) + test_gt_exponentiation_impl(gen = HighHammingWeight, iters) + test_gt_exponentiation_impl(gen = Long01Sequence, iters) + +proc runGTmultiexpTests*[N: static int](GT: typedesc, num_points: array[N, int], iters: int) = + var rng: RngState + let timeseed = uint32(toUnix(getTime()) and (1'i64 shl 32 - 1)) # unixTime mod 2^32 + seed(rng, timeseed) + echo "\n------------------------------------------------------\n" + echo "test_pairing_",$GT.Name,"_gt_multiexp xoshiro512** seed: ", timeseed + + proc test_gt_multiexp_impl[N](GT: typedesc, rng: var RngState, num_points: array[N, int], gen: RandomGen, iters: int) = + for N in num_points: + stdout.write " " + for _ in 0 ..< iters: + var elems = newSeq[GT](N) + var exponents = newSeq[Fr[GT.Name]](N) + + for i in 0 ..< N: + elems[i] = rng.random_gt(GT, gen) + exponents[i] = rng.random_elem(Fr[GT.Name], gen) + + var naive: GT + naive.setOne() + for i in 0 ..< N: + var t {.noInit.}: GT + t.gtExp_vartime(elems[i], exponents[i]) + naive *= t + + var mexp_ref, mexp_opt: GT + mexp_ref.multiExp_reference_vartime(elems, exponents) + mexp_opt.multiExp_vartime(elems, exponents) + + doAssert bool(naive == mexp_ref) + doAssert bool(naive == mexp_opt) + + stdout.write '.' + + stdout.write '\n' + + suite "Pairing - MultiExponentiation for 𝔾ₜ " & $GT.Name & " [" & $WordBitWidth & "-bit words]": + test "𝔾ₜ multi-exponentiation consistency": + test_gt_multiexp_impl(GT, rng, num_points, gen = Long01Sequence, iters) diff --git a/tests/parallel/t_ec_template_parallel.nim b/tests/parallel/t_ec_template_parallel.nim index 06102c546..b6f1902f8 100644 --- a/tests/parallel/t_ec_template_parallel.nim +++ b/tests/parallel/t_ec_template_parallel.nim @@ -33,7 +33,7 @@ import # Test utilities helpers/prng_unsafe -export unittest, abstractions, arithmetic # Generic sandwich +export unittest, abstractions, arithmetic, ec_twistededwards_affine # Generic sandwich type RandomGen* = enum diff --git a/tests/parallel/t_pairing_bls12_381_gt_multiexp_parallel.nim b/tests/parallel/t_pairing_bls12_381_gt_multiexp_parallel.nim new file mode 100644 index 000000000..98fd820c4 --- /dev/null +++ b/tests/parallel/t_pairing_bls12_381_gt_multiexp_parallel.nim @@ -0,0 +1,18 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Test utilities + ./t_pairing_template_parallel + +const numPoints = [1, 2, 8, 16, 128, 256, 1024] + +runGTmultiexp_parallel_Tests( + GT = Fp12[BLS12_381], + numPoints, + Iters = 4) diff --git a/tests/parallel/t_pairing_template_parallel.nim b/tests/parallel/t_pairing_template_parallel.nim new file mode 100644 index 000000000..cfa6db6a4 --- /dev/null +++ b/tests/parallel/t_pairing_template_parallel.nim @@ -0,0 +1,88 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Standard library + std/unittest, times, + # Internals + constantine/platforms/abstractions, + constantine/math/extension_fields, + constantine/named/algebras, + constantine/math/pairings/[ + gt_exponentiations_vartime, + gt_multiexp_parallel, + pairings_generic], + constantine/threadpool, + + # Test utilities + helpers/prng_unsafe + +export + unittest, # generic sandwich + abstractions, # generic sandwich + extension_fields, + algebras + +type + RandomGen* = enum + Uniform + HighHammingWeight + Long01Sequence + +func random_elem(rng: var RngState, F: typedesc, gen: RandomGen): F {.noInit.} = + if gen == Uniform: + result = rng.random_unsafe(F) + elif gen == HighHammingWeight: + result = rng.random_highHammingWeight(F) + else: + result = rng.random_long01Seq(F) + +func random_gt(rng: var RngState, F: typedesc, gen: RandomGen): F {.noInit.} = + result = rng.random_elem(F, gen) + result.finalExp() + +proc runGTmultiexp_parallel_Tests*[N: static int](GT: typedesc, num_points: array[N, int], Iters: int) = + var rng: RngState + let timeseed = uint32(toUnix(getTime()) and (1'i64 shl 32 - 1)) # unixTime mod 2^32 + seed(rng, timeseed) + echo "\n------------------------------------------------------\n" + echo "test_pairing_",$GT.Name,"_gt_multiexp_parallel xoshiro512** seed: ", timeseed + + proc test_gt_multiexp_parallel_impl[N](GT: typedesc, rng: var RngState, num_points: array[N, int], gen: RandomGen, iters: int) = + let tp = Threadpool.new() + defer: tp.shutdown() + + for N in num_points: + stdout.write " " + for _ in 0 ..< iters: + var elems = newSeq[GT](N) + var exponents = newSeq[Fr[GT.Name]](N) + + for i in 0 ..< N: + elems[i] = rng.random_gt(GT, gen) + exponents[i] = rng.random_elem(Fr[GT.Name], gen) + + var naive: GT + naive.setOne() + for i in 0 ..< N: + var t {.noInit.}: GT + t.gtExp_vartime(elems[i], exponents[i]) + naive *= t + + var mexp: GT + tp.multiExp_vartime_parallel(mexp, elems, exponents) + + doAssert bool(naive == mexp) + + stdout.write '.' + + stdout.write '\n' + + suite "Pairing - Parallel MultiExponentiation for 𝔾ₜ " & $GT.Name & " [" & $WordBitWidth & "-bit words]": + test "Parallel 𝔾ₜ multi-exponentiation consistency": + test_gt_multiexp_parallel_impl(GT, rng, num_points, gen = Long01Sequence, Iters)