From 8fb61c6d264e6107eda69a01f84fae82ce344185 Mon Sep 17 00:00:00 2001 From: Mamy Ratsimbazafy Date: Tue, 16 Jul 2024 16:50:49 +0200 Subject: [PATCH] =?UTF-8?q?feat(gt-multiexp):=20add=20baseline=20multi-exp?= =?UTF-8?q?onentiation=20on=20=F0=9D=94=BE=E2=82=9C=20based=20on=20BDLO12?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/bench_ec_msm_bandersnatch.nimcfg | 1 - benchmarks/bench_ec_msm_bls12_381_g1.nim.cfg | 1 - benchmarks/bench_ec_msm_bls12_381_g2.nim.cfg | 1 - .../bench_ec_msm_bn254_snarks_g1.nim.cfg | 1 - benchmarks/bench_ec_msm_pasta.nim.cfg | 1 - .../bench_elliptic_parallel_template.nim | 4 +- ... bench_elliptic_parallel_template.nim.cfg} | 0 benchmarks/bench_gt.nim | 1 - benchmarks/bench_gt_multiexp_bls12_381.nim | 46 ++++ .../bench_gt_multiexp_bls12_381.nim.cfg | 1 + benchmarks/bench_gt_parallel_template.nim | 162 ++++++++++++ benchmarks/bench_gt_template.nim | 1 - constantine.nimble | 7 + .../ec_multi_scalar_mul_scheduler.nim | 1 + .../math/pairings/cyclotomic_subgroups.nim | 14 +- constantine/math/pairings/gt_multiexp.nim | 233 ++++++++++++++++++ .../math/pairings/pairings_generic.nim | 9 +- .../t_pairing_bls12_381_gt_multiexp.nim | 18 ++ .../t_pairing_bn254_snarks_gt_multiexp.nim | 18 ++ tests/math_pairings/t_pairing_template.nim | 83 +++++-- 20 files changed, 563 insertions(+), 40 deletions(-) delete mode 100644 benchmarks/bench_ec_msm_bandersnatch.nimcfg delete mode 100644 benchmarks/bench_ec_msm_bls12_381_g1.nim.cfg delete mode 100644 benchmarks/bench_ec_msm_bls12_381_g2.nim.cfg delete mode 100644 benchmarks/bench_ec_msm_bn254_snarks_g1.nim.cfg delete mode 100644 benchmarks/bench_ec_msm_pasta.nim.cfg rename benchmarks/{bench_ec_g1_batch.nim.cfg => bench_elliptic_parallel_template.nim.cfg} (100%) create mode 100644 benchmarks/bench_gt_multiexp_bls12_381.nim create mode 100644 benchmarks/bench_gt_multiexp_bls12_381.nim.cfg create mode 100644 benchmarks/bench_gt_parallel_template.nim create mode 100644 constantine/math/pairings/gt_multiexp.nim create mode 100644 tests/math_pairings/t_pairing_bls12_381_gt_multiexp.nim create mode 100644 tests/math_pairings/t_pairing_bn254_snarks_gt_multiexp.nim diff --git a/benchmarks/bench_ec_msm_bandersnatch.nimcfg b/benchmarks/bench_ec_msm_bandersnatch.nimcfg deleted file mode 100644 index 9d57ecf9..00000000 --- a/benchmarks/bench_ec_msm_bandersnatch.nimcfg +++ /dev/null @@ -1 +0,0 @@ ---threads:on \ No newline at end of file diff --git a/benchmarks/bench_ec_msm_bls12_381_g1.nim.cfg b/benchmarks/bench_ec_msm_bls12_381_g1.nim.cfg deleted file mode 100644 index 9d57ecf9..00000000 --- a/benchmarks/bench_ec_msm_bls12_381_g1.nim.cfg +++ /dev/null @@ -1 +0,0 @@ ---threads:on \ No newline at end of file diff --git a/benchmarks/bench_ec_msm_bls12_381_g2.nim.cfg b/benchmarks/bench_ec_msm_bls12_381_g2.nim.cfg deleted file mode 100644 index 9d57ecf9..00000000 --- a/benchmarks/bench_ec_msm_bls12_381_g2.nim.cfg +++ /dev/null @@ -1 +0,0 @@ ---threads:on \ No newline at end of file diff --git a/benchmarks/bench_ec_msm_bn254_snarks_g1.nim.cfg b/benchmarks/bench_ec_msm_bn254_snarks_g1.nim.cfg deleted file mode 100644 index 9d57ecf9..00000000 --- a/benchmarks/bench_ec_msm_bn254_snarks_g1.nim.cfg +++ /dev/null @@ -1 +0,0 @@ ---threads:on \ No newline at end of file diff --git a/benchmarks/bench_ec_msm_pasta.nim.cfg b/benchmarks/bench_ec_msm_pasta.nim.cfg deleted file mode 100644 index 9d57ecf9..00000000 --- a/benchmarks/bench_ec_msm_pasta.nim.cfg +++ /dev/null @@ -1 +0,0 @@ ---threads:on \ No newline at end of file diff --git a/benchmarks/bench_elliptic_parallel_template.nim b/benchmarks/bench_elliptic_parallel_template.nim index afc0a649..647a9f81 100644 --- a/benchmarks/bench_elliptic_parallel_template.nim +++ b/benchmarks/bench_elliptic_parallel_template.nim @@ -115,7 +115,7 @@ proc msmParallelBench*[EC](ctx: var BenchMsmContext[EC], numInputs: int, iters: var startNaive, stopNaive, startMSMbaseline, stopMSMbaseline, startMSMopt, stopMSMopt, startMSMpara, stopMSMpara: MonoTime if numInputs <= 100000: - startNaive = getMonotime() + # startNaive = getMonotime() bench("EC scalar muls " & align($numInputs, 10) & " (" & $bits & "-bit coefs, points)", EC, iters): var tmp: EC r.setNeutral() @@ -123,7 +123,7 @@ proc msmParallelBench*[EC](ctx: var BenchMsmContext[EC], numInputs: int, iters: tmp.fromAffine(points[i]) tmp.scalarMul(coefs[i]) r += tmp - stopNaive = getMonotime() + # stopNaive = getMonotime() if numInputs <= 100000: startNaive = getMonotime() diff --git a/benchmarks/bench_ec_g1_batch.nim.cfg b/benchmarks/bench_elliptic_parallel_template.nim.cfg similarity index 100% rename from benchmarks/bench_ec_g1_batch.nim.cfg rename to benchmarks/bench_elliptic_parallel_template.nim.cfg diff --git a/benchmarks/bench_gt.nim b/benchmarks/bench_gt.nim index cc10108e..a3599266 100644 --- a/benchmarks/bench_gt.nim +++ b/benchmarks/bench_gt.nim @@ -33,7 +33,6 @@ proc main() = separator() staticFor i, 0, AvailableCurves.len: const curve = AvailableCurves[i] - const bits = Fr[curve].bits() separator() mulBench(Fp12[curve], Iters) sqrBench(Fp12[curve], Iters) diff --git a/benchmarks/bench_gt_multiexp_bls12_381.nim b/benchmarks/bench_gt_multiexp_bls12_381.nim new file mode 100644 index 00000000..ef60000a --- /dev/null +++ b/benchmarks/bench_gt_multiexp_bls12_381.nim @@ -0,0 +1,46 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Internals + constantine/named/algebras, + constantine/math/extension_fields, + # Helpers + ./bench_gt_parallel_template + +# ############################################################ +# +# Benchmark of the 𝔾ₜ group of +# Pairing Friendly curves +# +# ############################################################ + +const Iters = 10000 +const AvailableCurves = [ + # BN254_Nogami, + # BN254_Snarks, + # BLS12_377, + BLS12_381, +] + +const testNumPoints = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] + +proc main() = + separator() + staticFor i, 0, AvailableCurves.len: + const curve = AvailableCurves[i] + var ctx = createBenchMultiExpContext(Fp12[curve], testNumPoints) + separator() + for numPoints in testNumPoints: + let batchIters = max(1, Iters div numPoints) + ctx.multiExpParallelBench(numPoints, batchIters) + separator() + separator() + +main() +notes() diff --git a/benchmarks/bench_gt_multiexp_bls12_381.nim.cfg b/benchmarks/bench_gt_multiexp_bls12_381.nim.cfg new file mode 100644 index 00000000..aed303ee --- /dev/null +++ b/benchmarks/bench_gt_multiexp_bls12_381.nim.cfg @@ -0,0 +1 @@ +--threads:on diff --git a/benchmarks/bench_gt_parallel_template.nim b/benchmarks/bench_gt_parallel_template.nim new file mode 100644 index 00000000..197d1b17 --- /dev/null +++ b/benchmarks/bench_gt_parallel_template.nim @@ -0,0 +1,162 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +# ############################################################ +# +# Summary of the performance of a curve +# +# ############################################################ + +import + # Standard library + std/[monotimes, times], + # Internals + constantine/platforms/abstractions, + constantine/named/algebras, + constantine/math/[arithmetic, extension_fields], + constantine/math/pairings/[ + pairings_generic, + gt_exponentiations, + gt_exponentiations_vartime, + gt_multiexp + ], + constantine/threadpool, + # Helpers + helpers/prng_unsafe, + ./bench_blueprint + +export times, monotimes +export notes +export abstractions +proc separator*() = separator(168) + +proc report(op, domain: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) = + let ns = inNanoseconds((stop-start) div iters) + let throughput = 1e9 / float64(ns) + when SupportsGetTicks: + echo &"{op:<68} {domain:<20} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)" + else: + echo &"{op:<68} {domain:<20} {throughput:>15.3f} ops/s {ns:>9} ns/op" + +macro fixFieldDisplay(T: typedesc): untyped = + # At compile-time, enums are integers and their display is buggy + # we get the Curve ID instead of the curve name. + let instantiated = T.getTypeInst() + var name = $instantiated[1][0] # Fp + name.add "[" & $Algebra(instantiated[1][1].intVal) & "]" + result = newLit name + +func fixDisplay(T: typedesc): string = + when T is (Fp or Fp2 or Fp4 or Fp6 or Fp12): + fixFieldDisplay(T) + else: + $T + +func fixDisplay(T: Algebra): string = + $T + +template bench(op: string, T: typed, iters: int, body: untyped): untyped = + measure(iters, startTime, stopTime, startClk, stopClk, body) + report(op, fixDisplay(T), startTime, stopTime, startClk, stopClk, iters) + +func random_gt*(rng: var RngState, F: typedesc): F {.inline, noInit.} = + result = rng.random_unsafe(F) + result.finalExp() + +# Multi-exponentiations +# --------------------------------------------------------------------------- + +type BenchMultiexpContext*[GT] = object + tp: Threadpool + numInputs: int + elems: seq[GT] + exponents: seq[getBigInt(GT.Name, kScalarField)] + +proc createBenchMultiExpContext*(GT: typedesc, inputSizes: openArray[int]): BenchMultiexpContext[GT] = + result.tp = Threadpool.new() + let maxNumInputs = inputSizes.max() + + const bits = Fr[GT.Name].bits() + + result.numInputs = maxNumInputs + result.elems = newSeq[GT](maxNumInputs) + result.exponents = newSeq[BigInt[bits]](maxNumInputs) + + proc genElemExponentPairsChunk[GT](rngSeed: uint64, start, len: int, elems: ptr GT, exponents: ptr BigInt[bits]) {.nimcall.} = + let elems = cast[ptr UncheckedArray[GT]](elems) + let exponents = cast[ptr UncheckedArray[BigInt[bits]]](exponents) + + # RNGs are not threadsafe, create a threadlocal one seeded from the global RNG + var threadRng: RngState + threadRng.seed(rngSeed) + + for i in start ..< start + len: + elems[i] = threadRng.random_gt(GT) + exponents[i] = threadRng.random_unsafe(BigInt[bits]) + + let chunks = balancedChunksPrioNumber(0, maxNumInputs, result.tp.numThreads) + + stdout.write &"Generating {maxNumInputs} (elems, exponents) pairs ... " + stdout.flushFile() + + let start = getMonotime() + + syncScope: + for (id, start, size) in items(chunks): + result.tp.spawn genElemExponentPairsChunk(rng.next(), start, size, result.elems[0].addr, result.exponents[0].addr) + + # Even if child threads are sleeping, it seems like perf is lower when there are threads around + # maybe because the kernel has more overhead or time quantum to keep track off so shut them down. + result.tp.shutdown() + + let stop = getMonotime() + stdout.write &"in {float64(inNanoSeconds(stop-start)) / 1e6:6.3f} ms\n" + +proc multiExpParallelBench*[GT](ctx: var BenchMultiExpContext[GT], numInputs: int, iters: int) = + const bits = Fr[GT.Name].bits() + + template elems: untyped = ctx.elems.toOpenArray(0, numInputs-1) + template exponents: untyped = ctx.exponents.toOpenArray(0, numInputs-1) + + + var r{.noInit.}: GT + var startNaive, stopNaive, startMultiExpBaseline, stopMultiExpBaseline: MonoTime + + if numInputs <= 100000: + # startNaive = getMonotime() + bench("𝔾ₜ exponentiations " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + var tmp: GT + r.setOne() + for i in 0 ..< elems.len: + tmp.gtExp(elems[i], exponents[i]) + r *= tmp + # stopNaive = getMonotime() + + if numInputs <= 100000: + startNaive = getMonotime() + bench("𝔾ₜ exponentiations vartime " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + var tmp: GT + r.setOne() + for i in 0 ..< elems.len: + tmp.gtExp_vartime(elems[i], exponents[i]) + r *= tmp + stopNaive = getMonotime() + + if numInputs <= 100000: + startMultiExpBaseline = getMonotime() + bench("𝔾ₜ multi-exponentiations baseline " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters): + r.multiExp_reference_vartime(elems, exponents) + stopMultiExpBaseline = getMonotime() + + + let perfNaive = inNanoseconds((stopNaive-startNaive) div iters) + let perfMSMbaseline = inNanoseconds((stopMultiExpBaseline-startMultiExpBaseline) div iters) + + if numInputs <= 100000: + let speedupBaseline = float(perfNaive) / float(perfMSMbaseline) + echo &"Speedup ratio baseline over naive linear combination: {speedupBaseline:>6.3f}x" diff --git a/benchmarks/bench_gt_template.nim b/benchmarks/bench_gt_template.nim index 4e41b5f1..a4d8d9e0 100644 --- a/benchmarks/bench_gt_template.nim +++ b/benchmarks/bench_gt_template.nim @@ -27,7 +27,6 @@ import helpers/prng_unsafe, ./bench_blueprint - export notes export abstractions proc separator*() = separator(168) diff --git a/constantine.nimble b/constantine.nimble index 4ad7e001..3761b313 100644 --- a/constantine.nimble +++ b/constantine.nimble @@ -654,6 +654,7 @@ const benchDesc = [ "bench_pairing_bn254_nogami", "bench_pairing_bn254_snarks", "bench_gt", + "bench_gt_multiexp_bls12_381", "bench_summary_bls12_377", "bench_summary_bls12_381", "bench_summary_bn254_nogami", @@ -1036,6 +1037,12 @@ task bench_ec_g2_scalar_mul, "Run benchmark on Elliptic Curve group 𝔾2 (Multi task bench_gt, "Run 𝔾ₜ benchmarks - CC compiler": runBench("bench_gt") +# 𝔾ₜ - multi-exponentiation +# ------------------------------------------ + +task bench_gt_multiexp_bls12_381, "Run 𝔾ₜ multiexponentiation benchmarks for BLS12-381 - CC compiler": + runBench("bench_gt_multiexp_bls12_381") + # Pairings # ------------------------------------------ diff --git a/constantine/math/elliptic/ec_multi_scalar_mul_scheduler.nim b/constantine/math/elliptic/ec_multi_scalar_mul_scheduler.nim index f18af92a..043b326b 100644 --- a/constantine/math/elliptic/ec_multi_scalar_mul_scheduler.nim +++ b/constantine/math/elliptic/ec_multi_scalar_mul_scheduler.nim @@ -213,6 +213,7 @@ func bestBucketBitSize*(inputSize: int, scalarBitwidth: static int, useSignedBuc # L1, L2 caches, TLB and 64 aliasing conflict # are not taken into account in previous formula. # Each increase in c doubles memory used. + # TODO: use the element size for thresholds. when useManualTuning: if 14 <= result: result -= 1 diff --git a/constantine/math/pairings/cyclotomic_subgroups.nim b/constantine/math/pairings/cyclotomic_subgroups.nim index f04714c9..9bb54e40 100644 --- a/constantine/math/pairings/cyclotomic_subgroups.nim +++ b/constantine/math/pairings/cyclotomic_subgroups.nim @@ -225,9 +225,9 @@ func cyclotomic_square_cube_over_quad(r: var CubicExt, a: CubicExt) = # https://eprint.iacr.org/2009/565.pdf # Cubic extension field - # A = 3a² − 2 ̄a - # B = 3 √i c² + 2 ̄b - # C = 3b² − 2 ̄c + # A = 3a² − 2a̅ + # B = 3 √i c² + 2b̅ + # C = 3b² − 2c̅ var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: typeof(a.c0) template a0: untyped = a.c0.c0 @@ -261,7 +261,7 @@ func cyclotomic_square_cube_over_quad(r: var CubicExt, a: CubicExt) = r.c2.c1.double() r.c2.c1 += v1.c1 - # Now B = 3 √i c² + 2 ̄b + # Now B = 3 √i c² + 2b̅ # beware of mul by non residue: √i v₂ = ξv₂₁ + v₂₀√i # 3 (√i c²)₀ + 2a₂ @@ -291,9 +291,9 @@ func cyclotomic_square_quad_over_cube[F](r: var QuadraticExt[F], a: QuadraticExt # c₅ <=> a₅ <=> b₅ # # Hence, this formula for a cubic extension field - # A = 3a² − 2 ̄a - # B = 3 √i c² + 2 ̄b - # C = 3b² − 2 ̄c + # A = 3a² − 2a̅ + # B = 3 √i c² + 2b̅ + # C = 3b² − 2c̅ # # becomes # A = (b₀, b₄) = 3(b₀, b₄)² - 2(b₀,-b₄) diff --git a/constantine/math/pairings/gt_multiexp.nim b/constantine/math/pairings/gt_multiexp.nim new file mode 100644 index 00000000..c98f905e --- /dev/null +++ b/constantine/math/pairings/gt_multiexp.nim @@ -0,0 +1,233 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import constantine/named/algebras, + constantine/math/endomorphisms/split_scalars, + constantine/math/extension_fields, + constantine/math/arithmetic/bigints, + constantine/named/zoo_endomorphisms, + constantine/platforms/abstractions, + ./cyclotomic_subgroups + +# No exceptions allowed in core cryptographic operations +{.push raises: [].} +{.push checks: off.} + +# ########################################################### # +# # +# Multi-Exponentiation in 𝔾ₜ # +# # +# ########################################################### # + +# General utilities +# ------------------------------------------------------------- + +func bestBucketBitSize*(inputSize: int, scalarBitwidth: static int, useSignedBuckets, useManualTuning: static bool): int {.inline.} = + ## Evaluate the best bucket bit-size for the input size. + ## That bucket size minimize group operations. + ## This ignore cache effect. Computation can become memory-bound, especially with large buckets + ## that don't fit in L1 cache, trigger the 64K aliasing conflict or worse (overflowing L2 cache or TLB). + ## Especially, scalars are expected to be indistinguishable from random so buckets accessed during accumulation + ## will be in a random pattern, triggering cache misses. + + # Raw operation cost is approximately + # 1. Bucket accumulation + # n - (2ᶜ-1) mul for b/c windows or n - (2ᶜ⁻¹-1) if using signed buckets + # 2. Bucket reduction + # 2x(2ᶜ-2) mul for b/c windows or 2*(2ᶜ⁻¹-2) + # 3. Final reduction + # (b/c - 1) x (c cyclotomic squarings + 1 multiplication) + # Total + # b/c (n + 2ᶜ - 2) A + (b/c - 1) * (c*D + A) + # https://www.youtube.com/watch?v=Bl5mQA7UL2I + + # A cyclotomic square costs ~50% of a 𝔾ₜ multiplication with Granger-Scott formula + + const M = 5300'f32 # Mul cost (in cycles) + const S = 2100'f32 # Cyclotomic square cost (in cycles) + + const s = int useSignedBuckets + let n = inputSize + let b = float32(scalarBitwidth) + var minCost = float32(Inf) + for c in 2 .. 20: # cap return value at 17 after manual tuning + let b_over_c = b/c.float32 + + let bucket_accumulate_reduce = b_over_c * float32(n + (1 shl (c-s)) - 2) * M + let final_reduction = (b_over_c - 1'f32) * (c.float32*S + M) + let cost = bucket_accumulate_reduce + final_reduction + if cost < minCost: + minCost = cost + result = c + + # Manual tuning, memory bandwidth / cache boundaries of + # L1, L2 caches, TLB and 64 aliasing conflict + # are not taken into account in previous formula. + # Each increase in c doubles memory used. + # Compared to 𝔾₁, 𝔾ₜ elements are 6x bigger so we shift by 3 + when useManualTuning: + if 11 <= result: + result -= 1 + if 12 <= result: + result -= 1 + if 13 <= result: + result -= 1 + +func `~*=`*[Gt: ExtensionField](a: var Gt, b: Gt) = + + # TODO: Analyze the inputs to see if there is avalue in more complex shortcuts (-1, or partial 0 coordinates) + if a.isOne().bool(): + a = b + elif b.isOne().bool(): + discard + else: + a *= b + +func `~/=`*[Gt: ExtensionField](a: var Gt, b: Gt) = + ## Cyclotomic division + var t {.noInit.}: Gt + t.cyclotomic_inv(b) + a ~*= b + +# Reference multi-exponentiation +# ------------------------------------------------------------- + +func multiExpImpl_reference_vartime[bits: static int, Gt]( + r: var Gt, + elems: ptr UncheckedArray[Gt], + exponents: ptr UncheckedArray[BigInt[bits]], + N: int, c: static int) {.tags:[VarTime, HeapAlloc].} = + ## Inner implementation of MEXP, for static dispatch over c, the bucket bit length + ## This is a straightforward simple translation of BDLO12, section 4 + + # Prologue + # -------- + const numBuckets = 1 shl c - 1 # bucket 0 is unused + const numWindows = bits.ceilDiv_vartime(c) + + let miniEXPs = allocHeapArray(Gt, numWindows) + let buckets = allocHeapArray(Gt, numBuckets) + + # Algorithm + # --------- + for w in 0 ..< numWindows: + # Place our elements in a bucket corresponding to + # how many times their bit pattern in the current window of size c + for i in 0 ..< numBuckets: + buckets[i].setOne() + + # 1. Bucket accumulation. Cost: n - (2ᶜ-1) => n elems in 2ᶜ-1 elems, first elem per bucket is just copied + for j in 0 ..< N: + let b = cast[int](exponents[j].getWindowAt(w*c, c)) + if b == 0: # bucket 0 is unused, no need to add aⱼ⁰ + continue + else: + buckets[b-1] ~*= elems[j] + + # 2. Bucket reduction. Cost: 2x(2ᶜ-2) => 2 additions per 2ᶜ-1 bucket, last bucket is just copied + # We have ordered subset sums in each bucket, we now need to compute the mini-exponentiation + # S₁¹ + S₂² + S₃³ + ... + (S₂c₋₁)^(2ᶜ-1) + var accumBuckets{.noInit.}, miniEXP{.noInit.}: Gt + accumBuckets = buckets[numBuckets-1] + miniEXP = buckets[numBuckets-1] + + # Example with c = 3, 2³ = 8 + for k in countdown(numBuckets-2, 0): + accumBuckets ~*= buckets[k] # Stores S₈ then S₈ +S₇ then S₈ +S₇ +S₆ then ... + miniEXP ~*= accumBuckets # Stores S₈ then S₈²+S₇ then S₈³+S₇²+S₆ then ... + + miniEXPs[w] = miniEXP + + # 3. Final reduction. Cost: (b/c - 1)x(c+1) => b/c windows, first is copied, c squarings + 1 mul per window + r = miniEXPs[numWindows-1] + for w in countdown(numWindows-2, 0): + for _ in 0 ..< c: + r.cyclotomic_square() + r ~*= miniEXPs[w] + + # Cleanup + # ------- + buckets.freeHeap() + miniEXPs.freeHeap() + +func multiExp_reference_dispatch_vartime[bits: static int, Gt]( + r: var Gt, + elems: ptr UncheckedArray[Gt], + exponents: ptr UncheckedArray[BigInt[bits]], + N: int) {.tags:[VarTime, HeapAlloc].} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + let c = bestBucketBitSize(N, bits, useSignedBuckets = false, useManualTuning = false) + + case c + of 2: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 2) + of 3: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 3) + of 4: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 4) + of 5: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 5) + of 6: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 6) + of 7: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 7) + of 8: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 8) + of 9: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 9) + of 10: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 10) + of 11: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 11) + of 12: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 12) + of 13: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 13) + of 14: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 14) + of 15: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 15) + + of 16..20: multiExpImpl_reference_vartime(r, elems, exponents, N, c = 16) + else: + unreachable() + +func multiExp_reference_vartime*[bits: static int, Gt]( + r: var Gt, + elems: ptr UncheckedArray[Gt], + exponents: ptr UncheckedArray[BigInt[bits]], + N: int) {.tags:[VarTime, HeapAlloc].} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + multiExp_reference_dispatch_vartime(r, elems, exponents, N) + +func multiExp_reference_vartime*[Gt](r: var Gt, elems: openArray[Gt], exponents: openArray[BigInt]) {.tags:[VarTime, HeapAlloc].} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + debug: doAssert exponents.len == elems.len + let N = elems.len + multiExp_reference_dispatch_vartime(r, elems.asUnchecked(), exponents.asUnchecked(), N) + +func multiExp_reference_vartime*[F, Gt]( + r: var Gt, + elems: ptr UncheckedArray[Gt], + exponents: ptr UncheckedArray[F], + len: int) {.tags:[VarTime, Alloca, HeapAlloc], meter.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + let n = cast[int](len) + let exponents_big = allocHeapArrayAligned(F.getBigInt(), n, alignment = 64) + exponents_big.batchFromField(exponents, n) + r.multiExp_reference_vartime(elems, exponents_big, n) + + freeHeapAligned(exponents_big) + +func multiExp_reference_vartime*[Gt]( + r: var Gt, + elems: openArray[Gt], + exponents: openArray[Fr]) {.tags:[VarTime, Alloca, HeapAlloc], inline.} = + ## Multiexponentiation: + ## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ + debug: doAssert exponents.len == elems.len + let N = elems.len + multiExp_reference_vartime(r, elems.asUnchecked(), exponents.asUnchecked(), N) diff --git a/constantine/math/pairings/pairings_generic.nim b/constantine/math/pairings/pairings_generic.nim index a8a79f69..df95b44e 100644 --- a/constantine/math/pairings/pairings_generic.nim +++ b/constantine/math/pairings/pairings_generic.nim @@ -27,11 +27,16 @@ func millerLoop*[Name: static Algebra](gt: var Fp12[Name], Q, P: auto, n: int) { else: gt.millerLoopAddchain(Q, P, n) -func finalExp*[Name: static Algebra](gt: var Fp12[Name]){.inline.} = - gt.finalExpEasy() +export finalExpEasy + +func finalExpHard*[Name: static Algebra](gt: var Fp12[Name]) {.inline.} = when family(Name) == BarretoNaehrig: gt.finalExpHard_BN() elif family(Name) == BarretoLynnScott: gt.finalExpHard_BLS12() else: {.error: "Final Exponentiation not implemented for " & $Name.} + +func finalExp*[Name: static Algebra](gt: var Fp12[Name]){.inline.} = + gt.finalExpEasy() + gt.finalExpHard() diff --git a/tests/math_pairings/t_pairing_bls12_381_gt_multiexp.nim b/tests/math_pairings/t_pairing_bls12_381_gt_multiexp.nim new file mode 100644 index 00000000..e9a838f1 --- /dev/null +++ b/tests/math_pairings/t_pairing_bls12_381_gt_multiexp.nim @@ -0,0 +1,18 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Test utilities + ./t_pairing_template + +const numPoints = [1, 2, 8, 16, 128, 256, 1024] + +runGTmultiexpTests( + GT = Fp12[BLS12_381], + numPoints, + Iters = 4) diff --git a/tests/math_pairings/t_pairing_bn254_snarks_gt_multiexp.nim b/tests/math_pairings/t_pairing_bn254_snarks_gt_multiexp.nim new file mode 100644 index 00000000..cda9c5b4 --- /dev/null +++ b/tests/math_pairings/t_pairing_bn254_snarks_gt_multiexp.nim @@ -0,0 +1,18 @@ +# Constantine +# Copyright (c) 2018-2019 Status Research & Development GmbH +# Copyright (c) 2020-Present Mamy André-Ratsimbazafy +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed except according to those terms. + +import + # Test utilities + ./t_pairing_template + +const numPoints = [1, 2, 8, 16, 128, 256, 1024] + +runGTmultiexpTests( + GT = Fp12[BN254_Snarks], + numPoints, + Iters = 4) diff --git a/tests/math_pairings/t_pairing_template.nim b/tests/math_pairings/t_pairing_template.nim index 0c738247..0df29200 100644 --- a/tests/math_pairings/t_pairing_template.nim +++ b/tests/math_pairings/t_pairing_template.nim @@ -20,6 +20,7 @@ import cyclotomic_subgroups, gt_exponentiations, gt_exponentiations_vartime, + gt_multiexp, pairings_generic], constantine/math/io/io_extfields, @@ -64,7 +65,7 @@ func random_point*(rng: var RngState, EC: typedesc, randZ: bool, gen: RandomGen) result = rng.random_long01Seq(EC) result.clearCofactor() -template runPairingTests*(Iters: static int, Name: static Algebra, G1, G2, GT: typedesc, pairing_fn: untyped): untyped {.dirty.}= +template runPairingTests*(Name: static Algebra, G1, G2, GT: typedesc, Iters: int) = bind affineType var rng: RngState @@ -73,8 +74,8 @@ template runPairingTests*(Iters: static int, Name: static Algebra, G1, G2, GT: t echo "\n------------------------------------------------------\n" echo "test_pairing_",$Name,"_optate xoshiro512** seed: ", timeseed - proc test_bilinearity_double_impl(randZ: bool, gen: RandomGen) = - for _ in 0 ..< Iters: + proc test_bilinearity_double_impl(randZ: bool, gen: RandomGen, iters: int) = + for _ in 0 ..< iters: let P = rng.random_point(G1, randZ, gen) let Q = rng.random_point(G2, randZ, gen) var P2: typeof(P) @@ -93,10 +94,10 @@ template runPairingTests*(Iters: static int, Name: static Algebra, G1, G2, GT: t Qa.affine(Q) Qa2.affine(Q2) - r.pairing_fn(Pa, Qa) + r.pairing(Pa, Qa) r.square() - r2.pairing_fn(Pa2, Qa) - r3.pairing_fn(Pa, Qa2) + r2.pairing(Pa2, Qa) + r3.pairing(Pa, Qa2) doAssert bool(not r.isZero()) doAssert bool(not r.isOne()) @@ -106,9 +107,9 @@ template runPairingTests*(Iters: static int, Name: static Algebra, G1, G2, GT: t suite "Pairing - Optimal Ate on " & $Name & " [" & $WordBitWidth & "-bit words]": test "Bilinearity e([2]P, Q) = e(P, [2]Q) = e(P, Q)^2": - test_bilinearity_double_impl(randZ = false, gen = Uniform) - test_bilinearity_double_impl(randZ = false, gen = HighHammingWeight) - test_bilinearity_double_impl(randZ = false, gen = Long01Sequence) + test_bilinearity_double_impl(randZ = false, gen = Uniform, Iters) + test_bilinearity_double_impl(randZ = false, gen = HighHammingWeight, Iters) + test_bilinearity_double_impl(randZ = false, gen = Long01Sequence, Iters) func random_elem(rng: var RngState, F: typedesc, gen: RandomGen): F {.inline, noInit.} = if gen == Uniform: @@ -118,7 +119,7 @@ func random_elem(rng: var RngState, F: typedesc, gen: RandomGen): F {.inline, no else: result = rng.random_long01Seq(F) -template runGTsubgroupTests*(Iters: static int, GT: typedesc, finalExpHard_fn: untyped): untyped {.dirty.}= +template runGTsubgroupTests*(GT: typedesc, Iters: int) = bind affineType, random_elem var rng: RngState @@ -127,9 +128,9 @@ template runGTsubgroupTests*(Iters: static int, GT: typedesc, finalExpHard_fn: u echo "\n------------------------------------------------------\n" echo "test_pairing_",$GT.Name,"_gt xoshiro512** seed: ", timeseed - proc test_gt_impl(gen: RandomGen) = + proc test_gt_impl(gen: RandomGen, iters: int) = stdout.write " " - for _ in 0 ..< Iters: + for _ in 0 ..< iters: let a = rng.random_elem(GT, gen) doAssert not bool a.isInCyclotomicSubgroup(), "The odds of generating randomly such an element are too low a: " & a.toHex() var a2 = a @@ -137,7 +138,7 @@ template runGTsubgroupTests*(Iters: static int, GT: typedesc, finalExpHard_fn: u doAssert bool a2.isInCyclotomicSubgroup() doAssert not bool a2.isInPairingSubgroup(), "The odds of generating randomly such an element are too low a2: " & a.toHex() var a3 = a2 - finalExpHard_fn(a3) + finalExpHard(a3) doAssert bool a3.isInCyclotomicSubgroup() doAssert bool a3.isInPairingSubgroup() stdout.write '.' @@ -146,9 +147,9 @@ template runGTsubgroupTests*(Iters: static int, GT: typedesc, finalExpHard_fn: u suite "Pairing - 𝔾ₜ subgroup " & $GT.Name & " [" & $WordBitWidth & "-bit words]": test "Final Exponentiation and 𝔾ₜ-subgroup membership": - test_gt_impl(gen = Uniform) - test_gt_impl(gen = HighHammingWeight) - test_gt_impl(gen = Long01Sequence) + test_gt_impl(gen = Uniform, Iters) + test_gt_impl(gen = HighHammingWeight, Iters) + test_gt_impl(gen = Long01Sequence, Iters) func random_gt*(rng: var RngState, F: typedesc, gen: RandomGen): F {.inline, noInit.} = if gen == Uniform: @@ -160,16 +161,16 @@ func random_gt*(rng: var RngState, F: typedesc, gen: RandomGen): F {.inline, noI result.finalExp() -template runGTexponentiationTests*(Iters: static int, GT: typedesc): untyped {.dirty.} = +template runGTexponentiationTests*(GT: typedesc, Iters: int) = var rng: RngState let timeseed = uint32(toUnix(getTime()) and (1'i64 shl 32 - 1)) # unixTime mod 2^32 seed(rng, timeseed) echo "\n------------------------------------------------------\n" echo "test_pairing_",$GT.Name,"_gt_exponentiation xoshiro512** seed: ", timeseed - proc test_gt_exponentiation_impl(gen: RandomGen) = + proc test_gt_exponentiation_impl(gen: RandomGen, iters: int) = stdout.write " " - for _ in 0 ..< Iters: + for _ in 0 ..< iters: let a = rng.random_gt(GT, gen) let kUnred = rng.random_long01seq(GT.Name.getBigInt(kScalarField)) var k {.noInit.}: GT.Name.getBigInt(kScalarField) @@ -219,6 +220,44 @@ template runGTexponentiationTests*(Iters: static int, GT: typedesc): untyped {.d suite "Pairing - Exponentiation for 𝔾ₜ " & $GT.Name & " [" & $WordBitWidth & "-bit words]": test "𝔾ₜ exponentiation consistency": - test_gt_exponentiation_impl(gen = Uniform) - test_gt_exponentiation_impl(gen = HighHammingWeight) - test_gt_exponentiation_impl(gen = Long01Sequence) + test_gt_exponentiation_impl(gen = Uniform, Iters) + test_gt_exponentiation_impl(gen = HighHammingWeight, Iters) + test_gt_exponentiation_impl(gen = Long01Sequence, Iters) + +proc runGTmultiexpTests*[N: static int](GT: typedesc, num_points: array[N, int], Iters: int) = + var rng: RngState + let timeseed = uint32(toUnix(getTime()) and (1'i64 shl 32 - 1)) # unixTime mod 2^32 + seed(rng, timeseed) + echo "\n------------------------------------------------------\n" + echo "test_pairing_",$GT.Name,"_gt_multiexp xoshiro512** seed: ", timeseed + + proc test_gt_multiexp_impl[N](GT: typedesc, rng: var RngState, num_points: array[N, int], gen: RandomGen, iters: int) = + for N in num_points: + stdout.write " " + for _ in 0 ..< iters: + var elems = newSeq[GT](N) + var exponents = newSeq[Fr[GT.Name]](N) + + for i in 0 ..< N: + elems[i] = rng.random_gt(GT, gen) + exponents[i] = rng.random_elem(Fr[GT.Name], gen) + + var naive: GT + naive.setOne() + for i in 0 ..< N: + var t {.noInit.}: GT + t.gtExp_vartime(elems[i], exponents[i]) + naive *= t + + var mexp_ref: GT + mexp_ref.multiExp_reference_vartime(elems, exponents) + + doAssert bool(naive == mexp_ref) + + stdout.write '.' + + stdout.write '\n' + + suite "Pairing - MultiExponentiation for 𝔾ₜ " & $GT.Name & " [" & $WordBitWidth & "-bit words]": + test "𝔾ₜ multi-exponentiation consistency": + test_gt_multiexp_impl(GT, rng, num_points, gen = Long01Sequence, Iters)