𝔾ₜ exponentiation, with endomorphism acceleration (#429)

* feat(𝔾ₜ exponentiation): Add square-multiply and signed recoding and wNAF 𝔾ₜ exponentiation
* feat(𝔾ₜ exponentiation): Add endomorphism acceleration + wNAF
* feat(𝔾ₜ exponentiation): Add benchmarks
* fix(bench): forgot declaring var in refactoring
mratsim · Jul 14, 2024 · d8721a1 · d8721a1
1 parent 52a8fc4
commit d8721a1
Show file tree

Hide file tree

Showing 19 changed files with 732 additions and 68 deletions.
diff --git a/README-PERFORMANCE.md b/README-PERFORMANCE.md
@@ -58,7 +58,7 @@ The full list of benchmarks is available in the [`benchmarks`](./benchmarks) fol
 
 As mentioned in the [Compiler caveats](#compiler-caveats) section, GCC is up to 2x slower than Clang due to mishandling of carries and register usage.
 
-#### Ethereum BLS signatures (over BLS12-381 G2)
+#### Ethereum BLS signatures (over BLS12-381 𝔾₂)
 
 ![Bench Ethereum BLS signature](./media/ethereum_bls_signatures.png)
 
@@ -192,4 +192,4 @@ Constantine does not use heap allocation.
 At the moment Constantine is optimized for 32-bit and 64-bit CPUs.
 
 When performance and code size conflicts, a careful and informed default is chosen.
-In the future, a compile-time flag that goes beyond the compiler `-Os` might be provided.
+In the future, a compile-time flag that goes beyond the compiler `-Os` might be provided.
diff --git a/README.md b/README.md
@@ -90,8 +90,8 @@ For all elliptic curves, the following arithmetic is supported
     - on Fr (i.e. modulo the 255-bit curve order)
     - on Fp (i.e. modulo the 381-bit prime modulus)
   - elliptic curve arithmetic:
-    - on elliptic curve over Fp (EC G1) with affine, jacobian and homogenous projective coordinates
-    - on elliptic curve over Fp2 (EC G2) with affine, jacobian and homogenous projective coordinates
+    - on elliptic curve over Fp (EC 𝔾₁) with affine, jacobian and homogenous projective coordinates
+    - on elliptic curve over Fp2 (EC 𝔾₂) with affine, jacobian and homogenous projective coordinates
     - including scalar multiplication, multi-scalar-multiplication (MSM) and parallel MSM
 
 _All operations are constant-time unless explicitly mentioned_ vartime.
@@ -222,7 +222,7 @@ and modify Constantine's [`build.rs`](https://github.com/mratsim/constantine/blo
     ```
     > [!IMPORTANT]
     > Constantine uses a separate modfile for tests.<br />It has no dependencies (key to avoid supply chain attacks) except for testing.
-    
+
 ### From C
 
 1. Install a C compiler, `clang` is recommended, for example:

diff --git a/benchmarks/bench_fields_template.nim b/benchmarks/bench_fields_template.nim
@@ -229,13 +229,13 @@ proc sqrtRatioVartimeBench*(T: typedesc, iters: int) =
 proc powBench*(T: typedesc, iters: int) =
   let x = rng.random_unsafe(T)
   let exponent = rng.random_unsafe(BigInt[Fr[T.Name].bits()])
+  var r = x
   bench("Exp curve order (constant-time) - " & $exponent.bits & "-bit", T, iters):
-    var r = x
     r.pow(exponent)
 
-proc powUnsafeBench*(T: typedesc, iters: int) =
+proc powVartimeBench*(T: typedesc, iters: int) =
   let x = rng.random_unsafe(T)
   let exponent = rng.random_unsafe(BigInt[Fr[T.Name].bits()])
-  bench("Exp curve order (Leak exponent bits) - " & $exponent.bits & "-bit", T, iters):
-    var r = x
+  var r = x
+  bench("Exp by curve order (vartime) - " & $exponent.bits & "-bit", T, iters):
     r.pow_vartime(exponent)
diff --git a/benchmarks/bench_fp.nim b/benchmarks/bench_fp.nim
@@ -70,7 +70,7 @@ proc main() =
       sqrtRatioVartimeBench(Fp[curve], ExponentIters)
     # Exponentiation by a "secret" of size ~the curve order
     powBench(Fp[curve], ExponentIters)
-    powUnsafeBench(Fp[curve], ExponentIters)
+    powVartimeBench(Fp[curve], ExponentIters)
     separator()
 
 main()

diff --git a/benchmarks/bench_gt.nim b/benchmarks/bench_gt.nim
@@ -0,0 +1,64 @@
+# Constantine
+# Copyright (c) 2018-2019    Status Research & Development GmbH
+# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+# Licensed and distributed under either of
+#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+import
+  # Internals
+  constantine/named/algebras,
+  constantine/math/extension_fields,
+  # Helpers
+  ./bench_gt_template
+
+# ############################################################
+#
+#               Benchmark of the 𝔾ₜ group of
+#                  Pairing Friendly curves
+#
+# ############################################################
+
+const Iters = 10000
+const ExpIters = 1000
+const AvailableCurves = [
+  # BN254_Nogami,
+  BN254_Snarks,
+  # BLS12_377,
+  BLS12_381,
+]
+
+proc main() =
+  separator()
+  staticFor i, 0, AvailableCurves.len:
+    const curve = AvailableCurves[i]
+    const bits = Fr[curve].bits()
+    separator()
+    mulBench(Fp12[curve], Iters)
+    sqrBench(Fp12[curve], Iters)
+    invBench(Fp12[curve], Iters)
+    separator()
+    cyclotomicSquare_Bench(Fp12[curve], Iters)
+    cyclotomicInv_Bench(Fp12[curve], Iters)
+    cyclotomicSquareCompressed_Bench(Fp12[curve], Iters)
+    cyclotomicDecompression_Bench(Fp12[curve], Iters)
+    separator()
+    powVartimeBench(Fp12[curve], window = 2, ExpIters)
+    powVartimeBench(Fp12[curve], window = 3, ExpIters)
+    powVartimeBench(Fp12[curve], window = 4, ExpIters)
+    separator()
+    gtExp_sqrmul_vartimeBench(Fp12[curve], ExpIters)
+    gtExp_minHammingWeight_vartimeBench(Fp12[curve], ExpIters)
+    separator()
+    gtExp_wNAF_vartimeBench(Fp12[curve], window = 2, ExpIters)
+    gtExp_wNAF_vartimeBench(Fp12[curve], window = 3, ExpIters)
+    gtExp_wNAF_vartimeBench(Fp12[curve], window = 4, ExpIters)
+    separator()
+    gtExp_endo_wNAF_vartimeBench(Fp12[curve], window = 2, ExpIters)
+    gtExp_endo_wNAF_vartimeBench(Fp12[curve], window = 3, ExpIters)
+    gtExp_endo_wNAF_vartimeBench(Fp12[curve], window = 4, ExpIters)
+    separator()
+
+main()
+notes()
diff --git a/benchmarks/bench_gt_template.nim b/benchmarks/bench_gt_template.nim
@@ -0,0 +1,167 @@
+# Constantine
+# Copyright (c) 2018-2019    Status Research & Development GmbH
+# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
+# Licensed and distributed under either of
+#   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
+#   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed except according to those terms.
+
+# ############################################################
+#
+#           Benchmark helpers for the 𝔾ₜ group
+#
+# ############################################################
+
+import
+  # Internals
+  constantine/platforms/abstractions,
+  constantine/named/algebras,
+  constantine/math/[arithmetic, extension_fields],
+  constantine/math/pairings/[
+    pairings_generic,
+    cyclotomic_subgroups,
+    gt_exponentiations_vartime
+  ],
+  # Helpers
+  helpers/prng_unsafe,
+  ./bench_blueprint
+
+
+export notes
+export abstractions
+proc separator*() = separator(168)
+
+proc report(op, domain: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
+  let ns = inNanoseconds((stop-start) div iters)
+  let throughput = 1e9 / float64(ns)
+  when SupportsGetTicks:
+    echo &"{op:<68} {domain:<20} {throughput:>15.3f} ops/s     {ns:>9} ns/op     {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
+  else:
+    echo &"{op:<68} {domain:<20} {throughput:>15.3f} ops/s     {ns:>9} ns/op"
+
+macro fixFieldDisplay(T: typedesc): untyped =
+  # At compile-time, enums are integers and their display is buggy
+  # we get the Curve ID instead of the curve name.
+  let instantiated = T.getTypeInst()
+  var name = $instantiated[1][0] # Fp
+  name.add "[" & $Algebra(instantiated[1][1].intVal) & "]"
+  result = newLit name
+
+func fixDisplay(T: typedesc): string =
+  when T is (Fp or Fp2 or Fp4 or Fp6 or Fp12):
+    fixFieldDisplay(T)
+  else:
+    $T
+
+func fixDisplay(T: Algebra): string =
+  $T
+
+template bench(op: string, T: typed, iters: int, body: untyped): untyped =
+  measure(iters, startTime, stopTime, startClk, stopClk, body)
+  report(op, fixDisplay(T), startTime, stopTime, startClk, stopClk, iters)
+
+func random_gt*(rng: var RngState, F: typedesc): F {.inline, noInit.} =
+  result = rng.random_unsafe(F)
+  result.finalExp()
+
+proc mulBench*(T: typedesc, iters: int) =
+  var r: T
+  let x = rng.random_gt(T)
+  let y = rng.random_gt(T)
+  preventOptimAway(r)
+  bench("Multiplication", T, iters):
+    r.prod(x, y)
+
+proc sqrBench*(T: typedesc, iters: int) =
+  var r: T
+  let x = rng.random_gt(T)
+  preventOptimAway(r)
+  bench("Squaring", T, iters):
+    r.square(x)
+
+proc invBench*(T: typedesc, iters: int) =
+  var r: T
+  let x = rng.random_gt(T)
+  preventOptimAway(r)
+  bench("Inversion", T, iters):
+    r.inv(x)
+
+proc cyclotomicSquare_Bench*(T: typedesc, iters: int) =
+  var f = rng.random_gt(T)
+
+  bench("Squaring in cyclotomic subgroup", T, iters):
+    f.cyclotomic_square()
+
+proc cyclotomicInv_Bench*(T: typedesc, iters: int) =
+  var f = rng.random_gt(T)
+
+  bench("Inversion in cyclotomic subgroup", T, iters):
+    f.cyclotomic_inv()
+
+proc cyclotomicSquareCompressed_Bench*(T: typedesc, iters: int) =
+  var f = rng.random_gt(T)
+
+  when T is Fp12:
+    type F = Fp2[T.Name]
+  else:
+    {.error: "Only compression of Fp12 extension is configured".}
+
+  var g: G2345[F]
+  g.fromFpk(f)
+
+  bench("Cyclotomic Compressed Squaring", T, iters):
+    g.cyclotomic_square_compressed()
+
+proc cyclotomicDecompression_Bench*(T: typedesc, iters: int) =
+  var f = rng.random_gt(T)
+
+  when T is Fp12:
+    type F = Fp2[T.Name]
+  else:
+    {.error: "Only compression of Fp12 extension is configured".}
+
+  var gs: array[1, G2345[F]]
+  gs[0].fromFpk(f)
+
+  var g1s_ratio: array[1, tuple[g1_num, g1_den: F]]
+  var g0s, g1s: array[1, F]
+
+  bench("Cyclotomic Decompression", T, iters):
+    recover_g1(g1s_ratio[0].g1_num, g1s_ratio[0].g1_den, gs[0])
+    g1s.batch_ratio_g1s(g1s_ratio)
+    g0s[0].recover_g0(g1s[0], gs[0])
+
+proc powVartimeBench*(T: typedesc, window: static int, iters: int) =
+  let x = rng.random_gt(T)
+  let exponent = rng.random_unsafe(BigInt[Fr[T.Name].bits()])
+  var r = x
+  bench("Field Exponentiation " & $exponent.bits & "-bit (window-" & $window & ", vartime)", T, iters):
+    r.pow_vartime(exponent, window)
+
+proc gtExp_sqrmul_vartimeBench*(T: typedesc, iters: int) =
+  let x = rng.random_gt(T)
+  let exponent = rng.random_unsafe(BigInt[Fr[T.Name].bits()])
+  var r {.noInit.}: T
+  bench("𝔾ₜ Exponentiation " & $exponent.bits & "-bit (cyclotomic square-multiply, vartime)", T, iters):
+    r.gtExp_sqrmul_vartime(x, exponent)
+
+proc gtExp_minHammingWeight_vartimeBench*(T: typedesc, iters: int) =
+  let x = rng.random_gt(T)
+  let exponent = rng.random_unsafe(BigInt[Fr[T.Name].bits()])
+  var r {.noInit.}: T
+  bench("𝔾ₜ Exponentiation " & $exponent.bits & "-bit (signed recoding, vartime)", T, iters):
+    r.gtExp_minHammingWeight_vartime(x, exponent)
+
+proc gtExp_wNAF_vartimeBench*(T: typedesc, window: static int, iters: int) =
+  let x = rng.random_gt(T)
+  let exponent = rng.random_unsafe(BigInt[Fr[T.Name].bits()])
+  var r {.noInit.}: T
+  bench("𝔾ₜ Exponentiation " & $exponent.bits & "-bit (wNAF-" & $window & ", vartime)", T, iters):
+    r.gtExp_minHammingWeight_windowed_vartime(x, exponent, window)
+
+proc gtExp_endo_wNAF_vartimeBench*(T: typedesc, window: static int, iters: int) =
+  let x = rng.random_gt(T)
+  let exponent = rng.random_unsafe(BigInt[Fr[T.Name].bits()])
+  var r {.noInit.}: T
+  bench("𝔾ₜ Exponentiation " & $exponent.bits & "-bit (endomorphism, wNAF-" & $window & ", vartime)", T, iters):
+    r.gtExpEndo_minHammingWeight_windowed_vartime(x, exponent, window)
diff --git a/constantine.nimble b/constantine.nimble
@@ -430,7 +430,7 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
   # ----------------------------------------------------------
   ("tests/math_elliptic_curves/t_ec_conversion.nim", false),
 
-  # Elliptic curve arithmetic G1
+  # Elliptic curve arithmetic 𝔾₁
   # ----------------------------------------------------------
   ("tests/math_elliptic_curves/t_ec_shortw_prj_g1_add_double.nim", false),
   # ("tests/math_elliptic_curves/t_ec_shortw_prj_g1_mul_sanity.nim", false),
@@ -458,7 +458,7 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
   ("tests/math_elliptic_curves/t_ec_twedwards_mul_endomorphism_bandersnatch", false),
 
 
-  # Elliptic curve arithmetic G2
+  # Elliptic curve arithmetic 𝔾₂
   # ----------------------------------------------------------
   # ("tests/math_elliptic_curves/t_ec_shortw_prj_g2_add_double_bn254_snarks.nim", false),
   # ("tests/math_elliptic_curves/t_ec_shortw_prj_g2_mul_sanity_bn254_snarks.nim", false),
@@ -551,7 +551,7 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
   # ("tests/math_pairings/t_pairing_bls12_381_gt_subgroup.nim", false),
   # ("tests/math_pairings/t_pairing_bw6_761_gt_subgroup.nim", false),
 
-  # Pairing
+  # Pairing & 𝔾ₜ exponentiation
   # ----------------------------------------------------------
   # ("tests/math_pairings/t_pairing_bls12_377_line_functions.nim", false),
   # ("tests/math_pairings/t_pairing_bls12_381_line_functions.nim", false),
@@ -562,6 +562,9 @@ const testDesc: seq[tuple[path: string, useGMP: bool]] = @[
   # ("tests/math_pairings/t_pairing_bls12_377_optate.nim", false),
   # ("tests/math_pairings/t_pairing_bls12_381_optate.nim", false),
 
+  ("tests/math_pairings/t_pairing_bn254_snarks_gt_exp.nim", false),
+  ("tests/math_pairings/t_pairing_bls12_381_gt_exp.nim", false),
+
   # Multi-Pairing
   # ----------------------------------------------------------
   ("tests/math_pairings/t_pairing_bn254_nogami_multi.nim", false),
@@ -650,6 +653,7 @@ const benchDesc = [
   "bench_pairing_bls12_381",
   "bench_pairing_bn254_nogami",
   "bench_pairing_bn254_snarks",
+  "bench_gt",
   "bench_summary_bls12_377",
   "bench_summary_bls12_381",
   "bench_summary_bn254_nogami",
@@ -977,25 +981,25 @@ task bench_fp6, "Run benchmark 𝔽p6 with your CC compiler":
 task bench_fp12, "Run benchmark 𝔽p12 with your CC compiler":
   runBench("bench_fp12")
 
-# Elliptic curve G1
+# Elliptic curve 𝔾₁
 # ------------------------------------------
 
 task bench_ec_g1, "Run benchmark on Elliptic Curve group 𝔾1 - CC compiler":
   runBench("bench_ec_g1")
 
-# Elliptic curve G1 - batch operations
+# Elliptic curve 𝔾₁ - batch operations
 # ------------------------------------------
 
 task bench_ec_g1_batch, "Run benchmark on Elliptic Curve group 𝔾1 (batch ops) - CC compiler":
   runBench("bench_ec_g1_batch")
 
-# Elliptic curve G1 - scalar multiplication
+# Elliptic curve 𝔾₁ - scalar multiplication
 # ------------------------------------------
 
 task bench_ec_g1_scalar_mul, "Run benchmark on Elliptic Curve group 𝔾1 (Scalar Multiplication) - CC compiler":
   runBench("bench_ec_g1_scalar_mul")
 
-# Elliptic curve G1 - Multi-scalar-mul
+# Elliptic curve 𝔾₁ - Multi-scalar-mul
 # ------------------------------------------
 
 task bench_ec_msm_pasta, "Run benchmark: Multi-Scalar-Mul for Pasta curves - CC compiler":
@@ -1014,18 +1018,24 @@ task bench_ec_msm_bandersnatch, "Run benchmark: Multi-Scalar-Mul for Bandersnatc
   runBench("bench_ec_msm_bandersnatch")
 
 
-# Elliptic curve G2
+# Elliptic curve 𝔾₂
 # ------------------------------------------
 
 task bench_ec_g2, "Run benchmark on Elliptic Curve group 𝔾2 - CC compiler":
   runBench("bench_ec_g2")
 
-# Elliptic curve G2 - scalar multiplication
+# Elliptic curve 𝔾₂ - scalar multiplication
 # ------------------------------------------
 
 task bench_ec_g2_scalar_mul, "Run benchmark on Elliptic Curve group 𝔾2 (Multi-Scalar-Mul) - CC compiler":
   runBench("bench_ec_g2_scalar_mul")
 
+# 𝔾ₜ
+# ------------------------------------------
+
+task bench_gt, "Run 𝔾ₜ benchmarks - CC compiler":
+  runBench("bench_gt")
+
 # Pairings
 # ------------------------------------------