Skip to content

Commit

Permalink
feat(gt-multiexp): add baseline multi-exponentiation on 𝔾ₜ based on B…
Browse files Browse the repository at this point in the history
…DLO12
  • Loading branch information
mratsim committed Jul 16, 2024
1 parent 4f31dcb commit 8fb61c6
Show file tree
Hide file tree
Showing 20 changed files with 563 additions and 40 deletions.
1 change: 0 additions & 1 deletion benchmarks/bench_ec_msm_bandersnatch.nimcfg

This file was deleted.

1 change: 0 additions & 1 deletion benchmarks/bench_ec_msm_bls12_381_g1.nim.cfg

This file was deleted.

1 change: 0 additions & 1 deletion benchmarks/bench_ec_msm_bls12_381_g2.nim.cfg

This file was deleted.

1 change: 0 additions & 1 deletion benchmarks/bench_ec_msm_bn254_snarks_g1.nim.cfg

This file was deleted.

1 change: 0 additions & 1 deletion benchmarks/bench_ec_msm_pasta.nim.cfg

This file was deleted.

4 changes: 2 additions & 2 deletions benchmarks/bench_elliptic_parallel_template.nim
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,15 @@ proc msmParallelBench*[EC](ctx: var BenchMsmContext[EC], numInputs: int, iters:
var startNaive, stopNaive, startMSMbaseline, stopMSMbaseline, startMSMopt, stopMSMopt, startMSMpara, stopMSMpara: MonoTime

if numInputs <= 100000:
startNaive = getMonotime()
# startNaive = getMonotime()
bench("EC scalar muls " & align($numInputs, 10) & " (" & $bits & "-bit coefs, points)", EC, iters):
var tmp: EC
r.setNeutral()
for i in 0 ..< points.len:
tmp.fromAffine(points[i])
tmp.scalarMul(coefs[i])
r += tmp
stopNaive = getMonotime()
# stopNaive = getMonotime()

if numInputs <= 100000:
startNaive = getMonotime()
Expand Down
File renamed without changes.
1 change: 0 additions & 1 deletion benchmarks/bench_gt.nim
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ proc main() =
separator()
staticFor i, 0, AvailableCurves.len:
const curve = AvailableCurves[i]
const bits = Fr[curve].bits()
separator()
mulBench(Fp12[curve], Iters)
sqrBench(Fp12[curve], Iters)
Expand Down
46 changes: 46 additions & 0 deletions benchmarks/bench_gt_multiexp_bls12_381.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
# Internals
constantine/named/algebras,
constantine/math/extension_fields,
# Helpers
./bench_gt_parallel_template

# ############################################################
#
# Benchmark of the 𝔾ₜ group of
# Pairing Friendly curves
#
# ############################################################

const
  # Iteration budget; `main` divides it by the batch size of each run.
  Iters = 10000

  # Curves to benchmark. Uncomment an entry to include it.
  AvailableCurves = [
    # BN254_Nogami,
    # BN254_Snarks,
    # BLS12_377,
    BLS12_381,
  ]

  # Batch sizes (number of (element, exponent) pairs) to benchmark.
  testNumPoints = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]

proc main() =
  ## Runs the 𝔾ₜ multi-exponentiation benchmark for every curve in
  ## `AvailableCurves`, over every batch size in `testNumPoints`.
  separator()
  staticFor curveIdx, 0, AvailableCurves.len:
    const curve = AvailableCurves[curveIdx]
    # One context per curve, created from the full list of batch sizes.
    var ctx = createBenchMultiExpContext(Fp12[curve], testNumPoints)
    separator()
    for numPoints in testNumPoints:
      # Fewer iterations for larger batches, but always at least one.
      let batchIters = max(Iters div numPoints, 1)
      multiExpParallelBench(ctx, numPoints, batchIters)
      separator()
    separator()

main()
notes()
1 change: 1 addition & 0 deletions benchmarks/bench_gt_multiexp_bls12_381.nim.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--threads:on
162 changes: 162 additions & 0 deletions benchmarks/bench_gt_parallel_template.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

# ############################################################
#
# Benchmark template for 𝔾ₜ multi-exponentiations
#
# ############################################################

import
# Standard library
std/[monotimes, times],
# Internals
constantine/platforms/abstractions,
constantine/named/algebras,
constantine/math/[arithmetic, extension_fields],
constantine/math/pairings/[
pairings_generic,
gt_exponentiations,
gt_exponentiations_vartime,
gt_multiexp
],
constantine/threadpool,
# Helpers
helpers/prng_unsafe,
./bench_blueprint

export times, monotimes
export notes
export abstractions
proc separator*() =
  ## Calls the `separator` overload that takes an argument, with the
  ## fixed value 168 (presumably the line width used by this module's reports).
  separator(168)

proc report(op, domain: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
  ## Prints one result line for a benchmark.
  ##
  ## - `op`, `domain`: labels printed in the first two columns.
  ## - `start`, `stop`: monotonic timestamps around the whole run.
  ## - `startClk`, `stopClk`: tick counts around the whole run, only printed
  ##   when `SupportsGetTicks` is true.
  ## - `iters`: number of iterations the run covered; all figures are per iteration.
  let elapsed = stop - start
  let ns = inNanoseconds(elapsed div iters)
  # Operations per second derived from the per-iteration time in nanoseconds.
  let throughput = 1e9 / ns.float64
  when not SupportsGetTicks:
    echo &"{op:<68} {domain:<20} {throughput:>15.3f} ops/s {ns:>9} ns/op"
  else:
    echo &"{op:<68} {domain:<20} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)"

macro fixFieldDisplay(T: typedesc): untyped =
  ## Builds, at compile time, a string literal of the form `Name[Curve]`
  ## for an instantiated field type.
  ##
  ## At compile-time, enums are integers and their display is buggy:
  ## we would get the curve ID instead of the curve name, so the ID
  ## is converted back to `Algebra` before being stringified.
  let typeInst = T.getTypeInst()
  let fieldName = $typeInst[1][0]                    # e.g. Fp
  let curveName = $Algebra(typeInst[1][1].intVal)    # enum name recovered from its integer value
  result = newLit(fieldName & "[" & curveName & "]")

func fixDisplay(T: typedesc): string =
  ## Display name of a type for benchmark reports.
  ##
  ## Field and extension-field types go through `fixFieldDisplay`;
  ## every other type uses its plain `$` stringification.
  when T isnot (Fp or Fp2 or Fp4 or Fp6 or Fp12):
    $T
  else:
    fixFieldDisplay(T)

func fixDisplay(T: Algebra): string =
  ## Display name of an `Algebra` value: its plain `$` stringification.
  result = $T

template bench(op: string, T: typed, iters: int, body: untyped): untyped =
  ## Hands `body` and `iters` to `measure`, then prints one `report` line
  ## labelled with `op` and the display name of `T`.
  # NOTE(review): `startTime`, `stopTime`, `startClk` and `stopClk` are not declared
  # in this template; presumably `measure` introduces them at the expansion site,
  # which would also mean two `bench` calls cannot share a scope — confirm.
  measure(iters, startTime, stopTime, startClk, stopClk, body)
  report(op, fixDisplay(T), startTime, stopTime, startClk, stopClk, iters)

func random_gt*(rng: var RngState, F: typedesc): F {.inline, noInit.} =
  ## Draws a random element of type `F` with `random_unsafe`, then applies
  ## `finalExp` to it in place (presumably so the result lies in 𝔾ₜ — confirm
  ## against `finalExp`'s contract).
  result = random_unsafe(rng, F)
  finalExp(result)

# Multi-exponentiations
# ---------------------------------------------------------------------------

type BenchMultiexpContext*[GT] = object
  ## Inputs shared by the 𝔾ₜ multi-exponentiation benchmarks,
  ## filled in by `createBenchMultiExpContext`.
  tp: Threadpool # pool used to generate the inputs; shut down once generation is done
  numInputs: int # number of (element, exponent) pairs held, i.e. the length of both seqs
  elems: seq[GT] # bases of the multi-exponentiation
  exponents: seq[getBigInt(GT.Name, kScalarField)] # exponents[i] is paired with elems[i]

proc createBenchMultiExpContext*(GT: typedesc, inputSizes: openArray[int]): BenchMultiexpContext[GT] =
  ## Creates a benchmark context holding `max(inputSizes)` random
  ## (element, exponent) pairs, generated in parallel on a threadpool.
  ##
  ## - `GT`: element type of the group to benchmark.
  ## - `inputSizes`: batch sizes that will be benchmarked; only the largest one
  ##   matters here. Must be non-empty and its maximum must be positive.
  ##
  ## The threadpool is shut down before returning, so the returned `tp`
  ## must not be used to spawn further tasks.
  ## Progress and generation time are written to stdout.

  # Fail with a clear message instead of an IndexDefect (or, in `-d:danger`
  # builds, an unchecked access) on `inputSizes.max()` / `elems[0].addr` below.
  doAssert inputSizes.len > 0, "createBenchMultiExpContext: `inputSizes` must not be empty"
  let maxNumInputs = inputSizes.max()
  doAssert maxNumInputs > 0, "createBenchMultiExpContext: the largest input size must be positive, got " & $maxNumInputs

  result.tp = Threadpool.new()

  const bits = Fr[GT.Name].bits()

  result.numInputs = maxNumInputs
  result.elems = newSeq[GT](maxNumInputs)
  result.exponents = newSeq[BigInt[bits]](maxNumInputs)

  proc genElemExponentPairsChunk[GT](rngSeed: uint64, first, count: int, elemsPtr: ptr GT, exponentsPtr: ptr BigInt[bits]) {.nimcall.} =
    ## Fills the index range `first ..< first + count` of both output buffers,
    ## given pointers to their first items.
    let elems = cast[ptr UncheckedArray[GT]](elemsPtr)
    let exponents = cast[ptr UncheckedArray[BigInt[bits]]](exponentsPtr)

    # RNGs are not threadsafe, create a threadlocal one seeded from the global RNG
    var threadRng: RngState
    threadRng.seed(rngSeed)

    for i in first ..< first + count:
      elems[i] = threadRng.random_gt(GT)
      exponents[i] = threadRng.random_unsafe(BigInt[bits])

  let chunks = balancedChunksPrioNumber(0, maxNumInputs, result.tp.numThreads)

  stdout.write &"Generating {maxNumInputs} (elems, exponents) pairs ... "
  stdout.flushFile()

  let start = getMonotime()

  syncScope:
    # Loop variables are named so that they do not shadow the timer `start` above.
    for (chunkId, chunkStart, chunkSize) in items(chunks):
      result.tp.spawn genElemExponentPairsChunk(rng.next(), chunkStart, chunkSize, result.elems[0].addr, result.exponents[0].addr)

  # Even if child threads are sleeping, it seems like perf is lower when there are threads around,
  # maybe because the kernel has more overhead or time quanta to keep track of, so shut them down.
  result.tp.shutdown()

  let stop = getMonotime()
  stdout.write &"in {float64(inNanoSeconds(stop-start)) / 1e6:6.3f} ms\n"

proc multiExpParallelBench*[GT](ctx: var BenchMultiExpContext[GT], numInputs: int, iters: int) =
  ## Benchmarks the product of `numInputs` exponentiations in `GT`, computed three ways:
  ## one `gtExp` per pair, one `gtExp_vartime` per pair, and a single
  ## `multiExp_reference_vartime` call. Then prints the speedup of the
  ## multi-exponentiation over the per-pair vartime loop.
  ##
  ## - `ctx`: context whose first `numInputs` (element, exponent) pairs are used.
  ## - `numInputs`: batch size; must be in `0 .. ctx.numInputs`.
  ##   Nothing is benchmarked above 100000 inputs.
  ## - `iters`: iterations per benchmark; must be positive.
  const bits = Fr[GT.Name].bits()
  # Batch-size cut-off, same value as before but named once.
  const maxBenchedInputs = 100000

  # `doAssert` is kept in `-d:danger` builds, where slicing past the generated
  # inputs or dividing by zero iterations would otherwise go unchecked.
  doAssert iters > 0, "multiExpParallelBench: `iters` must be positive, got " & $iters
  doAssert numInputs in 0 .. ctx.numInputs,
    "multiExpParallelBench: `numInputs` (" & $numInputs & ") must be in 0 .. " & $ctx.numInputs

  template elems: untyped = ctx.elems.toOpenArray(0, numInputs-1)
  template exponents: untyped = ctx.exponents.toOpenArray(0, numInputs-1)

  var r{.noInit.}: GT
  var startNaive, stopNaive, startMultiExpBaseline, stopMultiExpBaseline: MonoTime

  # NOTE(review): each `bench` call is kept in its own `if` block as in the original;
  # presumably `measure` declares its timing variables in the enclosing scope,
  # so the blocks should not be merged without checking.

  if numInputs <= maxBenchedInputs:
    # This run is reported but deliberately not timed for the speedup ratio:
    # the reference for the ratio is the vartime loop below.
    bench("𝔾ₜ exponentiations " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters):
      var tmp: GT
      r.setOne()
      for i in 0 ..< elems.len:
        tmp.gtExp(elems[i], exponents[i])
        r *= tmp

  if numInputs <= maxBenchedInputs:
    startNaive = getMonotime()
    bench("𝔾ₜ exponentiations vartime " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters):
      var tmp: GT
      r.setOne()
      for i in 0 ..< elems.len:
        tmp.gtExp_vartime(elems[i], exponents[i])
        r *= tmp
    stopNaive = getMonotime()

  if numInputs <= maxBenchedInputs:
    startMultiExpBaseline = getMonotime()
    bench("𝔾ₜ multi-exponentiations baseline " & align($numInputs, 10) & " (" & $bits & "-bit exponents)", GT, iters):
      r.multiExp_reference_vartime(elems, exponents)
    stopMultiExpBaseline = getMonotime()

  # Per-iteration wall-clock time of the two timed variants, in nanoseconds.
  let perfNaive = inNanoseconds((stopNaive-startNaive) div iters)
  let perfMultiExpBaseline = inNanoseconds((stopMultiExpBaseline-startMultiExpBaseline) div iters)

  if numInputs <= maxBenchedInputs:
    let speedupBaseline = float(perfNaive) / float(perfMultiExpBaseline)
    echo &"Speedup ratio baseline over naive linear combination: {speedupBaseline:>6.3f}x"
1 change: 0 additions & 1 deletion benchmarks/bench_gt_template.nim
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import
helpers/prng_unsafe,
./bench_blueprint


export notes
export abstractions
proc separator*() = separator(168)
Expand Down
7 changes: 7 additions & 0 deletions constantine.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@ const benchDesc = [
"bench_pairing_bn254_nogami",
"bench_pairing_bn254_snarks",
"bench_gt",
"bench_gt_multiexp_bls12_381",
"bench_summary_bls12_377",
"bench_summary_bls12_381",
"bench_summary_bn254_nogami",
Expand Down Expand Up @@ -1036,6 +1037,12 @@ task bench_ec_g2_scalar_mul, "Run benchmark on Elliptic Curve group 𝔾2 (Multi
task bench_gt, "Run 𝔾ₜ benchmarks - CC compiler":
runBench("bench_gt")

# 𝔾ₜ - multi-exponentiation
# ------------------------------------------

task bench_gt_multiexp_bls12_381, "Run 𝔾ₜ multiexponentiation benchmarks for BLS12-381 - CC compiler":
runBench("bench_gt_multiexp_bls12_381")

# Pairings
# ------------------------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ func bestBucketBitSize*(inputSize: int, scalarBitwidth: static int, useSignedBuc
# L1, L2 caches, TLB and 64 aliasing conflict
# are not taken into account in previous formula.
# Each increase in c doubles memory used.
# TODO: use the element size for thresholds.
when useManualTuning:
if 14 <= result:
result -= 1
Expand Down
14 changes: 7 additions & 7 deletions constantine/math/pairings/cyclotomic_subgroups.nim
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,9 @@ func cyclotomic_square_cube_over_quad(r: var CubicExt, a: CubicExt) =
# https://eprint.iacr.org/2009/565.pdf

# Cubic extension field
# A = 3a² − 2 ̄a
# B = 3 √i c² + 2 ̄b
# C = 3b² − 2 ̄c
# A = 3a² − 2a̅
# B = 3 √i c² + 2b̅
# C = 3b² − 2c̅
var v0{.noInit.}, v1{.noInit.}, v2{.noInit.}: typeof(a.c0)

template a0: untyped = a.c0.c0
Expand Down Expand Up @@ -261,7 +261,7 @@ func cyclotomic_square_cube_over_quad(r: var CubicExt, a: CubicExt) =
r.c2.c1.double()
r.c2.c1 += v1.c1

# Now B = 3 √i c² + 2 ̄b
# Now B = 3 √i c² + 2b̅
# beware of mul by non residue: √i v₂ = ξv₂₁ + v₂₀√i

# 3 (√i c²)₀ + 2a₂
Expand Down Expand Up @@ -291,9 +291,9 @@ func cyclotomic_square_quad_over_cube[F](r: var QuadraticExt[F], a: QuadraticExt
# c₅ <=> a₅ <=> b₅
#
# Hence, this formula for a cubic extension field
# A = 3a² − 2 ̄a
# B = 3 √i c² + 2 ̄b
# C = 3b² − 2 ̄c
# A = 3a² − 2a̅
# B = 3 √i c² + 2b̅
# C = 3b² − 2c̅
#
# becomes
# A = (b₀, b₄) = 3(b₀, b₄)² - 2(b₀,-b₄)
Expand Down
Loading

0 comments on commit 8fb61c6

Please sign in to comment.