Skip to content

Commit

Permalink
Pasta / Halo2 MSM bench (#243)
Browse files Browse the repository at this point in the history
* Pasta bench

* cleanup env variables

* [MSM]: generate benchmark coef-points pairs in parallel

* try to fix windows Ci

* add diagnostic info

* fix old test for new codecs/io primitives

* Ensure the projective point at infinity is not all zeros, but (0, 1, 0)
  • Loading branch information
mratsim authored Jun 4, 2023
1 parent 1325d24 commit 0eba593
Show file tree
Hide file tree
Showing 35 changed files with 250 additions and 278 deletions.
21 changes: 13 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@ jobs:
run: |
sudo dpkg --add-architecture i386
sudo apt-fast update -qq
# Try to fix "E: Unable to correct problems, you have held broken packages."
sudo apt-fast clean
sudo DEBIAN_FRONTEND='noninteractive' apt-fast install \
--no-install-recommends -yq \
gcc-multilib g++-multilib \
Expand Down Expand Up @@ -216,11 +220,12 @@ jobs:
nimble refresh --verbose -y
nimble install --verbose -y gmp jsony asynctools
- name: Print Nim version
if: runner.os != 'Windows'
- name: Print Nim & compiler versions
shell: bash
# gcc is an alias to Clang on MacOS
run: |
nim -v
gcc -v
- name: Run Constantine tests (UNIX with Assembly)
if: runner.os != 'Windows' && matrix.target.BACKEND == 'ASM'
Expand All @@ -235,9 +240,9 @@ jobs:
shell: bash
run: |
cd constantine
nimble bindings_no_asm --verbose
nimble test_bindings --verbose
nimble test_parallel_no_asm --verbose
CTT_ASM=0 nimble bindings --verbose
nimble test_bindings --verbose
CTT_ASM=0 nimble test_parallel --verbose
- name: Run Constantine tests (Windows with Assembly)
# So "test_bindings" uses C and can find GMP
# but nim-gmp cannot find GMP on Windows CI
Expand All @@ -255,6 +260,6 @@ jobs:
shell: msys2 {0}
run: |
cd constantine
nimble bindings_no_asm --verbose
nimble test_bindings --verbose
nimble test_parallel_no_gmp_no_asm --verbose
CTT_ASM=0 nimble bindings --verbose
nimble test_bindings --verbose
CTT_ASM=0 nimble test_parallel_no_gmp --verbose
2 changes: 1 addition & 1 deletion benchmarks/bench_blueprint.nim
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ echo " release: ", defined(release)
echo " danger: ", defined(danger)
echo " inline assembly: ", UseASM_X86_64

when (sizeof(int) == 4) or defined(Ctt32):
when (sizeof(int) == 4) or defined(CTT_32):
echo "⚠️ Warning: using Constantine with 32-bit limbs"
else:
echo "Using Constantine with 64-bit limbs"
Expand Down
50 changes: 50 additions & 0 deletions benchmarks/bench_ec_g1_msm_pasta.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
# Internals
../constantine/math/config/curves,
../constantine/math/arithmetic,
../constantine/math/elliptic/[
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian],
# Helpers
../helpers/prng_unsafe,
./bench_elliptic_parallel_template

# ############################################################
#
#       Benchmark of multi-scalar multiplication (MSM)
#          on the Pasta curves (Pallas and Vesta)
#     Short Weierstrass curves, Jacobian coordinates
#
# ############################################################


# Iteration budget: `main` runs each benchmark `max(1, Iters div numPoints)` times,
# so larger inputs get proportionally fewer iterations.
const Iters = 10_000
# Curves to benchmark; `main` indexes this array with a compile-time index.
const AvailableCurves = [
Pallas, Vesta
]

# Alternative input-size sets, kept for reference:
# const testNumPoints = [10, 100, 1000, 10000, 100000]
# const testNumPoints = [64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072]
# Number of (coefficient, point) pairs per MSM: 2^8 through 2^17, plus 2^22.
const testNumPoints = [1 shl 8, 1 shl 9, 1 shl 10, 1 shl 11, 1 shl 12, 1 shl 13, 1 shl 14, 1 shl 15, 1 shl 16, 1 shl 17, 1 shl 22]

proc main() =
# Benchmark driver: for every curve in `AvailableCurves` and every input size in
# `testNumPoints`, run `msmParallelBench` on Jacobian points (`ECP_ShortW_Jac`).
separator()
staticFor i, 0, AvailableCurves.len:
# `curve` has to be a compile-time constant: it is used as a type parameter below.
const curve = AvailableCurves[i]
separator()
for numPoints in testNumPoints:
# Scale the iteration count down as the input grows, but always run at least once.
let batchIters = max(1, Iters div numPoints)
msmParallelBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, batchIters)
separator()
separator()

# Script entry point: run the benchmarks, then call `notes()`.
main()
notes()
1 change: 1 addition & 0 deletions benchmarks/bench_ec_g1_msm_pasta.nim.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--threads:on
35 changes: 28 additions & 7 deletions benchmarks/bench_elliptic_parallel_template.nim
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import
ec_multi_scalar_mul_parallel],
../constantine/math/constants/zoo_subgroups,
# Threadpool
../constantine/threadpool/threadpool,
../constantine/threadpool/[threadpool, partitioners],
# Helpers
../helpers/prng_unsafe,
./bench_elliptic_template,
Expand Down Expand Up @@ -55,11 +55,32 @@ proc msmParallelBench*(EC: typedesc, numPoints: int, iters: int) =
var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](numPoints)
var scalars = newSeq[BigInt[bits]](numPoints)

for i in 0 ..< numPoints:
var tmp = rng.random_unsafe(EC)
tmp.clearCofactor()
points[i].affine(tmp)
scalars[i] = rng.random_unsafe(BigInt[bits])
# Creating millions of points and clearing their cofactor takes a long long time
var tp = Threadpool.new()

proc genCoefPointPairs(rngSeed: uint64, start, len: int, points: ptr ECP_ShortW_Aff[EC.F, EC.G], scalars: ptr BigInt[bits]) {.nimcall.} =
  ## Fills `points[start ..< start+len]` and `scalars[start ..< start+len]`
  ## with random (point, coefficient) pairs for the MSM benchmark.
  ##
  ## Intended to be spawned as one threadpool task per chunk:
  ## - `rngSeed` seeds a generator private to this task,
  ## - `points` / `scalars` point to the first element of the destination
  ##   buffers; they are indexed as unchecked arrays, so the caller must
  ##   guarantee that `start + len` does not exceed the buffer lengths.
  let points = cast[ptr UncheckedArray[ECP_ShortW_Aff[EC.F, EC.G]]](points) # TODO use views to reduce verbosity
  let scalars = cast[ptr UncheckedArray[BigInt[bits]]](scalars)

  # RNGs are not threadsafe, create a threadlocal one seeded from the global RNG
  var threadRng: RngState
  threadRng.seed(rngSeed)

  for i in start ..< start + len:
    var tmp = threadRng.random_unsafe(EC)
    tmp.clearCofactor()
    points[i].affine(tmp)
    # Draw the scalar from the task-local generator as well: using the shared
    # outer `rng` here would be a data race between concurrently running tasks
    # and would make the generated scalars depend on thread scheduling.
    scalars[i] = threadRng.random_unsafe(BigInt[bits])

let chunks = balancedChunksPrioNumber(0, numPoints, tp.numThreads)

syncScope:
for (id, start, size) in items(chunks):
tp.spawn genCoefPointPairs(rng.next(), start, size, points[0].addr, scalars[0].addr)

# Even if child threads are sleeping, it seems like perf is lower when there are threads around,
# maybe because the kernel has more overhead or time quanta to keep track of, so shut them down.
tp.shutdown()

var r{.noInit.}: EC
var startNaive, stopNaive, startMSMbaseline, stopMSMbaseline, startMSMopt, stopMSMopt, startMSMpara, stopMSMpara: MonoTime
Expand Down Expand Up @@ -88,7 +109,7 @@ proc msmParallelBench*(EC: typedesc, numPoints: int, iters: int) =
stopMSMopt = getMonotime()

block:
var tp = Threadpool.new()
tp = Threadpool.new()

startMSMpara = getMonotime()
bench("EC multi-scalar-mul" & align($tp.numThreads & " threads", 11) & align($numPoints, 10) & " (" & $bits & "-bit coefs, points)", EC, iters):
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_elliptic_template.nim
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ macro fixEllipticDisplay(EC: typedesc): untyped =
var name = $instantiated[1][0] # EllipticEquationFormCoordinates
let fieldName = $instantiated[1][1][0]
let curveName = $Curve(instantiated[1][1][1].intVal)
name.add "[" & fieldName & "[" & curveName & ']'
name.add "[" & fieldName & "[" & curveName & "]]"
result = newLit name

proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_fp_double_precision.nim
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ echo " release: ", defined(release)
echo " danger: ", defined(danger)
echo " inline assembly: ", UseASM_X86_64

when (sizeof(int) == 4) or defined(Ctt32):
when (sizeof(int) == 4) or defined(CTT_32):
echo "⚠️ Warning: using Constantine with 32-bit limbs"
else:
echo "Using Constantine with 64-bit limbs"
Expand Down
8 changes: 4 additions & 4 deletions bindings_generators/constantine_bls12_381.nim
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ type
bls12381_fr = Fr[BLS12_381]
bls12381_fp = Fp[BLS12_381]
bls12381_fp2 = Fp2[BLS12_381]
bls12381_ec_g1_aff = ECP_ShortW_Aff[Fp[BLS12_381], G1]
bls12381_ec_g1_aff = ECP_ShortW_Aff[Fp[BLS12_381], G1]
bls12381_ec_g1_jac = ECP_ShortW_Jac[Fp[BLS12_381], G1]
bls12381_ec_g1_prj = ECP_ShortW_Prj[Fp[BLS12_381], G1]
bls12381_ec_g2_aff = ECP_ShortW_Aff[Fp2[BLS12_381], G2]
bls12381_ec_g2_aff = ECP_ShortW_Aff[Fp2[BLS12_381], G2]
bls12381_ec_g2_jac = ECP_ShortW_Jac[Fp2[BLS12_381], G2]
bls12381_ec_g2_prj = ECP_ShortW_Prj[Fp2[BLS12_381], G2]

Expand All @@ -33,9 +33,9 @@ collectBindings(cBindings):
genBindings_EC_ShortW_NonAffine(bls12381_ec_g2_prj, bls12381_ec_g2_aff, bls12381_fp2)

# Write header
when isMainModule and defined(CttGenerateHeaders):
when isMainModule and defined(CTT_GENERATE_HEADERS):
import std/[os, strformat]

proc main() =
# echo "Running bindings generation for " & getAppFilename().extractFilename()

Expand Down
8 changes: 4 additions & 4 deletions bindings_generators/constantine_pasta.nim
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ type
pallas_fp = Fp[Pallas]
vesta_fr = Fr[Vesta]
vesta_fp = Fp[Vesta]
pallas_ec_aff = ECP_ShortW_Aff[Fp[Pallas], G1]
pallas_ec_aff = ECP_ShortW_Aff[Fp[Pallas], G1]
pallas_ec_jac = ECP_ShortW_Jac[Fp[Pallas], G1]
pallas_ec_prj = ECP_ShortW_Prj[Fp[Pallas], G1]
vesta_ec_aff = ECP_ShortW_Aff[Fp[Vesta], G1]
vesta_ec_aff = ECP_ShortW_Aff[Fp[Vesta], G1]
vesta_ec_jac = ECP_ShortW_Jac[Fp[Vesta], G1]
vesta_ec_prj = ECP_ShortW_Prj[Fp[Vesta], G1]

Expand All @@ -35,9 +35,9 @@ collectBindings(cBindings):
genBindings_EC_ShortW_NonAffine(vesta_ec_prj, vesta_ec_aff, vesta_fp)

# Write header
when isMainModule and defined(CttGenerateHeaders):
when isMainModule and defined(CTT_GENERATE_HEADERS):
import std/[os, strformat]

proc main() =
# echo "Running bindings generation for " & getAppFilename().extractFilename()

Expand Down
2 changes: 1 addition & 1 deletion bindings_generators/gen_header.nim
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ macro collectBindings*(cBindingsStr: untyped, body: typed): untyped =

cBindings &= ");"

if defined(CttGenerateHeaders):
if defined(CTT_GENERATE_HEADERS):
result = newConstStmt(cBindingsStr, newLit cBindings)
else:
result = body
Loading

0 comments on commit 0eba593

Please sign in to comment.