Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pasta / Halo2 MSM bench #243

Merged
merged 7 commits into from
Jun 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@ jobs:
run: |
sudo dpkg --add-architecture i386
sudo apt-fast update -qq

# Try to fix "E: Unable to correct problems, you have held broken packages."
sudo apt-fast clean

sudo DEBIAN_FRONTEND='noninteractive' apt-fast install \
--no-install-recommends -yq \
gcc-multilib g++-multilib \
Expand Down Expand Up @@ -216,11 +220,12 @@ jobs:
nimble refresh --verbose -y
nimble install --verbose -y gmp jsony asynctools

- name: Print Nim version
if: runner.os != 'Windows'
- name: Print Nim & compiler versions
shell: bash
# gcc is an alias to Clang on MacOS
run: |
nim -v
gcc -v

- name: Run Constantine tests (UNIX with Assembly)
if: runner.os != 'Windows' && matrix.target.BACKEND == 'ASM'
Expand All @@ -235,9 +240,9 @@ jobs:
shell: bash
run: |
cd constantine
nimble bindings_no_asm --verbose
nimble test_bindings --verbose
nimble test_parallel_no_asm --verbose
CTT_ASM=0 nimble bindings --verbose
nimble test_bindings --verbose
CTT_ASM=0 nimble test_parallel --verbose
- name: Run Constantine tests (Windows with Assembly)
# So "test_bindings" uses C and can find GMP
# but nim-gmp cannot find GMP on Windows CI
Expand All @@ -255,6 +260,6 @@ jobs:
shell: msys2 {0}
run: |
cd constantine
nimble bindings_no_asm --verbose
nimble test_bindings --verbose
nimble test_parallel_no_gmp_no_asm --verbose
CTT_ASM=0 nimble bindings --verbose
nimble test_bindings --verbose
CTT_ASM=0 nimble test_parallel_no_gmp --verbose
2 changes: 1 addition & 1 deletion benchmarks/bench_blueprint.nim
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ echo " release: ", defined(release)
echo " danger: ", defined(danger)
echo " inline assembly: ", UseASM_X86_64

when (sizeof(int) == 4) or defined(Ctt32):
when (sizeof(int) == 4) or defined(CTT_32):
echo "⚠️ Warning: using Constantine with 32-bit limbs"
else:
echo "Using Constantine with 64-bit limbs"
Expand Down
50 changes: 50 additions & 0 deletions benchmarks/bench_ec_g1_msm_pasta.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
# Internals
../constantine/math/config/curves,
../constantine/math/arithmetic,
../constantine/math/elliptic/[
ec_shortweierstrass_projective,
ec_shortweierstrass_jacobian],
# Helpers
../helpers/prng_unsafe,
./bench_elliptic_parallel_template

# ############################################################
#
# Benchmark of the G1 group of
# Short Weierstrass elliptic curves
# in (homogeneous) projective coordinates
#
# ############################################################


const Iters = 10_000
const AvailableCurves = [
Pallas, Vesta
]

# const testNumPoints = [10, 100, 1000, 10000, 100000]
# const testNumPoints = [64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072]
const testNumPoints = [1 shl 8, 1 shl 9, 1 shl 10, 1 shl 11, 1 shl 12, 1 shl 13, 1 shl 14, 1 shl 15, 1 shl 16, 1 shl 17, 1 shl 22]

proc main() =
separator()
staticFor i, 0, AvailableCurves.len:
const curve = AvailableCurves[i]
separator()
for numPoints in testNumPoints:
let batchIters = max(1, Iters div numPoints)
msmParallelBench(ECP_ShortW_Jac[Fp[curve], G1], numPoints, batchIters)
separator()
separator()

main()
notes()
1 change: 1 addition & 0 deletions benchmarks/bench_ec_g1_msm_pasta.nim.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--threads:on
35 changes: 28 additions & 7 deletions benchmarks/bench_elliptic_parallel_template.nim
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import
ec_multi_scalar_mul_parallel],
../constantine/math/constants/zoo_subgroups,
# Threadpool
../constantine/threadpool/threadpool,
../constantine/threadpool/[threadpool, partitioners],
# Helpers
../helpers/prng_unsafe,
./bench_elliptic_template,
Expand Down Expand Up @@ -55,11 +55,32 @@ proc msmParallelBench*(EC: typedesc, numPoints: int, iters: int) =
var points = newSeq[ECP_ShortW_Aff[EC.F, EC.G]](numPoints)
var scalars = newSeq[BigInt[bits]](numPoints)

for i in 0 ..< numPoints:
var tmp = rng.random_unsafe(EC)
tmp.clearCofactor()
points[i].affine(tmp)
scalars[i] = rng.random_unsafe(BigInt[bits])
# Creating millions of points and clearing their cofactor takes a long long time
var tp = Threadpool.new()

proc genCoefPointPairs(rngSeed: uint64, start, len: int, points: ptr ECP_ShortW_Aff[EC.F, EC.G], scalars: ptr BigInt[bits]) {.nimcall.} =
let points = cast[ptr UncheckedArray[ECP_ShortW_Aff[EC.F, EC.G]]](points) # TODO use views to reduce verbosity
let scalars = cast[ptr UncheckedArray[BigInt[bits]]](scalars)

# RNGs are not threadsafe, create a threadlocal one seeded from the global RNG
var threadRng: RngState
threadRng.seed(rngSeed)

for i in start ..< start + len:
var tmp = threadRng.random_unsafe(EC)
tmp.clearCofactor()
points[i].affine(tmp)
scalars[i] = rng.random_unsafe(BigInt[bits])

let chunks = balancedChunksPrioNumber(0, numPoints, tp.numThreads)

syncScope:
for (id, start, size) in items(chunks):
tp.spawn genCoefPointPairs(rng.next(), start, size, points[0].addr, scalars[0].addr)

# Even if child threads are sleeping, it seems like perf is lower when there are threads around
# maybe because the kernel has more overhead or time quantum to keep track off so shut them down.
tp.shutdown()

var r{.noInit.}: EC
var startNaive, stopNaive, startMSMbaseline, stopMSMbaseline, startMSMopt, stopMSMopt, startMSMpara, stopMSMpara: MonoTime
Expand Down Expand Up @@ -88,7 +109,7 @@ proc msmParallelBench*(EC: typedesc, numPoints: int, iters: int) =
stopMSMopt = getMonotime()

block:
var tp = Threadpool.new()
tp = Threadpool.new()

startMSMpara = getMonotime()
bench("EC multi-scalar-mul" & align($tp.numThreads & " threads", 11) & align($numPoints, 10) & " (" & $bits & "-bit coefs, points)", EC, iters):
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_elliptic_template.nim
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ macro fixEllipticDisplay(EC: typedesc): untyped =
var name = $instantiated[1][0] # EllipticEquationFormCoordinates
let fieldName = $instantiated[1][1][0]
let curveName = $Curve(instantiated[1][1][1].intVal)
name.add "[" & fieldName & "[" & curveName & ']'
name.add "[" & fieldName & "[" & curveName & "]]"
result = newLit name

proc report(op, elliptic: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bench_fp_double_precision.nim
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ echo " release: ", defined(release)
echo " danger: ", defined(danger)
echo " inline assembly: ", UseASM_X86_64

when (sizeof(int) == 4) or defined(Ctt32):
when (sizeof(int) == 4) or defined(CTT_32):
echo "⚠️ Warning: using Constantine with 32-bit limbs"
else:
echo "Using Constantine with 64-bit limbs"
Expand Down
8 changes: 4 additions & 4 deletions bindings_generators/constantine_bls12_381.nim
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ type
bls12381_fr = Fr[BLS12_381]
bls12381_fp = Fp[BLS12_381]
bls12381_fp2 = Fp2[BLS12_381]
bls12381_ec_g1_aff = ECP_ShortW_Aff[Fp[BLS12_381], G1]
bls12381_ec_g1_aff = ECP_ShortW_Aff[Fp[BLS12_381], G1]
bls12381_ec_g1_jac = ECP_ShortW_Jac[Fp[BLS12_381], G1]
bls12381_ec_g1_prj = ECP_ShortW_Prj[Fp[BLS12_381], G1]
bls12381_ec_g2_aff = ECP_ShortW_Aff[Fp2[BLS12_381], G2]
bls12381_ec_g2_aff = ECP_ShortW_Aff[Fp2[BLS12_381], G2]
bls12381_ec_g2_jac = ECP_ShortW_Jac[Fp2[BLS12_381], G2]
bls12381_ec_g2_prj = ECP_ShortW_Prj[Fp2[BLS12_381], G2]

Expand All @@ -33,9 +33,9 @@ collectBindings(cBindings):
genBindings_EC_ShortW_NonAffine(bls12381_ec_g2_prj, bls12381_ec_g2_aff, bls12381_fp2)

# Write header
when isMainModule and defined(CttGenerateHeaders):
when isMainModule and defined(CTT_GENERATE_HEADERS):
import std/[os, strformat]

proc main() =
# echo "Running bindings generation for " & getAppFilename().extractFilename()

Expand Down
8 changes: 4 additions & 4 deletions bindings_generators/constantine_pasta.nim
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ type
pallas_fp = Fp[Pallas]
vesta_fr = Fr[Vesta]
vesta_fp = Fp[Vesta]
pallas_ec_aff = ECP_ShortW_Aff[Fp[Pallas], G1]
pallas_ec_aff = ECP_ShortW_Aff[Fp[Pallas], G1]
pallas_ec_jac = ECP_ShortW_Jac[Fp[Pallas], G1]
pallas_ec_prj = ECP_ShortW_Prj[Fp[Pallas], G1]
vesta_ec_aff = ECP_ShortW_Aff[Fp[Vesta], G1]
vesta_ec_aff = ECP_ShortW_Aff[Fp[Vesta], G1]
vesta_ec_jac = ECP_ShortW_Jac[Fp[Vesta], G1]
vesta_ec_prj = ECP_ShortW_Prj[Fp[Vesta], G1]

Expand All @@ -35,9 +35,9 @@ collectBindings(cBindings):
genBindings_EC_ShortW_NonAffine(vesta_ec_prj, vesta_ec_aff, vesta_fp)

# Write header
when isMainModule and defined(CttGenerateHeaders):
when isMainModule and defined(CTT_GENERATE_HEADERS):
import std/[os, strformat]

proc main() =
# echo "Running bindings generation for " & getAppFilename().extractFilename()

Expand Down
2 changes: 1 addition & 1 deletion bindings_generators/gen_header.nim
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ macro collectBindings*(cBindingsStr: untyped, body: typed): untyped =

cBindings &= ");"

if defined(CttGenerateHeaders):
if defined(CTT_GENERATE_HEADERS):
result = newConstStmt(cBindingsStr, newLit cBindings)
else:
result = body
Loading