From 0f8f289645a0af93f004807997b6d8303b068513 Mon Sep 17 00:00:00 2001 From: Mamy Ratsimbazafy Date: Sat, 27 Jul 2024 16:25:21 +0200 Subject: [PATCH] feat(bench): check overhead of field calls --- .../bench_elliptic_parallel_template.nim | 6 +-- benchmarks/bench_elliptic_template.nim | 36 ++++++------- benchmarks/bench_fields_template.nim | 53 +++++++++++-------- benchmarks/bench_fp.nim | 1 + 4 files changed, 53 insertions(+), 43 deletions(-) diff --git a/benchmarks/bench_elliptic_parallel_template.nim b/benchmarks/bench_elliptic_parallel_template.nim index a6a7c77b..ce048534 100644 --- a/benchmarks/bench_elliptic_parallel_template.nim +++ b/benchmarks/bench_elliptic_parallel_template.nim @@ -35,7 +35,7 @@ export bench_elliptic_template # # ############################################################ -proc multiAddParallelBench*(EC: typedesc, numInputs: int, iters: int) = +proc multiAddParallelBench*(EC: typedesc, numInputs: int, iters: int) {.noinline.} = var points = newSeq[EC_ShortW_Aff[EC.F, EC.G]](numInputs) for i in 0 ..< numInputs: @@ -59,7 +59,7 @@ type BenchMsmContext*[EC] = object coefs: seq[BigInt[64]] # seq[getBigInt(EC.getName(), kScalarField)] points: seq[affine(EC)] -proc createBenchMsmContext*(EC: typedesc, inputSizes: openArray[int]): BenchMsmContext[EC] = +proc createBenchMsmContext*(EC: typedesc, inputSizes: openArray[int]): BenchMsmContext[EC] {.noinline.} = result.tp = Threadpool.new() let maxNumInputs = inputSizes.max() @@ -103,7 +103,7 @@ proc createBenchMsmContext*(EC: typedesc, inputSizes: openArray[int]): BenchMsmC let stop = getMonotime() stdout.write &"in {float64(inNanoSeconds(stop-start)) / 1e6:6.3f} ms\n" -proc msmParallelBench*[EC](ctx: var BenchMsmContext[EC], numInputs: int, iters: int) = +proc msmParallelBench*[EC](ctx: var BenchMsmContext[EC], numInputs: int, iters: int) {.noinline.} = const bits = 64 # EC.getScalarField().bits() type ECaff = affine(EC) diff --git a/benchmarks/bench_elliptic_template.nim b/benchmarks/bench_elliptic_template.nim index 13aca484..cbd20624 100644 --- a/benchmarks/bench_elliptic_template.nim +++ b/benchmarks/bench_elliptic_template.nim @@ -72,7 +72,7 @@ func `+=`[F; G: static Subgroup](P: var EC_ShortW_JacExt[F, G], Q: EC_ShortW_Jac func `+=`[F; G: static Subgroup](P: var EC_ShortW_JacExt[F, G], Q: EC_ShortW_Aff[F, G]) {.inline.}= P.mixedSum_vartime(P, Q) -proc addBench*(EC: typedesc, iters: int) = +proc addBench*(EC: typedesc, iters: int) {.noinline.} = var r {.noInit.}: EC let P = rng.random_unsafe(EC) let Q = rng.random_unsafe(EC) @@ -88,7 +88,7 @@ proc addBench*(EC: typedesc, iters: int) = bench("EC Add vartime " & $EC.G, EC, iters): r.sum_vartime(P, Q) -proc mixedAddBench*(EC: typedesc, iters: int) = +proc mixedAddBench*(EC: typedesc, iters: int) {.noinline.} = var r {.noInit.}: EC let P = rng.random_unsafe(EC) let Q = rng.random_unsafe(EC) @@ -106,25 +106,25 @@ proc mixedAddBench*(EC: typedesc, iters: int) = bench("EC Mixed Addition vartime " & $EC.G, EC, iters): r.mixedSum_vartime(P, Qaff) -proc doublingBench*(EC: typedesc, iters: int) = +proc doublingBench*(EC: typedesc, iters: int) {.noinline.} = var r {.noInit.}: EC let P = rng.random_unsafe(EC) bench("EC Double " & $EC.G, EC, iters): r.double(P) -proc affFromProjBench*(EC: typedesc, iters: int) = +proc affFromProjBench*(EC: typedesc, iters: int) {.noinline.} = var r {.noInit.}: EC_ShortW_Aff[EC.F, EC.G] let P = rng.random_unsafe(EC) bench("EC Projective to Affine " & $EC.G, EC, iters): r.affine(P) -proc affFromJacBench*(EC: typedesc, iters: int) = +proc affFromJacBench*(EC: typedesc, iters: int) {.noinline.} = var r {.noInit.}: EC_ShortW_Aff[EC.F, EC.G] let P = rng.random_unsafe(EC) bench("EC Jacobian to Affine " & $EC.G, EC, iters): r.affine(P) -proc affFromProjBatchBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) = +proc affFromProjBatchBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) {.noinline.} = var r = newSeq[affine(EC)](numPoints) var points = newSeq[EC](numPoints) @@ -139,7 +139,7 @@ proc affFromProjBatchBench*(EC: typedesc, numPoints: int, useBatching: bool, ite for i in 0 ..< numPoints: r[i].affine(points[i]) -proc affFromJacBatchBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) = +proc affFromJacBatchBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) {.noinline.} = var r = newSeq[affine(EC)](numPoints) var points = newSeq[EC](numPoints) @@ -154,7 +154,7 @@ proc affFromJacBatchBench*(EC: typedesc, numPoints: int, useBatching: bool, iter for i in 0 ..< numPoints: r[i].affine(points[i]) -proc scalarMulGenericBench*(EC: typedesc, bits, window: static int, iters: int) = +proc scalarMulGenericBench*(EC: typedesc, bits, window: static int, iters: int) {.noinline.} = var r {.noInit.}: EC var P = rng.random_unsafe(EC) P.clearCofactor() @@ -165,7 +165,7 @@ proc scalarMulGenericBench*(EC: typedesc, bits, window: static int, iters: int) r = P r.scalarMulGeneric(exponent, window) -proc scalarMulEndo*(EC: typedesc, bits: static int, iters: int) = +proc scalarMulEndo*(EC: typedesc, bits: static int, iters: int) {.noinline.} = var r {.noInit.}: EC var P = rng.random_unsafe(EC) P.clearCofactor() @@ -176,7 +176,7 @@ proc scalarMulEndo*(EC: typedesc, bits: static int, iters: int) = r = P r.scalarMulEndo(exponent) -proc scalarMulEndoWindow*(EC: typedesc, bits: static int, iters: int) = +proc scalarMulEndoWindow*(EC: typedesc, bits: static int, iters: int) {.noinline.} = var r {.noInit.}: EC var P = rng.random_unsafe(EC) P.clearCofactor() @@ -190,7 +190,7 @@ proc scalarMulEndoWindow*(EC: typedesc, bits: static int, iters: int) = else: {.error: "Not implemented".} -proc scalarMulVartimeDoubleAddBench*(EC: typedesc, bits: static int, iters: int) = +proc scalarMulVartimeDoubleAddBench*(EC: typedesc, bits: static int, iters: int) {.noinline.} = var r {.noInit.}: EC var P = rng.random_unsafe(EC) P.clearCofactor() @@ -201,7 +201,7 @@ proc scalarMulVartimeDoubleAddBench*(EC: typedesc, bits: static int, iters: int) r = P r.scalarMul_doubleAdd_vartime(exponent) -proc scalarMulVartimeMinHammingWeightRecodingBench*(EC: typedesc, bits: static int, iters: int) = +proc scalarMulVartimeMinHammingWeightRecodingBench*(EC: typedesc, bits: static int, iters: int) {.noinline.} = var r {.noInit.}: EC var P = rng.random_unsafe(EC) P.clearCofactor() @@ -212,7 +212,7 @@ proc scalarMulVartimeMinHammingWeightRecodingBench*(EC: typedesc, bits: static i r = P r.scalarMul_jy00_vartime(exponent) -proc scalarMulVartimeWNAFBench*(EC: typedesc, bits, window: static int, iters: int) = +proc scalarMulVartimeWNAFBench*(EC: typedesc, bits, window: static int, iters: int) {.noinline.} = var r {.noInit.}: EC var P = rng.random_unsafe(EC) P.clearCofactor() @@ -223,7 +223,7 @@ proc scalarMulVartimeWNAFBench*(EC: typedesc, bits, window: static int, iters: i r = P r.scalarMul_wNAF_vartime(exponent, window) -proc scalarMulVartimeEndoWNAFBench*(EC: typedesc, bits, window: static int, iters: int) = +proc scalarMulVartimeEndoWNAFBench*(EC: typedesc, bits, window: static int, iters: int) {.noinline.} = var r {.noInit.}: EC var P = rng.random_unsafe(EC) P.clearCofactor() @@ -234,14 +234,14 @@ proc scalarMulVartimeEndoWNAFBench*(EC: typedesc, bits, window: static int, iter r = P r.scalarMulEndo_wNAF_vartime(exponent, window) -proc subgroupCheckBench*(EC: typedesc, iters: int) = +proc subgroupCheckBench*(EC: typedesc, iters: int) {.noinline.} = var P = rng.random_unsafe(EC) P.clearCofactor() bench("Subgroup check", EC, iters): discard P.isInSubgroup() -proc subgroupCheckScalarMulVartimeEndoWNAFBench*(EC: typedesc, bits, window: static int, iters: int) = +proc subgroupCheckScalarMulVartimeEndoWNAFBench*(EC: typedesc, bits, window: static int, iters: int) {.noinline.} = var r {.noInit.}: EC var P = rng.random_unsafe(EC) P.clearCofactor() @@ -253,7 +253,7 @@ proc subgroupCheckScalarMulVartimeEndoWNAFBench*(EC: typedesc, bits, window: sta discard r.isInSubgroup() r.scalarMulEndo_wNAF_vartime(exponent, window) -proc multiAddBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) = +proc multiAddBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) {.noinline.} = var points = newSeq[EC_ShortW_Aff[EC.F, EC.G]](numPoints) for i in 0 ..< numPoints: @@ -271,7 +271,7 @@ proc multiAddBench*(EC: typedesc, numPoints: int, useBatching: bool, iters: int) r += points[i] -proc msmBench*(EC: typedesc, numPoints: int, iters: int) = +proc msmBench*(EC: typedesc, numPoints: int, iters: int) {.noinline.} = const bits = EC.getScalarField().bits() var points = newSeq[EC_ShortW_Aff[EC.F, EC.G]](numPoints) var scalars = newSeq[BigInt[bits]](numPoints) diff --git a/benchmarks/bench_fields_template.nim b/benchmarks/bench_fields_template.nim index 5ad318e2..f919c6fc 100644 --- a/benchmarks/bench_fields_template.nim +++ b/benchmarks/bench_fields_template.nim @@ -61,37 +61,46 @@ func random_unsafe(rng: var RngState, a: var ExtensionField2x) = for i in 0 ..< a.coords.len: rng.random_unsafe(a.coords[i]) -proc addBench*(T: typedesc, iters: int) = +proc addBench*(T: typedesc, iters: int) {.noinline.} = var x = rng.random_unsafe(T) let y = rng.random_unsafe(T) bench("Addition", T, iters): x += y -proc subBench*(T: typedesc, iters: int) = +proc add10Bench*(T: typedesc, iters: int) {.noinline.} = + var xs: array[10, T] + for x in xs.mitems(): + x = rng.random_unsafe(T) + let y = rng.random_unsafe(T) + bench("Additions (10)", T, iters): + staticFor i, 0, 10: + xs[i] += y + +proc subBench*(T: typedesc, iters: int) {.noinline.} = var x = rng.random_unsafe(T) let y = rng.random_unsafe(T) preventOptimAway(x) bench("Substraction", T, iters): x -= y -proc negBench*(T: typedesc, iters: int) = +proc negBench*(T: typedesc, iters: int) {.noinline.} = var r: T let x = rng.random_unsafe(T) bench("Negation", T, iters): r.neg(x) -proc ccopyBench*(T: typedesc, iters: int) = +proc ccopyBench*(T: typedesc, iters: int) {.noinline.} = var r: T let x = rng.random_unsafe(T) bench("Conditional Copy", T, iters): r.ccopy(x, CtFalse) -proc div2Bench*(T: typedesc, iters: int) = +proc div2Bench*(T: typedesc, iters: int) {.noinline.} = var x = rng.random_unsafe(T) bench("Division by 2", T, iters): x.div2() -proc mulBench*(T: typedesc, iters: int) = +proc mulBench*(T: typedesc, iters: int) {.noinline.} = var r: T let x = rng.random_unsafe(T) let y = rng.random_unsafe(T) @@ -99,14 +108,14 @@ proc mulBench*(T: typedesc, iters: int) = bench("Multiplication", T, iters): r.prod(x, y) -proc sqrBench*(T: typedesc, iters: int) = +proc sqrBench*(T: typedesc, iters: int) {.noinline.} = var r: T let x = rng.random_unsafe(T) preventOptimAway(r) bench("Squaring", T, iters): r.square(x) -proc mul2xUnrBench*(T: typedesc, iters: int) = +proc mul2xUnrBench*(T: typedesc, iters: int) {.noinline.} = var r: doublePrec(T) let x = rng.random_unsafe(T) let y = rng.random_unsafe(T) @@ -114,14 +123,14 @@ proc mul2xUnrBench*(T: typedesc, iters: int) = bench("Multiplication 2x unreduced", T, iters): r.prod2x(x, y) -proc sqr2xUnrBench*(T: typedesc, iters: int) = +proc sqr2xUnrBench*(T: typedesc, iters: int) {.noinline.} = var r: doublePrec(T) let x = rng.random_unsafe(T) preventOptimAway(r) bench("Squaring 2x unreduced", T, iters): r.square2x(x) -proc rdc2xBench*(T: typedesc, iters: int) = +proc rdc2xBench*(T: typedesc, iters: int) {.noinline.} = var r: T var t: doublePrec(T) rng.random_unsafe(t) @@ -129,7 +138,7 @@ proc rdc2xBench*(T: typedesc, iters: int) = bench("Redc 2x", T, iters): r.redc2x(t) -proc sumprodBench*(T: typedesc, iters: int) = +proc sumprodBench*(T: typedesc, iters: int) {.noinline.} = var r: T let a = rng.random_unsafe(T) let b = rng.random_unsafe(T) @@ -139,40 +148,40 @@ proc sumprodBench*(T: typedesc, iters: int) = bench("Linear combination", T, iters): r.sumprod([a, b], [u, v]) -proc toBigBench*(T: typedesc, iters: int) = +proc toBigBench*(T: typedesc, iters: int) {.noinline.} = var r: T.getBigInt() let x = rng.random_unsafe(T) preventOptimAway(r) bench("BigInt <- field conversion", T, iters): r.fromField(x) -proc toFieldBench*(T: typedesc, iters: int) = +proc toFieldBench*(T: typedesc, iters: int) {.noinline.} = var r: T let x = rng.random_unsafe(T.getBigInt()) preventOptimAway(r) bench("BigInt -> field conversion", T, iters): r.fromBig(x) -proc invBench*(T: typedesc, iters: int) = +proc invBench*(T: typedesc, iters: int) {.noinline.} = var r: T let x = rng.random_unsafe(T) preventOptimAway(r) bench("Inversion (constant-time)", T, iters): r.inv(x) -proc invVartimeBench*(T: typedesc, iters: int) = +proc invVartimeBench*(T: typedesc, iters: int) {.noinline.} = var r: T let x = rng.random_unsafe(T) preventOptimAway(r) bench("Inversion (variable-time)", T, iters): r.inv_vartime(x) -proc isSquareBench*(T: typedesc, iters: int) = +proc isSquareBench*(T: typedesc, iters: int) {.noinline.} = let x = rng.random_unsafe(T) bench("isSquare (constant-time)", T, iters): let qrt = x.isSquare() -proc sqrtBench*(T: typedesc, iters: int) = +proc sqrtBench*(T: typedesc, iters: int) {.noinline.} = let x = rng.random_unsafe(T) const algoType = block: @@ -192,14 +201,14 @@ proc sqrtBench*(T: typedesc, iters: int) = var r = x discard r.sqrt_if_square() -proc sqrtRatioBench*(T: typedesc, iters: int) = +proc sqrtRatioBench*(T: typedesc, iters: int) {.noinline.} = var r: T let u = rng.random_unsafe(T) let v = rng.random_unsafe(T) bench("Fused SquareRoot+Division+isSquare sqrt(u/v)", T, iters): let isSquare = r.sqrt_ratio_if_square(u, v) -proc sqrtVartimeBench*(T: typedesc, iters: int) = +proc sqrtVartimeBench*(T: typedesc, iters: int) {.noinline.} = let x = rng.random_unsafe(T) const algoType = block: @@ -219,21 +228,21 @@ proc sqrtVartimeBench*(T: typedesc, iters: int) = var r = x discard r.sqrt_if_square_vartime() -proc sqrtRatioVartimeBench*(T: typedesc, iters: int) = +proc sqrtRatioVartimeBench*(T: typedesc, iters: int) {.noinline.} = var r: T let u = rng.random_unsafe(T) let v = rng.random_unsafe(T) bench("Fused SquareRoot+Division+isSquare sqrt_vartime(u/v)", T, iters): let isSquare = r.sqrt_ratio_if_square_vartime(u, v) -proc powBench*(T: typedesc, iters: int) = +proc powBench*(T: typedesc, iters: int) {.noinline.} = let x = rng.random_unsafe(T) let exponent = rng.random_unsafe(BigInt[Fr[T.Name].bits()]) var r = x bench("Exp curve order (constant-time) - " & $exponent.bits & "-bit", T, iters): r.pow(exponent) -proc powVartimeBench*(T: typedesc, iters: int) = +proc powVartimeBench*(T: typedesc, iters: int) {.noinline.} = let x = rng.random_unsafe(T) let exponent = rng.random_unsafe(BigInt[Fr[T.Name].bits()]) var r = x diff --git a/benchmarks/bench_fp.nim b/benchmarks/bench_fp.nim index 79a511b7..4e818508 100644 --- a/benchmarks/bench_fp.nim +++ b/benchmarks/bench_fp.nim @@ -44,6 +44,7 @@ proc main() = staticFor i, 0, AvailableCurves.len: const curve = AvailableCurves[i] addBench(Fp[curve], Iters) + add10Bench(Fp[curve], Iters) subBench(Fp[curve], Iters) negBench(Fp[curve], Iters) ccopyBench(Fp[curve], Iters)