refactor(poly-commit): +30% kzg parallel perf, add quotient check generalization and evalPoly
mratsim · Jun 14, 2024 · ec9f4eb · ec9f4eb
1 parent 9dfe6ac
commit ec9f4eb
Show file tree

Hide file tree

Showing 17 changed files with 600 additions and 521 deletions.
diff --git a/benchmarks/bench_eth_eip4844_kzg.nim b/benchmarks/bench_eth_eip4844_kzg.nim
@@ -54,8 +54,8 @@ proc new(T: type BenchSet, ctx: ptr EthereumKZGContext): T =
   new(result)
   for i in 0 ..< result.N:
     rng.randomize(result.blobs[i])
-    discard ctx.blob_to_kzg_commitment(result.commitments[i], result.blobs[i].addr)
-    discard ctx.compute_blob_kzg_proof(result.proofs[i], result.blobs[i].addr, result.commitments[i])
+    discard ctx.blob_to_kzg_commitment(result.commitments[i], result.blobs[i])
+    discard ctx.compute_blob_kzg_proof(result.proofs[i], result.blobs[i], result.commitments[i])
 
   let opening_challenge = rng.random_unsafe(Fr[BLS12_381])
   let eval_at_challenge = rng.random_unsafe(Fr[BLS12_381])
@@ -69,7 +69,7 @@ proc benchBlobToKzgCommitment(b: BenchSet, ctx: ptr EthereumKZGContext, iters: i
   block:
     bench("blob_to_kzg_commitment", "serial", iters):
       var commitment {.noInit.}: array[48, byte]
-      doAssert cttEthKzg_Success == ctx.blob_to_kzg_commitment(commitment, b.blobs[0].addr)
+      doAssert cttEthKzg_Success == ctx.blob_to_kzg_commitment(commitment, b.blobs[0])
   let stopSerial = getMonotime()
 
   ## We require `tp` to be uninitialized as even idle threads somehow reduce perf of serial benches
@@ -79,7 +79,7 @@ proc benchBlobToKzgCommitment(b: BenchSet, ctx: ptr EthereumKZGContext, iters: i
   block:
     bench("blob_to_kzg_commitment", $tp.numThreads & " threads", iters):
       var commitment {.noInit.}: array[48, byte]
-      doAssert cttEthKzg_Success == tp.blob_to_kzg_commitment_parallel(ctx, commitment, b.blobs[0].addr)
+      doAssert cttEthKzg_Success == tp.blob_to_kzg_commitment_parallel(ctx, commitment, b.blobs[0])
   let stopParallel = getMonotime()
 
   tp.shutdown()
@@ -97,7 +97,7 @@ proc benchComputeKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: int)
     bench("compute_kzg_proof", "serial", iters):
       var proof {.noInit.}: array[48, byte]
       var eval_at_challenge {.noInit.}: array[32, byte]
-      doAssert cttEthKzg_Success == ctx.compute_kzg_proof(proof, eval_at_challenge, b.blobs[0].addr, b.opening_challenge)
+      doAssert cttEthKzg_Success == ctx.compute_kzg_proof(proof, eval_at_challenge, b.blobs[0], b.opening_challenge)
   let stopSerial = getMonotime()
 
   ## We require `tp` to be uninitialized as even idle threads somehow reduce perf of serial benches
@@ -108,7 +108,7 @@ proc benchComputeKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: int)
     bench("compute_kzg_proof", $tp.numThreads & " threads", iters):
       var proof {.noInit.}: array[48, byte]
       var eval_at_challenge {.noInit.}: array[32, byte]
-      doAssert cttEthKzg_Success == tp.compute_kzg_proof_parallel(ctx, proof, eval_at_challenge, b.blobs[0].addr, b.opening_challenge)
+      doAssert cttEthKzg_Success == tp.compute_kzg_proof_parallel(ctx, proof, eval_at_challenge, b.blobs[0], b.opening_challenge)
   let stopParallel = getMonotime()
 
   tp.shutdown()
@@ -125,7 +125,7 @@ proc benchComputeBlobKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: i
   block:
     bench("compute_blob_kzg_proof", "serial", iters):
       var proof {.noInit.}: array[48, byte]
-      doAssert cttEthKzg_Success == ctx.compute_blob_kzg_proof(proof, b.blobs[0].addr, b.commitments[0])
+      doAssert cttEthKzg_Success == ctx.compute_blob_kzg_proof(proof, b.blobs[0], b.commitments[0])
   let stopSerial = getMonotime()
 
   ## We require `tp` to be uninitialized as even idle threads somehow reduce perf of serial benches
@@ -135,7 +135,7 @@ proc benchComputeBlobKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: i
   block:
     bench("compute_blob_kzg_proof", $tp.numThreads & " threads", iters):
       var proof {.noInit.}: array[48, byte]
-      doAssert cttEthKzg_Success == tp.compute_blob_kzg_proof_parallel(ctx, proof, b.blobs[0].addr, b.commitments[0])
+      doAssert cttEthKzg_Success == tp.compute_blob_kzg_proof_parallel(ctx, proof, b.blobs[0], b.commitments[0])
   let stopParallel = getMonotime()
 
   tp.shutdown()
@@ -158,7 +158,7 @@ proc benchVerifyBlobKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: in
   let startSerial = getMonotime()
   block:
     bench("verify_blob_kzg_proof", "serial", iters):
-      discard ctx.verify_blob_kzg_proof(b.blobs[0].addr, b.commitments[0], b.proofs[0])
+      discard ctx.verify_blob_kzg_proof(b.blobs[0], b.commitments[0], b.proofs[0])
   let stopSerial = getMonotime()
 
   ## We require `tp` to be uninitialized as even idle threads somehow reduce perf of serial benches
@@ -167,7 +167,7 @@ proc benchVerifyBlobKzgProof(b: BenchSet, ctx: ptr EthereumKZGContext, iters: in
   let startParallel = getMonotime()
   block:
     bench("verify_blob_kzg_proof", $tp.numThreads & " threads", iters):
-      discard tp.verify_blob_kzg_proof_parallel(ctx, b.blobs[0].addr, b.commitments[0], b.proofs[0])
+      discard tp.verify_blob_kzg_proof_parallel(ctx, b.blobs[0], b.commitments[0], b.proofs[0])
   let stopParallel = getMonotime()
 
   tp.shutdown()

diff --git a/constantine/commitments/README.md b/constantine/commitments/README.md
@@ -54,3 +54,7 @@ However, even if we generalize the transcript API,
 unfortunately the labeling differ (if any) and the absorb/challenge sequences and what is absorbed in the transcript are very different.
 
 So the commitments have to be protocol-specific.
+
+## Protocols
+
+- quotient check
diff --git a/constantine/commitments/kzg.nim b/constantine/commitments/kzg.nim
@@ -13,7 +13,8 @@ import
   ../math/pairings/pairings_generic,
   ../math/constants/zoo_generators,
   ../math/polynomials/polynomials,
-  ../platforms/[abstractions, views]
+  ../platforms/[abstractions, views],
+  ./protocol_quotient_check
 
 ## ############################################################
 ##
@@ -190,59 +191,18 @@ func kzg_prove*[N: static int, C: static Curve](
        poly: PolynomialEval[N, Fr[C]],
        opening_challenge: Fr[C]) {.tags:[Alloca, HeapAlloc, Vartime].} =
 
-  # Note:
-  #   The order of inputs in
-  #  `kzg_prove`, `evalPolyOffDomainAt`, `differenceQuotientEvalOffDomain`, `differenceQuotientEvalInDomain`
-  #  minimizes register changes when parameter passing.
-  #
-  # z = opening_challenge in the following code
-
-  let diffQuotientPolyFr = allocHeapAligned(PolynomialEval[N, Fr[C]], alignment = 64)
-  let invRootsMinusZ = allocHeapAligned(array[N, Fr[C]], alignment = 64)
-
-  # Compute 1/(ωⁱ - z) with ω a root of unity, i in [0, N).
-  # zIndex = i if ωⁱ - z == 0 (it is the i-th root of unity) and -1 otherwise.
-  let zIndex = invRootsMinusZ[].inverseDifferenceArrayZ(
-                                  domain.rootsOfUnity, opening_challenge,
-                                  differenceKind = kArrayMinusZ,
-                                  earlyReturnOnZero = false)
-
-  if zIndex == -1:
-    # p(z)
-    domain.evalPolyOffDomainAt(
-      eval_at_challenge,
-      poly, opening_challenge,
-      invRootsMinusZ[])
-
-    # q(x) = (p(x) - p(z)) / (x - z)
-    diffQuotientPolyFr[].differenceQuotientEvalOffDomain(
-      poly, eval_at_challenge, invRootsMinusZ[])
-  else:
-    # p(z)
-    # But the opening_challenge z is equal to one of the roots of unity (how likely is that?)
-    eval_at_challenge = poly.evals[zIndex]
-
-    # q(x) = (p(x) - p(z)) / (x - z)
-    domain.differenceQuotientEvalInDomain(
-      diffQuotientPolyFr[],
-      poly, uint32 zIndex, invRootsMinusZ[])
+  let quotientPoly = allocHeapAligned(PolynomialEval[N, Fr[C]], alignment = 64)
 
-  freeHeapAligned(invRootsMinusZ)
-
-  const orderBits = C.getCurveOrderBitwidth()
-  let diffQuotientPolyBigInt = allocHeapAligned(array[N, BigInt[orderBits]], alignment = 64)
-
-  for i in 0 ..< N:
-    diffQuotientPolyBigInt[i].fromField(diffQuotientPolyFr.evals[i])
-
-  freeHeapAligned(diffQuotientPolyFr)
+  domain.getQuotientPoly(
+    quotientPoly[], eval_at_challenge,
+    poly, opening_challenge
+  )
 
   var proofJac {.noInit.}: ECP_ShortW_Jac[Fp[C], G1]
-  proofJac.multiScalarMul_vartime(diffQuotientPolyBigInt[], powers_of_tau.evals)
+  proofJac.multiScalarMul_vartime(quotientPoly.evals, powers_of_tau.evals)
   proof.affine(proofJac)
 
-  freeHeapAligned(diffQuotientPolyBigInt)
-
+  freeHeapAligned(quotientPoly)
 
 # KZG - Verifier
 # ------------------------------------------------------------
@@ -366,9 +326,7 @@ func kzg_verify_batch*[bits: static int, F2; C: static Curve](
   # ∑ [rᵢ][proofᵢ]₁
   # ---------------
   let coefs = allocHeapArrayAligned(matchingOrderBigInt(C), n, alignment = 64)
-  for i in 0 ..< n:
-    coefs[i].fromField(linearIndepRandNumbers[i])
-
+  coefs.batchFromField(linearIndepRandNumbers, n)
   sum_rand_proofs.multiScalarMul_vartime(coefs, proofs, n)
 
   # ∑[rᵢ]([commitmentᵢ]₁ - [eval_at_challengeᵢ]₁)

diff --git a/constantine/commitments/kzg_parallel.nim b/constantine/commitments/kzg_parallel.nim
@@ -14,7 +14,8 @@ import
   ../math/constants/zoo_generators,
   ../math/polynomials/polynomials,
   ../platforms/[abstractions, views],
-  ../threadpool/threadpool
+  ../threadpool/threadpool,
+  ./protocol_quotient_check_parallel
 
 import ./kzg {.all.}
 export kzg
@@ -44,11 +45,11 @@ proc kzg_commit_parallel*[N: static int, C: static Curve](
 proc kzg_prove_parallel*[N: static int, C: static Curve](
        tp: Threadpool,
        powers_of_tau: PolynomialEval[N, ECP_ShortW_Aff[Fp[C], G1]],
-       domain: ptr PolyEvalRootsDomain[N, Fr[C]],
+       domain: PolyEvalRootsDomain[N, Fr[C]],
        proof: var ECP_ShortW_Aff[Fp[C], G1],
        eval_at_challenge: var Fr[C],
-       poly: ptr PolynomialEval[N, Fr[C]],
-       opening_challenge: ptr Fr[C]) =
+       poly: PolynomialEval[N, Fr[C]],
+       opening_challenge: Fr[C]) =
   ## KZG prove commitment to a polynomial in Lagrange / Evaluation form
   ##
   ## Outputs:
@@ -58,61 +59,23 @@ proc kzg_prove_parallel*[N: static int, C: static Curve](
   ## Parallelism: This only returns when computation is fully done
   # Note:
   #   The order of inputs in
-  #  `kzg_prove`, `evalPolyOffDomainAt`, `differenceQuotientEvalOffDomain`, `differenceQuotientEvalInDomain`
+  #  `kzg_prove`, `evalPolyOffDomainAt`, `getQuotientPolyOffDomain`, `getQuotientPolyInDomain`
   #  minimizes register changes when parameter passing.
   #
   # z = opening_challenge in the following code
 
-  let diffQuotientPolyFr = allocHeapAligned(PolynomialEval[N, Fr[C]], alignment = 64)
-  let invRootsMinusZ = allocHeapAligned(array[N, Fr[C]], alignment = 64)
-
-  # Compute 1/(ωⁱ - z) with ω a root of unity, i in [0, N).
-  # zIndex = i if ωⁱ - z == 0 (it is the i-th root of unity) and -1 otherwise.
-  let zIndex = invRootsMinusZ[].inverseDifferenceArrayZ(
-                                  domain.rootsOfUnity, opening_challenge[],
-                                  differenceKind = kArrayMinusZ,
-                                  earlyReturnOnZero = false)
-
-  if zIndex == -1:
-    # p(z)
-    tp.evalPolyOffDomainAt_parallel(
-      domain,
-      eval_at_challenge,
-      poly, opening_challenge,
-      invRootsMinusZ)
-
-    # q(x) = (p(x) - p(z)) / (x - z)
-    tp.differenceQuotientEvalOffDomain_parallel(
-      diffQuotientPolyFr,
-      poly, eval_at_challenge.addr, invRootsMinusZ)
-  else:
-    # p(z)
-    # But the opening_challenge z is equal to one of the roots of unity (how likely is that?)
-    eval_at_challenge = poly.evals[zIndex]
-
-    # q(x) = (p(x) - p(z)) / (x - z)
-    tp.differenceQuotientEvalInDomain_parallel(
-      domain,
-      diffQuotientPolyFr,
-      poly, uint32 zIndex, invRootsMinusZ)
-
-  freeHeapAligned(invRootsMinusZ)
-
-  const orderBits = C.getCurveOrderBitwidth()
-  let diffQuotientPolyBigInt = allocHeapAligned(array[N, BigInt[orderBits]], alignment = 64)
-
-  syncScope:
-    tp.parallelFor i in 0 ..< N:
-      captures: {diffQuotientPolyBigInt, diffQuotientPolyFr}
-      diffQuotientPolyBigInt[i].fromField(diffQuotientPolyFr.evals[i])
-
-  freeHeapAligned(diffQuotientPolyFr)
+  let quotientPoly = allocHeapAligned(PolynomialEval[N, Fr[C]], alignment = 64)
+  tp.getQuotientPoly_parallel(
+    domain,
+    quotientPoly[], eval_at_challenge,
+    poly, opening_challenge
+  )
 
   var proofJac {.noInit.}: ECP_ShortW_Jac[Fp[C], G1]
-  tp.multiScalarMul_vartime_parallel(proofJac, diffQuotientPolyBigInt[], powers_of_tau.evals)
+  tp.multiScalarMul_vartime_parallel(proofJac, quotientPoly.evals, powers_of_tau.evals)
   proof.affine(proofJac)
 
-  freeHeapAligned(diffQuotientPolyBigInt)
+  freeHeapAligned(quotientPoly)
 
 proc kzg_verify_batch_parallel*[bits: static int, F2; C: static Curve](
        tp: Threadpool,