Skip to content

Commit

Permalink
gt: stash progress, Fp12 over Fp6 fails ref or opt multiexp while Fp1…
Browse files Browse the repository at this point in the history
…2 over Fp4 doesn't (without Torus)
  • Loading branch information
mratsim committed Sep 25, 2024
1 parent 1916910 commit 79c3e92
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 36 deletions.
8 changes: 4 additions & 4 deletions constantine/math/pairings/gt_exponentiations_vartime.nim
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,8 @@ func gtExpEndo_wNAF_vartime*[Gt: ExtensionField, scalBits: static int](
static: doAssert window <= 4, "Window of size " & $window & " is too large and precomputation would use " & $(precompSize * sizeof(Gt)) & " stack space."

# 1. Compute endomorphisms
const M = when Gt is Fp6: 2
elif Gt is Fp12: 4
const M = when Gt.Name.getEmbeddingDegree() == 6: 2
elif Gt.Name.getEmbeddingDegree() == 12: 4
else: {.error: "Unconfigured".}

var endos {.noInit.}: array[M-1, Gt]
Expand Down Expand Up @@ -351,9 +351,9 @@ func gtExp_vartime*[Gt: ExtensionField, scalBits: static int](
when Gt.Name.hasEndomorphismAcceleration():
when scalBits >= EndomorphismThreshold: # Skip static: doAssert when multiplying by intentionally small scalars.
if usedBits >= EndomorphismThreshold:
when Gt is Fp6:
when Gt.Name.getEmbeddingDegree() == 6:
r.gtExpEndo_wNAF_vartime(a, scalar, window = 4)
elif Gt is Fp12:
elif Gt.Name.getEmbeddingDegree() == 12:
r.gtExpEndo_wNAF_vartime(a, scalar, window = 3)
else: # Curves defined on Fp^m with m > 2
{.error: "Unconfigured".}
Expand Down
82 changes: 59 additions & 23 deletions constantine/math/pairings/gt_multiexp.nim
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import constantine/named/algebras,
constantine/math/arithmetic,
constantine/math/endomorphisms/split_scalars,
constantine/math/extension_fields,
constantine/math/arithmetic/bigints,
constantine/named/zoo_endomorphisms,
constantine/platforms/abstractions,
./cyclotomic_subgroups
./cyclotomic_subgroups, ./gt_prj

# No exceptions allowed in core cryptographic operations
{.push raises: [].}
Expand Down Expand Up @@ -96,12 +97,27 @@ func `~/=`[Gt: ExtensionField](a: var Gt, b: Gt) {.inline.} =
func setNeutral[Gt: ExtensionField](a: var Gt) {.inline.} =
a.setOne()

func `~*=`(a: var T2Prj, b: T2Aff) {.inline.} =
a.mixedProd_vartime(a, b)

func `~*=`(a: var T2Prj, b: T2Prj) {.inline.} =
a.prod(a, b)

func `~/=`(a: var T2Prj, b: T2Aff) {.inline.} =
## Cyclotomic division
var t {.noInit.}: T2Aff
t.cyclotomic_inv(b)
a ~*= t

# Reference multi-exponentiation
# -------------------------------------------------------------
# We distinguish GtAcc (GT Accumulators) from GtElt (Gt Element)
# They can map to the same type if using extension fields
# or to 2 different types if using tori (affine and projective torus coordinates)

func multiExpImpl_reference_vartime[bits: static int, Gt](
r: var Gt,
elems: ptr UncheckedArray[Gt],
func multiExpImpl_reference_vartime[bits: static int, GtAcc, GtElt](
r: var GtAcc,
elems: ptr UncheckedArray[GtElt],
expos: ptr UncheckedArray[BigInt[bits]],
N: int, c: static int) {.tags:[VarTime, HeapAlloc].} =
## Inner implementation of MEXP, for static dispatch over c, the bucket bit length
Expand All @@ -112,8 +128,8 @@ func multiExpImpl_reference_vartime[bits: static int, Gt](
const numBuckets = 1 shl c - 1 # bucket 0 is unused
const numWindows = bits.ceilDiv_vartime(c)

let miniEXPs = allocHeapArray(Gt, numWindows)
let buckets = allocHeapArray(Gt, numBuckets)
let miniEXPs = allocHeapArray(GtAcc, numWindows)
let buckets = allocHeapArray(GtAcc, numBuckets)

# Algorithm
# ---------
Expand All @@ -134,7 +150,7 @@ func multiExpImpl_reference_vartime[bits: static int, Gt](
# 2. Bucket reduction. Cost: 2x(2ᶜ-2) => 2 additions per 2ᶜ-1 bucket, last bucket is just copied
# We have ordered subset sums in each bucket, we now need to compute the mini-exponentiation
# S₁¹ + S₂² + S₃³ + ... + (S₂c₋₁)^(2ᶜ-1)
var accumBuckets{.noInit.}, miniEXP{.noInit.}: Gt
var accumBuckets{.noInit.}, miniEXP{.noInit.}: GtAcc
accumBuckets = buckets[numBuckets-1]
miniEXP = buckets[numBuckets-1]

Expand All @@ -157,9 +173,9 @@ func multiExpImpl_reference_vartime[bits: static int, Gt](
buckets.freeHeap()
miniEXPs.freeHeap()

func multiExp_reference_dispatch_vartime[bits: static int, Gt](
r: var Gt,
elems: ptr UncheckedArray[Gt],
func multiExp_reference_dispatch_vartime[bits: static int, GtAcc, GtElt](
r: var GtAcc,
elems: ptr UncheckedArray[GtElt],
expos: ptr UncheckedArray[BigInt[bits]],
N: int) {.tags:[VarTime, HeapAlloc].} =
## Multiexponentiation:
Expand Down Expand Up @@ -190,41 +206,58 @@ func multiExp_reference_vartime*[bits: static int, Gt](
r: var Gt,
elems: ptr UncheckedArray[Gt],
expos: ptr UncheckedArray[BigInt[bits]],
N: int) {.tags:[VarTime, HeapAlloc].} =
N: int, useTorus: static bool = true) {.tags:[VarTime, HeapAlloc].} =
## Multiexponentiation:
## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ
multiExp_reference_dispatch_vartime(r, elems, expos, N)
when useTorus:
static: doAssert Gt is QuadraticExt, "GT was: " & $Gt
type F = typeof(elems[0].c0)
let elemsTorus = allocHeapArrayAligned(T2Aff[F], N, alignment = 64)
elemsTorus.toOpenArray(0, N-1).batchFromGT_vartime(
elems.toOpenArray(0, N-1)
)
var r_torus {.noInit.}: T2Prj[F]
multiExp_reference_dispatch_vartime(r_torus, elemsTorus, expos, N)
r.fromTorus2_vartime(r_torus)
else:
multiExp_reference_dispatch_vartime(r, elems, expos, N)

func multiExp_reference_vartime*[Gt](r: var Gt, elems: openArray[Gt], expos: openArray[BigInt]) {.tags:[VarTime, HeapAlloc].} =
func multiExp_reference_vartime*[Gt](
r: var Gt,
elems: openArray[Gt],
expos: openArray[BigInt],
useTorus: static bool = true) {.tags:[VarTime, HeapAlloc].} =
## Multiexponentiation:
## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ
debug: doAssert expos.len == elems.len
let N = elems.len
multiExp_reference_dispatch_vartime(r, elems.asUnchecked(), expos.asUnchecked(), N)
multiExp_reference_vartime(r, elems.asUnchecked(), expos.asUnchecked(), N, useTorus)

func multiExp_reference_vartime*[F, Gt](
r: var Gt,
elems: ptr UncheckedArray[Gt],
expos: ptr UncheckedArray[F],
len: int) {.tags:[VarTime, Alloca, HeapAlloc], meter.} =
len: int,
useTorus: static bool = true) {.tags:[VarTime, Alloca, HeapAlloc], meter.} =
## Multiexponentiation:
## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ
let n = cast[int](len)
let expos_big = allocHeapArrayAligned(F.getBigInt(), n, alignment = 64)
expos_big.batchFromField(expos, n)
r.multiExp_reference_vartime(elems, expos_big, n)
r.multiExp_reference_vartime(elems, expos_big, n, useTorus)

freeHeapAligned(expos_big)

func multiExp_reference_vartime*[Gt](
r: var Gt,
elems: openArray[Gt],
expos: openArray[Fr]) {.tags:[VarTime, Alloca, HeapAlloc], inline.} =
expos: openArray[Fr],
useTorus: static bool = true) {.tags:[VarTime, Alloca, HeapAlloc], inline.} =
## Multiexponentiation:
## r <- g₀^a₀ + g₁^a₁ + ... + gₙ^aₙ
debug: doAssert expos.len == elems.len
let N = elems.len
multiExp_reference_vartime(r, elems.asUnchecked(), expos.asUnchecked(), N)
multiExp_reference_vartime(r, elems.asUnchecked(), expos.asUnchecked(), N, useTorus)

# ########################################################### #
# #
Expand All @@ -233,7 +266,6 @@ func multiExp_reference_vartime*[Gt](
# #
# ########################################################### #


func accumulate[GT](buckets: ptr UncheckedArray[GT], val: SecretWord, negate: SecretBool, elem: GT) {.inline, meter.} =
let val = BaseType(val)
if val == 0: # Skip g⁰
Expand Down Expand Up @@ -366,8 +398,8 @@ proc applyEndomorphism[bits: static int, GT](
## Returns a new triplet (endoElems, endoExpos, N)
## endoElems and endoExpos MUST be freed afterwards

const M = when Gt is Fp6: 2
elif Gt is Fp12: 4
const M = when Gt.Name.getEmbeddingDegree() == 6: 2
elif Gt.Name.getEmbeddingDegree() == 12: 4
else: {.error: "Unconfigured".}

const L = Fr[Gt.Name].bits().computeEndoRecodedLength(M)
Expand Down Expand Up @@ -410,8 +442,12 @@ template withEndo[exponentsBits: static int, GT](
else:
multiExpProc(r, elems, expos, N, c)

# Algorithm selection
# -----------------------------------------------------------------------------------------------------------------------
# ########################################################### #
# #
# Multi-exponentiations in 𝔾ₜ #
# Algorithm selection #
# #
# ########################################################### #

func multiexp_dispatch_vartime[bits: static int, GT](
r: var GT,
Expand Down
111 changes: 106 additions & 5 deletions constantine/math/pairings/gt_prj.nim
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,32 @@ type

template x[F](a: T2Prj[F]): F = a.coords[0]
template z[F](a: T2Prj[F]): F = a.coords[1]
template `x=`[F](a: var T2Prj[F], v: F) = a.coords[0] = v
template `z=`[F](a: var T2Prj[F], v: F) = a.coords[1] = v

proc setNeutral*[F](a: var T2Aff[F]) =
# We special-case the neutral element to 0
# TODO: this is in case an element of the Torus might be compressed
# to coordinate 1
# TODO: Can a GT element of the form a + bv have a == 0?
F(a).setZero()

proc isNeutral*[F](a: T2Aff[F]): SecretBool =
F(a).isZero()

proc setNeutral*(a: var T2Prj) =
a.x.setOne()
a.z.setZero()

proc isNeutral*(a: T2Prj): SecretBool =
a.z.isZero()

proc fromGT_vartime*[F](r: var T2Aff[F], a: QuadraticExt[F]) =
# Special case identity element
if bool a.isNeutral():
r.setNeutral()
return

var t {.noInit.}, one {.noInit.}: F
t.inv_vartime(a.c1)
one.setOne()
Expand All @@ -293,6 +317,11 @@ proc fromGT_vartime*[F](r: var T2Aff[F], a: QuadraticExt[F]) =
F(r) *= t

proc fromGT_vartime*[F](r: var T2Prj[F], a: QuadraticExt[F]) =
# Special case identity element
if bool a.isOne():
r.setNeutral()
return

var t {.noInit.}: F
t.inv_vartime(a.c1)
r.z.setOne()
Expand All @@ -301,8 +330,13 @@ proc fromGT_vartime*[F](r: var T2Prj[F], a: QuadraticExt[F]) =
r.x *= t

proc fromTorus2_vartime*[F](r: var QuadraticExt[F], a: T2Aff[F]) =
var num {.noInit.}, den {.noInit.}: typeof(r)

# Special case identity element
if bool a.isNeutral():
r.setNeutral()
return

var num {.noInit.}, den {.noInit.}: typeof(r)
num.c0 = F a
num.c1.setMinusOne()
den.c0 = F a
Expand All @@ -311,6 +345,12 @@ proc fromTorus2_vartime*[F](r: var QuadraticExt[F], a: T2Aff[F]) =
r.prod(num, den)

proc fromTorus2_vartime*[F](r: var QuadraticExt[F], a: T2Prj[F]) =

# Special case identity element
if bool a.isNeutral():
r.setOne()
return

type QF = QuadraticExt[F]

var t0 {.noInit.}, t1 {.noInit.}: QF
Expand All @@ -320,10 +360,26 @@ proc fromTorus2_vartime*[F](r: var QuadraticExt[F], a: T2Prj[F]) =

r.prod(t0, t1)

proc mixedProd*[F](r: var T2Prj[F], a: T2Prj[F], b: T2Aff[F]) =
proc fromAffine_vartime*[F](r: var T2Prj[F], a: T2Aff[F]) =
mixin `x=`
if bool a.isNeutral():
r.setNeutral()
else:
r.coords[0] = F(a) # r.x doesn't work despite bind, mixin, exports and usual generic sandwich workarounds
r.z.setOne()

proc mixedProd_vartime*[F](r: var T2Prj[F], a: T2Prj[F], b: T2Aff[F]) =
## Multiplication on a torus.
## b MUST be in the cyclotomic subgroup

# Special case identity element
if bool a.isNeutral():
r.fromAffine_vartime(b) # handles b == 1 as well
return
if bool b.isNeutral():
r = a
return

var u0 {.noInit.}, u1 {.noInit.}: F
u0.prod(a.x, F b)
u1.prod(a.z, F b)
Expand All @@ -332,15 +388,29 @@ proc mixedProd*[F](r: var T2Prj[F], a: T2Prj[F], b: T2Aff[F]) =
r.x += u0
r.z.sum(u1, a.x)

proc affineProd*[F](r: var T2Prj[F], a, b: T2Aff[F]) =
proc affineProd_vartime*[F](r: var T2Prj[F], a, b: T2Aff[F]) =

# Special case identity element
if bool a.isNeutral():
r.fromAffine_vartime(b) # handles b == 1 as well
return
if bool b.isNeutral():
r.fromAffine_vartime(a)
return

r.z.sum(F a, F b)
r.x.prod(F a, F b)

var snr {.noInit.}: typeof(r.x.c1)
snr.setOne()
r.x.c1 += snr

proc affineSquare*[F](r: var T2Prj[F], a: T2Aff[F]) =
proc affineSquare_vartime*[F](r: var T2Prj[F], a: T2Aff[F]) =

# Special case identity element
if bool a.isNeutral():
r.setNeutral()
return

r.z.double(F a)
r.x.square(F a)
Expand All @@ -357,8 +427,22 @@ proc square*[F](r: var T2Prj[F], a: T2Prj[F]) {.inline.} =
type QF = QuadraticExt[F]
QF(r).square(QF a)

template cyclotomic_square*[F](r: var T2Prj[F], a: T2Prj[F]) =
# Alias
r.square(a)

template cyclotomic_square*[F](a: var T2Prj[F]) =
# Alias
a.square(a)

proc inv*[F](r: var T2Aff[F], a: T2Aff[F]) {.inline.} =
## Cyclotomic inversion on a Torus
# Note: for neutral element this is valid only
# if the implementation uses 0 as special-value
F(r).neg(F(a))

proc inv*[F](r: var T2Prj[F], a: T2Prj[F]) {.inline.} =
# Cyclotomic inversion on a Torus
## Cyclotomic inversion on a Torus
r.x.neg(a.x)
r.z = a.z

Expand All @@ -378,6 +462,8 @@ proc batchFromGT_vartime*[F](dst: var openArray[T2Aff[F]],
## Note: on 𝔽p6, the ratio of inversion I/M is about 3.8
## so this is about a ~25% speedup

# TODO: handle neutral element

debug: doAssert dst.len == src.len

F(dst[0]) = src[0].c1
Expand Down Expand Up @@ -415,6 +501,8 @@ proc batchFromTorus2_vartime*[F](dst: var openArray[QuadraticExt[F]],
##
## Note: on 𝔽p12, the ratio of inversion I/M is about 3
## so this has likely no speedup, and is not trivial to parallelize

# TODO: handle neutral element
debug: doAssert dst.len == src.len

# We consciously choose to recompute conj(src[i]) to avoid an allocation
Expand Down Expand Up @@ -446,3 +534,16 @@ proc batchFromTorus2_vartime*[F](dst: var openArray[QuadraticExt[F]],
var t {.noInit.}: QF
t.conj(QF src[0])
dst[0] *= t


when isMainModule:
var a, c: QuadraticExt[Fp6[BLS12_381]]
var b: T2Prj[Fp6[BLS12_381]]

a.setOne()
b.fromGT_vartime(a)
c.fromTorus2_vartime(b)

echo "a: ", a.toHex(indent = 4)
echo "b: ", QuadraticExt[Fp6[BLS12_381]](b).toHex(indent = 4)
echo "c: ", c.toHex(indent = 4)
Loading

0 comments on commit 79c3e92

Please sign in to comment.