From f9611dfef4cacb640041c0064459538b727905ad Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Mon, 1 Jul 2024 10:25:17 +0900 Subject: [PATCH] generate all const vars in msm by python --- src/gen_msm_para.py | 16 +++++++++++ src/msm_avx.cpp | 32 ++++++--------------- src/msm_avx_bls12_381.h | 62 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 24 deletions(-) diff --git a/src/gen_msm_para.py b/src/gen_msm_para.py index fd260d0d..94b388e9 100644 --- a/src/gen_msm_para.py +++ b/src/gen_msm_para.py @@ -75,9 +75,20 @@ def expandN(name, v): print(('\t' + f'{hex(v[i])}, '*8).strip()) print('};') +def expandN3(name, vx, vy, vz): + print(f'static const CYBOZU_ALIGN(64) uint64_t {name}[] = {{') + for i in range(len(vx)): + print(('\t' + f'{hex(vx[i])}, '*8).strip()) + for i in range(len(vy)): + print(('\t' + f'{hex(vy[i])}, '*8).strip()) + for i in range(len(vz)): + print(('\t' + f'{hex(vz[i])}, '*8).strip()) + print('};') + def putCode(curve, mont): print('// generated by src/gen_msm_para.py') print(f'static const uint64_t g_mask = {hex(mont.mask)};') + # for FpM expand("g_mask_", mont.mask) expand("g_rp_", mont.rp) expandN('g_ap_', toArray(curve.p)) # array of p @@ -96,6 +107,11 @@ def putCode(curve, mont): return print(f'// rw={hex(rw)}') expandN('g_rw_', toArray(mont.toMont(rw))) + # for EcM + b = 4 + expandN('g_b3_', toArray(mont.toMont(b*3))) + expandN3('g_zeroJacobi_', toArray(0), toArray(0), toArray(0)) + expandN3('g_zeroProj_', toArray(0), toArray(1), toArray(0)) print(f''' struct G {{ diff --git a/src/msm_avx.cpp b/src/msm_avx.cpp index 923be57f..76fd1ae7 100644 --- a/src/msm_avx.cpp +++ b/src/msm_avx.cpp @@ -418,6 +418,7 @@ class Montgomery { } }; +static Montgomery g_mont; /* |64 |64 |64 |64 |64 |64 | @@ -527,7 +528,6 @@ inline void cvt6Ux8to8Ux8(Vec y[8], const Unit x[6*8]) struct FpM { Vec v[N]; - static Montgomery g_mont; static const FpM& zero() { return *(const FpM*)g_zero_; } static const FpM& one() { return *(const FpM*)g_R_; } static const FpM& R2() { return *(const FpM*)g_R2_; } @@ -707,7 +707,6 @@ struct FpM { #endif }; -Montgomery FpM::g_mont; template inline void normalizeJacobiVec(E P[n]) @@ -835,9 +834,9 @@ struct EcM { static const int a_ = 0; static const int b_ = 4; static const int specialB_ = mcl::ec::local::Plus4; - static FpM b3_; - static EcM zeroProj_; - static EcM zeroJacobi_; + static const FpM &b3_; + static const EcM &zeroProj_; + static const EcM &zeroJacobi_; FpM x, y, z; template static void add(EcM& z, const EcM& x, const EcM& y) @@ -864,18 +863,6 @@ struct EcM { dblJacobiNoCheck(z, x); } } - static void init(const Montgomery& mont) - { - const int b = 4; - mpz_class b3 = mont.toMont(b * 3); - expandN(b3_.v, b3); - zeroJacobi_.x.set(0); - zeroJacobi_.y.set(0); - zeroJacobi_.z.set(0); - zeroProj_.x.set(0); - zeroProj_.y.set(1); - zeroProj_.z.set(0); - } static EcM select(const Vmask& c, const EcM& a, const EcM& b) { EcM d; @@ -1164,9 +1151,9 @@ struct EcM { #endif }; -FpM EcM::b3_; -EcM EcM::zeroProj_; -EcM EcM::zeroJacobi_; +const FpM& EcM::b3_ = *(const FpM*)g_b3_; +const EcM& EcM::zeroProj_ = *(const EcM*)g_zeroProj_; +const EcM& EcM::zeroJacobi_ = *(const EcM*)g_zeroJacobi_; inline void reduceSum(mcl::msm::G1A& Q, const EcM& P) { @@ -1317,10 +1304,7 @@ bool initMsm(const mcl::CurveParam& cp, const mcl::msm::Func *func) g_func = *func; const mpz_class& mp = g_func.fp->mp; - FpM::g_mont.init(mp); -// FpM::init(mp); - Montgomery& mont = FpM::g_mont; - EcM::init(mont); + g_mont.init(mp); return true; } diff --git a/src/msm_avx_bls12_381.h b/src/msm_avx_bls12_381.h index 287724a2..a324e51a 100644 --- a/src/msm_avx_bls12_381.h +++ b/src/msm_avx_bls12_381.h @@ -93,6 +93,68 @@ static const CYBOZU_ALIGN(64) uint64_t g_rw_[] = { 0x82faa0ff3c329, 0x82faa0ff3c329, 0x82faa0ff3c329, 0x82faa0ff3c329, 0x82faa0ff3c329, 0x82faa0ff3c329, 0x82faa0ff3c329, 0x82faa0ff3c329, 0x17601, 0x17601, 0x17601, 0x17601, 0x17601, 0x17601, 0x17601, 0x17601, }; +static const CYBOZU_ALIGN(64) uint64_t g_b3_[] = { +0x460afeaf7b431, 0x460afeaf7b431, 0x460afeaf7b431, 0x460afeaf7b431, 0x460afeaf7b431, 0x460afeaf7b431, 0x460afeaf7b431, 0x460afeaf7b431, +0xcd5122beb5b19, 0xcd5122beb5b19, 0xcd5122beb5b19, 0xcd5122beb5b19, 0xcd5122beb5b19, 0xcd5122beb5b19, 0xcd5122beb5b19, 0xcd5122beb5b19, +0xc4664aadd2de0, 0xc4664aadd2de0, 0xc4664aadd2de0, 0xc4664aadd2de0, 0xc4664aadd2de0, 0xc4664aadd2de0, 0xc4664aadd2de0, 0xc4664aadd2de0, +0x1d78417c77713, 0x1d78417c77713, 0x1d78417c77713, 0x1d78417c77713, 0x1d78417c77713, 0x1d78417c77713, 0x1d78417c77713, 0x1d78417c77713, +0xa4d7d1f9b9711, 0xa4d7d1f9b9711, 0xa4d7d1f9b9711, 0xa4d7d1f9b9711, 0xa4d7d1f9b9711, 0xa4d7d1f9b9711, 0xa4d7d1f9b9711, 0xa4d7d1f9b9711, +0x4b2b884890e, 0x4b2b884890e, 0x4b2b884890e, 0x4b2b884890e, 0x4b2b884890e, 0x4b2b884890e, 0x4b2b884890e, 0x4b2b884890e, +0x717302e000d24, 0x717302e000d24, 0x717302e000d24, 0x717302e000d24, 0x717302e000d24, 0x717302e000d24, 0x717302e000d24, 0x717302e000d24, +0xf618, 0xf618, 0xf618, 0xf618, 0xf618, 0xf618, 0xf618, 0xf618, +}; +static const CYBOZU_ALIGN(64) uint64_t g_zeroJacobi_[] = { +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +}; +static const CYBOZU_ALIGN(64) uint64_t g_zeroProj_[] = { +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, +}; struct G { static const Vec& mask() { return *(const Vec*)g_mask_; }