diff --git a/include/mcl/bint.hpp b/include/mcl/bint.hpp index 51163d1b..7a601923 100644 --- a/include/mcl/bint.hpp +++ b/include/mcl/bint.hpp @@ -478,7 +478,7 @@ inline Unit getMontgomeryCoeff(Unit pLow, size_t bitSize = sizeof(Unit) * 8) struct SmallModP { static const size_t d = 16; // d = 26 if use double in approx - static const size_t MAX_MUL_N = 10; + static const size_t MAX_MUL_N = 1; // not used because mulSmallUnit is called first. static const size_t maxE_ = d - 2; const Unit *p_; Unit tbl_[MAX_MUL_N][MCL_MAX_UNIT_SIZE+1]; @@ -578,7 +578,7 @@ struct SmallModP { } Unit t[N+1]; const Unit *pQ = 0; - if (false&&Q <= MAX_MUL_N) { + if (Q <= MAX_MUL_N) { pQ = tbl_[Q-1]; } else { t[N] = mcl::bint::mulUnitT(t, p_, Q); diff --git a/include/mcl/fp.hpp b/include/mcl/fp.hpp index b22da06c..0877bb7f 100644 --- a/include/mcl/fp.hpp +++ b/include/mcl/fp.hpp @@ -114,11 +114,14 @@ class FpT : public fp::Serializable, op_.fp_add(y, x, x, op_.p); } #endif +#if 0 static inline void mul9A(Unit *y, const Unit *x) { - mulSmall(y, x, 9); + mulUnit(y, x, 9); // op_.fp_mul9(y, x, op_.p); } +#endif +#if 0 static inline void mulSmall(Unit *z, const Unit *x, const uint32_t y) { assert(y <= op_.smallModp.maxMulN); @@ -129,6 +132,7 @@ class FpT : public fp::Serializable, op_.fp_subPre(z, xy, pv); op_.fp_sub(z, z, op_.p, op_.p); } +#endif public: typedef FpT BaseFp; // return pointer to array v_[] @@ -187,9 +191,11 @@ class FpT : public fp::Serializable, if (op_.fp_mul2A_ == 0) { op_.fp_mul2A_ = mul2A; } +#if 0 if (op_.fp_mul9A_ == 0) { op_.fp_mul9A_ = mul9A; } +#endif #endif *pb = true; } @@ -608,14 +614,19 @@ class FpT : public fp::Serializable, } static void mul9(FpT& y, const FpT& x) { +#if 1 + mulUnit(y, x, 9); +#else #ifdef MCL_XBYAK_DIRECT_CALL op_.fp_mul9A_(y.v_, x.v_); #else mul9A(y.v_, x.v_); +#endif #endif } static inline void addPre(FpT& z, const FpT& x, const FpT& y) { op_.fp_addPre(z.v_, x.v_, y.v_); } static inline void subPre(FpT& z, const FpT& x, const FpT& y) { 
op_.fp_subPre(z.v_, x.v_, y.v_); } +#if 0 static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y) { mulSmall(z.v_, x.v_, y); @@ -625,12 +636,15 @@ class FpT : public fp::Serializable, if (mulSmallUnit(z, x, y)) return; op_.fp_mulUnit(z.v_, x.v_, y, op_.p); } - static inline void mulUnit2(FpT& z, const FpT& x, const Unit y) +#endif + static inline void mulUnit(FpT& z, const FpT& x, const Unit y) { -// if (mulSmallUnit(z, x, y)) return; - if (op_.mulSmallUnit2(op_.smallModP, z.v_, x.v_, y)) return; + if (mcl::fp::mulSmallUnit(z, x, y)) return; + if (op_.mulSmallUnit(op_.smallModP, z.v_, x.v_, y)) return; op_.fp_mulUnit(z.v_, x.v_, y, op_.p); } + // alias of mulUnit + static inline void mulSmall(FpT& z, const FpT& x, const uint32_t y) { mulUnit(z, x, y); } static inline void inv(FpT& y, const FpT& x) { assert(!x.isZero()); diff --git a/include/mcl/gmp_util.hpp b/include/mcl/gmp_util.hpp index afb50192..c9a5c0f1 100644 --- a/include/mcl/gmp_util.hpp +++ b/include/mcl/gmp_util.hpp @@ -949,6 +949,7 @@ class SquareRoot { #endif }; +#if 0 /* x mod p for a small value x < (pMulTblN * p). 
*/ @@ -1026,7 +1027,7 @@ struct SmallModp { } } }; - +#endif /* Barrett Reduction diff --git a/include/mcl/op.hpp b/include/mcl/op.hpp index 8b7b2608..0358b7ed 100644 --- a/include/mcl/op.hpp +++ b/include/mcl/op.hpp @@ -185,7 +185,7 @@ struct Op { mcl::SquareRoot sq; CYBOZU_ALIGN(8) char im[sizeof(mcl::inv::InvModT)]; mcl::Modp modp; - mcl::SmallModp smallModp; +// mcl::SmallModp smallModp; mcl::bint::SmallModP smallModP; Unit half[maxUnitSize]; // (p + 1) / 2 Unit oneRep[maxUnitSize]; // 1(=inv R if Montgomery) @@ -239,7 +239,7 @@ struct Op { void3u fp_mul2; void2uOp fp_invOp; void2uIu fp_mulUnit; // fp_mulUnitPre - bool (*mulSmallUnit2)(const mcl::bint::SmallModP&, Unit *z, const Unit *x, Unit y); + bool (*mulSmallUnit)(const mcl::bint::SmallModP&, Unit *z, const Unit *x, Unit y); void3u fpDbl_mulPre; void2u fpDbl_sqrPre; @@ -330,7 +330,7 @@ struct Op { fp_mul2 = 0; fp_invOp = 0; fp_mulUnit = 0; - mulSmallUnit2 = 0; + mulSmallUnit = 0; fpDbl_mulPre = 0; fpDbl_sqrPre = 0; diff --git a/include/mcl/util.hpp b/include/mcl/util.hpp index eea1c86a..81183118 100644 --- a/include/mcl/util.hpp +++ b/include/mcl/util.hpp @@ -154,10 +154,13 @@ bool mulSmallUnit(T& z, const T& x, U y) case 6: { T t; T::add(t, x, x); T::add(t, t, x); T::add(z, t, t); break; } case 7: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, t); T::sub(z, t, x); break; } case 8: T::add(z, x, x); T::add(z, z, z); T::add(z, z, z); break; + // slower than SmallModP +#if 0 case 9: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, t); T::add(z, t, x); break; } case 10: { T t; T::add(t, x, x); T::add(t, t, t); T::add(t, t, x); T::add(z, t, t); break; } case 11: { T t; T::add(t, x, x); T::add(t, t, x); T::add(t, t, t); T::add(t, t, t); T::sub(z, t, x); break; } case 12: { T t; T::add(t, x, x); T::add(t, t, t); T::add(z, t, t); T::add(z, z, t); break; } +#endif default: return false; } diff --git a/src/fp.cpp b/src/fp.cpp index f66010ec..e430be47 100644 --- a/src/fp.cpp +++ b/src/fp.cpp @@ -300,7 
+300,7 @@ void setOp(Op& op) op.fp_shr1 = shr1T; op.fp_neg = negT; op.fp_mulUnitPre = mulUnitPreT; - op.mulSmallUnit2 = bint::SmallModP::mulUnit; + op.mulSmallUnit = bint::SmallModP::mulUnit; op.fp_addPre = bint::get_add(N); op.fp_subPre = bint::get_sub(N); op.fpDbl_addPre = bint::get_add(N * 2); @@ -581,7 +581,7 @@ bool Op::init(const mpz_class& _p, size_t maxBitSize, int _xi_a, Mode mode, size if (!b) return false; } modp.init(mp); - smallModp.init(mp); +// smallModp.init(mp); smallModP.init(p, N); return fp::initForMont(*this, p, mode); }