From 4200669f5b28062633684c80f2199dd6b2ed6c36 Mon Sep 17 00:00:00 2001 From: sowle Date: Mon, 25 Dec 2023 23:48:14 +0100 Subject: [PATCH] crypto: experimental optimizations: ge_scalarmult_base_vartime, ge_scalarmult_vartime_p3, ge_scalarmult_precomp_vartime (point_pc_t) + performance tests for msm pippenger v3-v4 --- src/crypto/crypto-ops.c | 170 +++++- src/crypto/crypto-ops.h | 8 + src/crypto/crypto-sugar.cpp | 30 +- src/crypto/crypto-sugar.h | 71 ++- src/currency_core/currency_basic.h | 2 +- tests/functional_tests/crypto_tests.cpp | 26 + .../crypto_tests_performance.h | 518 +++++++++++++++++- 7 files changed, 807 insertions(+), 18 deletions(-) diff --git a/src/crypto/crypto-ops.c b/src/crypto/crypto-ops.c index 1233da47..e48d2032 100644 --- a/src/crypto/crypto-ops.c +++ b/src/crypto/crypto-ops.c @@ -119,7 +119,7 @@ Postconditions: |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */ -static void fe_add(fe h, const fe f, const fe g) { +void fe_add(fe h, const fe f, const fe g) { int32_t f0 = f[0]; int32_t f1 = f[1]; int32_t f2 = f[2]; @@ -4314,3 +4314,171 @@ void ge_cached_to_p2(ge_p2 *r, const ge_cached *c) fe_copy(r->Z, c->Z); } + + +/////////////////////////// +// EXPERIMENTAL +// + +// With these select_vartime/ge_scalarmult_base_vartime I got ~25% speed up comparing to the select/ge_scalarmult_base -- sowle +static void select_vartime(ge_precomp *t, int pos, signed char b) +{ + unsigned char bnegative = negative(b); + unsigned char babs = b - (((-bnegative) & b) << 1); + const ge_precomp* base; + + if (babs == 0) + { + ge_precomp_0(t); + } + else if (bnegative == 0) + { + base = &ge_base[pos][babs - 1]; + fe_copy(t->yplusx, base->yplusx); + fe_copy(t->yminusx, base->yminusx); + fe_copy(t->xy2d, base->xy2d); + } + else + { + base = &ge_base[pos][babs - 1]; + fe_copy(t->yplusx, base->yminusx); + fe_copy(t->yminusx, base->yplusx); + fe_neg(t->xy2d, base->xy2d); + } +} + +void ge_scalarmult_base_vartime(ge_p3 *h, const unsigned char *a) +{ + signed char 
e[64]; + signed char carry; + ge_p1p1 r; + ge_p2 s; + ge_precomp t; + int i; + + for (i = 0; i < 32; ++i) { + e[2 * i + 0] = (a[i] >> 0) & 15; + e[2 * i + 1] = (a[i] >> 4) & 15; + } + /* each e[i] is between 0 and 15 */ + /* e[63] is between 0 and 7 */ + + carry = 0; + for (i = 0; i < 63; ++i) { + e[i] += carry; + carry = e[i] + 8; + carry >>= 4; + e[i] -= carry << 4; + } + e[63] += carry; + /* each e[i] is between -8 and 8 */ + + ge_p3_0(h); + for (i = 1; i < 64; i += 2) { + select_vartime(&t, i / 2, e[i]); + ge_madd(&r, h, &t); ge_p1p1_to_p3(h, &r); + } + + ge_p3_dbl(&r, h); ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); ge_p1p1_to_p3(h, &r); + + for (i = 0; i < 64; i += 2) { + select_vartime(&t, i / 2, e[i]); + ge_madd(&r, h, &t); ge_p1p1_to_p3(h, &r); + } +} + + +static void select_custom_precomp_vartime(ge_precomp *t, const precomp_data_t base_precomp, int pos, signed char b) +{ + unsigned char bnegative = negative(b); + unsigned char babs = b - (((-bnegative) & b) << 1); + const ge_precomp* base; + + if (babs == 0) + { + ge_precomp_0(t); + } + else if (bnegative == 0) + { + base = &base_precomp[pos][babs - 1]; + fe_copy(t->yplusx, base->yplusx); + fe_copy(t->yminusx, base->yminusx); + fe_copy(t->xy2d, base->xy2d); + } + else + { + base = &base_precomp[pos][babs - 1]; + fe_copy(t->yplusx, base->yminusx); + fe_copy(t->yminusx, base->yplusx); + fe_neg(t->xy2d, base->xy2d); + } +} + + +void ge_scalarmult_precomp_vartime(ge_p3 *h, const precomp_data_t base_precomp, const unsigned char *a) +{ + signed char e[64]; + signed char carry; + ge_p1p1 r; + ge_p2 s; + ge_precomp t; + int i; + + for (i = 0; i < 32; ++i) { + e[2 * i + 0] = (a[i] >> 0) & 15; + e[2 * i + 1] = (a[i] >> 4) & 15; + } + /* each e[i] is between 0 and 15 */ + /* e[63] is between 0 and 7 */ + + carry = 0; + for (i = 0; i < 63; ++i) { + e[i] += carry; + carry = e[i] + 8; + carry >>= 4; + e[i] -= carry << 4; + } + e[63] 
+= carry; + /* each e[i] is between -8 and 8 */ + + ge_p3_0(h); + for (i = 1; i < 64; i += 2) { + select_custom_precomp_vartime(&t, base_precomp, i / 2, e[i]); + ge_madd(&r, h, &t); ge_p1p1_to_p3(h, &r); + } + + ge_p3_dbl(&r, h); ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); ge_p1p1_to_p2(&s, &r); + ge_p2_dbl(&r, &s); ge_p1p1_to_p3(h, &r); + + for (i = 0; i < 64; i += 2) { + select_custom_precomp_vartime(&t, base_precomp, i / 2, e[i]); + ge_madd(&r, h, &t); ge_p1p1_to_p3(h, &r); + } +} + +void ge_p3_to_precomp(ge_precomp *r, const ge_p3* p) +{ + fe recip; + fe x; + fe y; + //unsigned char s[32]; + + fe_invert(recip, p->Z); + fe_mul(x, p->X, recip); + fe_mul(y, p->Y, recip); + fe_sub(r->yminusx, y, x); + fe_add(r->yplusx, y, x); + fe_mul(r->xy2d, x, y); + fe_mul(r->xy2d, r->xy2d, fe_d2); + + // to get canonical representation and obtain the very same beautiful numbers for ge_base in crypto-ops-data.c (maybe unnecessary, TODO -- sowle) + //fe_tobytes(s, r->yminusx); fe_frombytes(r->yminusx, s); + //fe_tobytes(s, r->yplusx); fe_frombytes(r->yplusx, s); + //fe_tobytes(s, r->xy2d); fe_frombytes(r->xy2d, s); +} + diff --git a/src/crypto/crypto-ops.h b/src/crypto/crypto-ops.h index 318af2a2..67c170ef 100644 --- a/src/crypto/crypto-ops.h +++ b/src/crypto/crypto-ops.h @@ -60,6 +60,7 @@ void ge_double_scalarmult_base_vartime(ge_p2 *, const unsigned char *, const ge_ extern const fe fe_sqrtm1; extern const fe fe_d; +extern const fe fe_d2; int ge_frombytes_vartime(ge_p3 *, const unsigned char *); /* From ge_p1p1_to_p2.c */ @@ -118,6 +119,12 @@ void ge_sub(ge_p1p1 *, const ge_p3 *, const ge_cached *); void ge_double_scalarmult_base_vartime_p3(ge_p3 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b); void ge_scalarmult_vartime_p3(ge_p3 *r, const unsigned char *a, const ge_p3 *A); void ge_scalarmult_vartime_p3_v2(ge_p3 *r, const unsigned char *a, const ge_p3 *A); +void ge_scalarmult_base_vartime(ge_p3 *h, const unsigned 
char *a); + +/* precomp_data[i][j] = (j + 1) * 256^i * G */ +typedef ge_precomp (precomp_data_t)[32][8]; +void ge_scalarmult_precomp_vartime(ge_p3 *h, const precomp_data_t base_precomp, const unsigned char *a); +void ge_p3_to_precomp(ge_precomp *r, const ge_p3* p); extern const fe fe_ma2; extern const fe fe_ma; @@ -139,6 +146,7 @@ void sc_invert(unsigned char*, const unsigned char*); void fe_sq(fe h, const fe f); int fe_isnonzero(const fe f); +void fe_add(fe h, const fe f, const fe g); void fe_sub(fe h, const fe f, const fe g); void fe_mul(fe, const fe, const fe); void fe_frombytes(fe h, const unsigned char *s); diff --git a/src/crypto/crypto-sugar.cpp b/src/crypto/crypto-sugar.cpp index fe7cd63a..a0b3f4f9 100644 --- a/src/crypto/crypto-sugar.cpp +++ b/src/crypto/crypto-sugar.cpp @@ -1,5 +1,5 @@ -// Copyright (c) 2020-2021 Zano Project -// Copyright (c) 2020-2021 sowle (val@zano.org, crypto.sowle@gmail.com) +// Copyright (c) 2020-2023 Zano Project +// Copyright (c) 2020-2023 sowle (val@zano.org, crypto.sowle@gmail.com) // Distributed under the MIT/X11 software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. 
// @@ -10,5 +10,31 @@ namespace crypto { + const point_pc_t c_point_H {{ 20574939, 16670001, -29137604, 14614582, 24883426, 3503293, 2667523, 420631, 2267646, -4769165, -11764015, -12206428, -14187565, -2328122, -16242653, -788308, -12595746, -8251557, -10110987, 853396, -4982135, 6035602, -21214320, 16156349, 977218, 2807645, 31002271, 5694305, -16054128, 5644146, -15047429, -568775, -22568195, -8089957, -27721961, -10101877, -29459620, -13359100, -31515170, -6994674 }}; + const point_pc_t c_point_H2 {{ 1318371, 14804112, 12545972, -13482561, -12089798, -16020744, -21221907, -8410994, -33080606, 11275578, 3807637, 11185450, -23227561, -12892068, 1356866, -1025012, -8022738, -8139671, -20315029, -13916324, -6475650, -7025596, 12403179, -5139984, -12068178, 10445584, -14826705, -4927780, 13964546, 12525942, -2314107, -10566315, 32243863, 15603849, 5154154, 4276633, -20918372, -15718796, -26386151, 8434696 }}; + const point_pc_t c_point_U {{ 30807552, 984924, 23426137, -5598760, 7545909, 16325843, 993742, 2594106, -31962071, -959867, 16454190, -4091093, 1197656, 13586872, -9269020, -14133290, 1869274, 13360979, -24627258, -10663086, 2212027, 1198856, 20515811, 15870563, -23833732, 9839517, -19416306, 11567295, -4212053, 348531, -2671541, 484270, -19128078, 1236698, -16002690, 9321345, 9776066, 10711838, 11187722, -16371275 }}; + const point_pc_t c_point_X {{ 25635916, -5459446, 5768861, 5666160, -6357364, -12939311, 29490001, -4543704, -31266450, -2582476, 23705213, 9562626, -716512, 16560168, 7947407, 2039790, -2752711, 4742449, 3356761, 16338966, 17303421, -5790717, -5684800, 12062431, -3307947, 8139265, -26544839, 12058874, 3452748, 3359034, 26514848, -6060876, 31255039, 11154418, -21741975, -3782423, -19871841, 5729859, 21754676, -12454027 }}; + const point_pc_t c_point_H_plus_G {{ 12291435, 3330843, -3390294, 13894858, -1099584, -6848191, 12040668, -15950068, -7494633, 12566672, -5526901, -16645799, -31081168, -1095427, -13082463, 4573480, -11255691, 4344628, 
33477173, 11137213, -3837023, -12436594, -8471924, -814016, 10785607, 9492721, 10992667, 7406385, -5687296, -127915, -6229107, -9324867, 558657, 6493750, 4895261, 12642545, 9549220, 696086, 21894285, -10521807 }}; + const point_pc_t c_point_H_minus_G {{ -28347682, 3523701, -3380175, -14453727, 4238027, -6032522, 20235758, 4091609, 12557126, -8064113, 4212476, -13419094, -114185, -7650727, -24238, 16663404, 23676363, -6819610, 18286466, 8714527, -3837023, -12436594, -8471924, -814016, 10785607, 9492721, 10992667, 7406385, -5687296, -127915, -20450317, 13815641, -11604061, -447489, 27380225, 9400847, -8551293, -1173627, -28110171, 14241295 }}; + + + void construct_precomp_data(precomp_data_t precomp_data, const point_t& point) + { + point_t A = point; + for(size_t i = 0; i < 32; ++i) + { + point_t B = A; + ge_p3_to_precomp(&precomp_data[i][0], &B.m_p3); + for(size_t j = 1; j < 8; ++j) + { + B += A; // B = (j+1) * 256^i * point + ge_p3_to_precomp(&precomp_data[i][j], &B.m_p3); + } + + if (i != 31) + A.modify_mul_pow_2(8); // *= 256 + } + } + } // namespace crypto diff --git a/src/crypto/crypto-sugar.h b/src/crypto/crypto-sugar.h index 70e5a210..d793898b 100644 --- a/src/crypto/crypto-sugar.h +++ b/src/crypto/crypto-sugar.h @@ -729,14 +729,15 @@ namespace crypto friend point_t operator*(const scalar_t& lhs, const point_t& rhs) { point_t result; - ge_scalarmult_p3(&result.m_p3, lhs.m_s, &rhs.m_p3); + //ge_scalarmult_p3(&result.m_p3, lhs.m_s, &rhs.m_p3); + ge_scalarmult_vartime_p3(&result.m_p3, lhs.m_s, &rhs.m_p3); return result; } point_t& operator*=(const scalar_t& rhs) { - // TODO: ge_scalarmult_vartime_p3 - ge_scalarmult_p3(&m_p3, rhs.m_s, &m_p3); + //ge_scalarmult_p3(&m_p3, rhs.m_s, &m_p3); + ge_scalarmult_vartime_p3(&m_p3, rhs.m_s, &m_p3); return *this; } @@ -745,7 +746,8 @@ namespace crypto point_t result; scalar_t reciprocal; sc_invert(&reciprocal.m_s[0], &rhs.m_s[0]); - ge_scalarmult_p3(&result.m_p3, &reciprocal.m_s[0], &lhs.m_p3); + 
//ge_scalarmult_p3(&result.m_p3, &reciprocal.m_s[0], &lhs.m_p3); + ge_scalarmult_vartime_p3(&result.m_p3, &reciprocal.m_s[0], &lhs.m_p3); return result; } @@ -907,7 +909,8 @@ namespace crypto friend point_t operator*(const scalar_t& lhs, const point_g_t&) { point_t result; - ge_scalarmult_base(&result.m_p3, &lhs.m_s[0]); + //ge_scalarmult_base(&result.m_p3, &lhs.m_s[0]); + ge_scalarmult_base_vartime(&result.m_p3, &lhs.m_s[0]); return result; } @@ -916,7 +919,8 @@ namespace crypto point_t result; scalar_t reciprocal; sc_invert(&reciprocal.m_s[0], &rhs.m_s[0]); - ge_scalarmult_base(&result.m_p3, &reciprocal.m_s[0]); + //ge_scalarmult_base(&result.m_p3, &reciprocal.m_s[0]); + ge_scalarmult_base_vartime(&result.m_p3, &reciprocal.m_s[0]); return result; } @@ -925,6 +929,41 @@ namespace crypto }; // struct point_g_t + void construct_precomp_data(precomp_data_t precomp_data, const point_t& point); + + // + // point_pc_t -- point with 30kB of precomputed data, which makes very fast (variable-time) single scalar multiplication possible + // + struct point_pc_t : public point_t + { + explicit point_pc_t(const int32_t(&v)[40]) + : point_t(v) + { + construct_precomp_data(m_precomp_data, *this); + } + + friend point_t operator*(const scalar_t& lhs, const point_pc_t& self) + { + point_t result; + ge_scalarmult_precomp_vartime(&result.m_p3, self.m_precomp_data, &lhs.m_s[0]); + return result; + } + + friend point_t operator/(const point_pc_t& self, const scalar_t& rhs) + { + point_t result; + scalar_t reciprocal; + sc_invert(&reciprocal.m_s[0], &rhs.m_s[0]); + ge_scalarmult_precomp_vartime(&result.m_p3, self.m_precomp_data, &reciprocal.m_s[0]); + return result; + } + + static_assert(sizeof(crypto::public_key) == 32, "size error"); + + precomp_data_t m_precomp_data; + }; // struct point_pc_t + + // // vector of scalars // @@ -1087,12 +1126,20 @@ namespace crypto static constexpr point_t c_point_0 {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0 }}; static constexpr point_g_t c_point_G {{ 25485296, 5318399, 8791791, -8299916, -14349720, 6939349, -3324311, -7717049, 7287234, -6577708, -758052, -1832720, 13046421, -4857925, 6576754, 14371947, -13139572, 6845540, -2198883, -4003719, -947565, 6097708, -469190, 10704810, -8556274, -15589498, -16424464, -16608899, 14028613, -5004649, 6966464, -2456167, 7033433, 6781840, 28785542, 12262365, -2659449, 13959020, -21013759, -5262166 }}; - static constexpr point_t c_point_H {{ 20574939, 16670001, -29137604, 14614582, 24883426, 3503293, 2667523, 420631, 2267646, -4769165, -11764015, -12206428, -14187565, -2328122, -16242653, -788308, -12595746, -8251557, -10110987, 853396, -4982135, 6035602, -21214320, 16156349, 977218, 2807645, 31002271, 5694305, -16054128, 5644146, -15047429, -568775, -22568195, -8089957, -27721961, -10101877, -29459620, -13359100, -31515170, -6994674 }}; - static constexpr point_t c_point_H2 {{ 1318371, 14804112, 12545972, -13482561, -12089798, -16020744, -21221907, -8410994, -33080606, 11275578, 3807637, 11185450, -23227561, -12892068, 1356866, -1025012, -8022738, -8139671, -20315029, -13916324, -6475650, -7025596, 12403179, -5139984, -12068178, 10445584, -14826705, -4927780, 13964546, 12525942, -2314107, -10566315, 32243863, 15603849, 5154154, 4276633, -20918372, -15718796, -26386151, 8434696 }}; - static constexpr point_t c_point_U {{ 30807552, 984924, 23426137, -5598760, 7545909, 16325843, 993742, 2594106, -31962071, -959867, 16454190, -4091093, 1197656, 13586872, -9269020, -14133290, 1869274, 13360979, -24627258, -10663086, 2212027, 1198856, 20515811, 15870563, -23833732, 9839517, -19416306, 11567295, -4212053, 348531, -2671541, 484270, -19128078, 1236698, -16002690, 9321345, 9776066, 10711838, 11187722, -16371275 }}; - static constexpr point_t c_point_X {{ 25635916, -5459446, 5768861, 5666160, -6357364, -12939311, 29490001, -4543704, -31266450, -2582476, 23705213, 9562626, -716512, 16560168, 7947407, 2039790, -2752711, 
4742449, 3356761, 16338966, 17303421, -5790717, -5684800, 12062431, -3307947, 8139265, -26544839, 12058874, 3452748, 3359034, 26514848, -6060876, 31255039, 11154418, -21741975, -3782423, -19871841, 5729859, 21754676, -12454027 }}; - static constexpr point_t c_point_H_plus_G {{ 12291435, 3330843, -3390294, 13894858, -1099584, -6848191, 12040668, -15950068, -7494633, 12566672, -5526901, -16645799, -31081168, -1095427, -13082463, 4573480, -11255691, 4344628, 33477173, 11137213, -3837023, -12436594, -8471924, -814016, 10785607, 9492721, 10992667, 7406385, -5687296, -127915, -6229107, -9324867, 558657, 6493750, 4895261, 12642545, 9549220, 696086, 21894285, -10521807 }}; - static constexpr point_t c_point_H_minus_G {{ -28347682, 3523701, -3380175, -14453727, 4238027, -6032522, 20235758, 4091609, 12557126, -8064113, 4212476, -13419094, -114185, -7650727, -24238, 16663404, 23676363, -6819610, 18286466, 8714527, -3837023, -12436594, -8471924, -814016, 10785607, 9492721, 10992667, 7406385, -5687296, -127915, -20450317, 13815641, -11604061, -447489, 27380225, 9400847, -8551293, -1173627, -28110171, 14241295 }}; + + extern const point_pc_t c_point_H; + extern const point_pc_t c_point_H2; + extern const point_pc_t c_point_U; + extern const point_pc_t c_point_X; + extern const point_pc_t c_point_H_plus_G; + extern const point_pc_t c_point_H_minus_G; + + //static constexpr point_t c_point_H {{ 20574939, 16670001, -29137604, 14614582, 24883426, 3503293, 2667523, 420631, 2267646, -4769165, -11764015, -12206428, -14187565, -2328122, -16242653, -788308, -12595746, -8251557, -10110987, 853396, -4982135, 6035602, -21214320, 16156349, 977218, 2807645, 31002271, 5694305, -16054128, 5644146, -15047429, -568775, -22568195, -8089957, -27721961, -10101877, -29459620, -13359100, -31515170, -6994674 }}; + //static constexpr point_t c_point_H2 {{ 1318371, 14804112, 12545972, -13482561, -12089798, -16020744, -21221907, -8410994, -33080606, 11275578, 3807637, 11185450, -23227561, -12892068, 
1356866, -1025012, -8022738, -8139671, -20315029, -13916324, -6475650, -7025596, 12403179, -5139984, -12068178, 10445584, -14826705, -4927780, 13964546, 12525942, -2314107, -10566315, 32243863, 15603849, 5154154, 4276633, -20918372, -15718796, -26386151, 8434696 }}; + //static constexpr point_t c_point_U {{ 30807552, 984924, 23426137, -5598760, 7545909, 16325843, 993742, 2594106, -31962071, -959867, 16454190, -4091093, 1197656, 13586872, -9269020, -14133290, 1869274, 13360979, -24627258, -10663086, 2212027, 1198856, 20515811, 15870563, -23833732, 9839517, -19416306, 11567295, -4212053, 348531, -2671541, 484270, -19128078, 1236698, -16002690, 9321345, 9776066, 10711838, 11187722, -16371275 }}; + //static constexpr point_t c_point_X {{ 25635916, -5459446, 5768861, 5666160, -6357364, -12939311, 29490001, -4543704, -31266450, -2582476, 23705213, 9562626, -716512, 16560168, 7947407, 2039790, -2752711, 4742449, 3356761, 16338966, 17303421, -5790717, -5684800, 12062431, -3307947, 8139265, -26544839, 12058874, 3452748, 3359034, 26514848, -6060876, 31255039, 11154418, -21741975, -3782423, -19871841, 5729859, 21754676, -12454027 }}; + //static constexpr point_t c_point_H_plus_G {{ 12291435, 3330843, -3390294, 13894858, -1099584, -6848191, 12040668, -15950068, -7494633, 12566672, -5526901, -16645799, -31081168, -1095427, -13082463, 4573480, -11255691, 4344628, 33477173, 11137213, -3837023, -12436594, -8471924, -814016, 10785607, 9492721, 10992667, 7406385, -5687296, -127915, -6229107, -9324867, 558657, 6493750, 4895261, 12642545, 9549220, 696086, 21894285, -10521807 }}; + //static constexpr point_t c_point_H_minus_G {{ -28347682, 3523701, -3380175, -14453727, 4238027, -6032522, 20235758, 4091609, 12557126, -8064113, 4212476, -13419094, -114185, -7650727, -24238, 16663404, 23676363, -6819610, 18286466, 8714527, -3837023, -12436594, -8471924, -814016, 10785607, 9492721, 10992667, 7406385, -5687296, -127915, -20450317, 13815641, -11604061, -447489, 27380225, 9400847, -8551293, 
-1173627, -28110171, 14241295 }}; // // hash functions' helper diff --git a/src/currency_core/currency_basic.h b/src/currency_core/currency_basic.h index 0fbab8a8..228e2516 100644 --- a/src/currency_core/currency_basic.h +++ b/src/currency_core/currency_basic.h @@ -60,7 +60,7 @@ namespace currency // Using C++17 extended aggregate initialization (P0017R1). C++17, finally! -- sowle const static crypto::public_key native_coin_asset_id = {{'\xd6', '\x32', '\x9b', '\x5b', '\x1f', '\x7c', '\x08', '\x05', '\xb5', '\xc3', '\x45', '\xf4', '\x95', '\x75', '\x54', '\x00', '\x2a', '\x2f', '\x55', '\x78', '\x45', '\xf6', '\x4d', '\x76', '\x45', '\xda', '\xe0', '\xe0', '\x51', '\xa6', '\x49', '\x8a'}}; // == crypto::c_point_H, checked in crypto_constants const static crypto::public_key native_coin_asset_id_1div8 = {{'\x74', '\xc3', '\x2d', '\x3e', '\xaa', '\xfa', '\xfc', '\x62', '\x3b', '\xf4', '\x83', '\xe8', '\x58', '\xd4', '\x2e', '\x8b', '\xf4', '\xec', '\x7d', '\xf0', '\x64', '\xad', '\xa2', '\xe3', '\x49', '\x34', '\x46', '\x9c', '\xff', '\x6b', '\x62', '\x68'}}; // == 1/8 * crypto::c_point_H, checked in crypto_constants - const static crypto::point_t native_coin_asset_id_pt = crypto::c_point_H; + const static crypto::point_t native_coin_asset_id_pt {{ 20574939, 16670001, -29137604, 14614582, 24883426, 3503293, 2667523, 420631, 2267646, -4769165, -11764015, -12206428, -14187565, -2328122, -16242653, -788308, -12595746, -8251557, -10110987, 853396, -4982135, 6035602, -21214320, 16156349, 977218, 2807645, 31002271, 5694305, -16054128, 5644146, -15047429, -568775, -22568195, -8089957, -27721961, -10101877, -29459620, -13359100, -31515170, -6994674 }}; // c_point_H const static wide_difficulty_type global_difficulty_pow_starter = DIFFICULTY_POW_STARTER; const static wide_difficulty_type global_difficulty_pos_starter = DIFFICULTY_POS_STARTER; diff --git a/tests/functional_tests/crypto_tests.cpp b/tests/functional_tests/crypto_tests.cpp index 4afdbe5e..3a0d63ec 100644 --- 
a/tests/functional_tests/crypto_tests.cpp +++ b/tests/functional_tests/crypto_tests.cpp @@ -1695,6 +1695,32 @@ TEST(crypto, scalar_get_bits) return true; } + +TEST(crypto, scalarmult_base_vartime) +{ + auto check_for_x = [&](const scalar_t& x) -> bool { + point_t P, P2; + ge_scalarmult_base_vartime(&P.m_p3, x.m_s); + ge_scalarmult_base(&P2.m_p3, x.m_s); + return (P - P2).is_zero(); + }; + + ASSERT_TRUE(check_for_x(c_scalar_0)); + ASSERT_TRUE(check_for_x(c_scalar_1)); + ASSERT_TRUE(check_for_x(c_scalar_1div8)); + ASSERT_TRUE(check_for_x(c_scalar_Lm1)); + ASSERT_TRUE(check_for_x(c_scalar_L)); + + for(size_t i = 0; i < 1000; ++i) + { + scalar_t x = scalar_t::random(); + ASSERT_TRUE(check_for_x(x)); + } + + return true; +} + + template bool crypto_msm_runner(size_t N, size_t low_bits_to_clear, size_t high_bits_to_clear) { diff --git a/tests/functional_tests/crypto_tests_performance.h b/tests/functional_tests/crypto_tests_performance.h index 68a8a7e7..ed7e64b2 100644 --- a/tests/functional_tests/crypto_tests_performance.h +++ b/tests/functional_tests/crypto_tests_performance.h @@ -41,6 +41,42 @@ inline std::ostream &operator <<(std::ostream &o, const crypto::ge_precomp v) return o; } +TEST(crypto, ge_precomp) +{ + //precomp_data_t G_precomp = {}; + //construct_precomp_data(G_precomp, c_point_G); + //std::cout << "size of G_precomp: " << sizeof G_precomp << " bytes" << ENDL; + //for(size_t i = 0; i < 32; ++i) + // for(size_t j = 0; j < 8; ++j) + // std::cout << "i: " << i << ", j: " << j << ", precomp: " << ENDL << G_precomp[i][j] << ENDL; + + precomp_data_t H_precomp = {}; + construct_precomp_data(H_precomp, c_point_H); + + auto check_for_x = [&](const scalar_t& x) -> bool { + point_t P; + ge_scalarmult_precomp_vartime(&P.m_p3, H_precomp, x.m_s); + return P == x * c_point_H; + }; + + ASSERT_TRUE(check_for_x(c_scalar_0)); + ASSERT_TRUE(check_for_x(c_scalar_1)); + ASSERT_TRUE(check_for_x(c_scalar_1div8)); + ASSERT_TRUE(check_for_x(c_scalar_Lm1)); + 
ASSERT_TRUE(check_for_x(c_scalar_L)); + + for(size_t i = 0; i < 1000; ++i) + { + scalar_t x = scalar_t::random(); + ASSERT_TRUE(check_for_x(x)); + } + + return true; +} + + + + TEST(perf, primitives) { struct helper @@ -315,7 +351,7 @@ TEST(perf, primitives) return HASH_64_VEC(points_cached); }); - run("ge_add(p3 + p3)", 50000, [](timer_t& t, size_t rounds) { + run("ge_add(p1p1 = p3 + cached)", 50000, [](timer_t& t, size_t rounds) { std::vector rnd_indecies; helper::make_rnd_indicies(rnd_indecies, rounds); std::vector points_cached(rounds); @@ -530,7 +566,7 @@ TEST(perf, primitives) return HASH_64_VEC(points_p3); }); - run("ge_scalarmult_base()", 5000, [](timer_t& t, size_t rounds) { + run("ge_scalarmult_base()", 10000, [](timer_t& t, size_t rounds) { std::vector rnd_indecies; helper::make_rnd_indicies(rnd_indecies, rounds); @@ -555,6 +591,87 @@ TEST(perf, primitives) return HASH_64_VEC(points_p3); }); + run("construct_precomp_data()", 300, [](timer_t& t, size_t rounds) { + std::vector rnd_indecies; + helper::make_rnd_indicies(rnd_indecies, rounds); + + unsigned char s[32] = {}; + std::vector random_points(rounds); + for (size_t i = 0; i < rounds; ++i) + { + s[0] = i; + ge_p2 p2; + ge_fromfe_frombytes_vartime(&p2, s); + ge_p2_to_p3(&random_points[i].m_p3, &p2); + } + + std::vector points_p3(rounds); + precomp_data_t precomp_data; + uint64_t result = 0; + t.start(); + for (size_t i = 0; i < rounds; ++i) + { + construct_precomp_data(precomp_data, random_points[rnd_indecies[i]]); + result ^= (precomp_data[1][1].xy2d[1] + precomp_data[31][7].xy2d[9]); + } + t.stop(); + + return result; + }); + + run("ge_scalarmult_precomp_vartime()", 10000, [](timer_t& t, size_t rounds) { + std::vector rnd_indecies; + helper::make_rnd_indicies(rnd_indecies, rounds); + + scalar_t x; + x.make_random(); + + std::vector scalars(rounds); + for (size_t i = 0; i < rounds; ++i) + { + x = x + x + x; // advance the outer x so every round gets a distinct scalar (a local redeclaration here would shadow it and read itself uninitialized) + memcpy(&scalars[i].data, x.data(), 32); + } + + precomp_data_t precomp_data; + 
construct_precomp_data(precomp_data, x * c_point_X); + + std::vector points_p3(rounds); + t.start(); + for (size_t i = 0; i < rounds; ++i) + { + ge_scalarmult_precomp_vartime(&points_p3[i], precomp_data, (const unsigned char*)&scalars[rnd_indecies[i]]); + } + t.stop(); + + return HASH_64_VEC(points_p3); + }); + + run("ge_scalarmult_base_vartime()", 10000, [](timer_t& t, size_t rounds) { + std::vector rnd_indecies; + helper::make_rnd_indicies(rnd_indecies, rounds); + + scalar_t x; + x.make_random(); + + std::vector scalars(rounds); + for (size_t i = 0; i < rounds; ++i) + { + x = x + x + x; // advance the outer x so every round gets a distinct scalar (a local redeclaration here would shadow it and read itself uninitialized) + memcpy(&scalars[i].data, x.data(), 32); + } + + std::vector points_p3(rounds); + t.start(); + for (size_t i = 0; i < rounds; ++i) + { + ge_scalarmult_base_vartime(&points_p3[i], (const unsigned char*)&scalars[rnd_indecies[i]]); + } + t.stop(); + + return HASH_64_VEC(points_p3); + }); + run("ge_mul8_p3()", 5000, [](timer_t& t, size_t rounds) { std::vector rnd_indecies; helper::make_rnd_indicies(rnd_indecies, rounds); @@ -738,5 +855,402 @@ TEST(perf, primitives) return HASH_64_VEC(result); }); + run("get_bits x 10", 20000, [](timer_t& t, size_t rounds) { + std::vector rnd_indecies; + helper::make_rnd_indicies(rnd_indecies, rounds); + + scalar_vec_t data; + data.resize_and_make_random(rounds); + + std::vector result(rounds); + t.start(); + for (size_t i = 0; i < rounds; ++i) + { + auto& x = data[rnd_indecies[i]]; + result[i] = + x.get_bits(x.m_s[11], x.m_s[21] % 65) ^ + x.get_bits(x.m_s[12], x.m_s[22] % 65) ^ + x.get_bits(x.m_s[13], x.m_s[23] % 65) ^ + x.get_bits(x.m_s[14], x.m_s[24] % 65) ^ + x.get_bits(x.m_s[15], x.m_s[25] % 65) ^ + x.get_bits(x.m_s[16], x.m_s[26] % 65) ^ + x.get_bits(x.m_s[17], x.m_s[27] % 65) ^ + x.get_bits(x.m_s[18], x.m_s[28] % 65) ^ + x.get_bits(x.m_s[19], x.m_s[29] % 65) ^ + x.get_bits(x.m_s[20], x.m_s[30] % 65); + } + t.stop(); + + return HASH_64_VEC(result); + }); + + return true; +} // TEST + + + 
+//////////////////////////////////////////////////////////////////////////////// +///////////////// v3 + + + +///////////////// v4 + + +template +bool msm_and_check_zero_pippenger_v4(const scalar_vec_t& g_scalars, const scalar_vec_t& h_scalars, const point_t& summand, size_t c) +{ + // TODO: with c = 8 and with direct access got much worse result than with c = 7 and get_bits(), consider checking again for bigger datasets (N>256) + CHECK_AND_ASSERT_MES(g_scalars.size() <= CT::c_bpp_mn_max, false, "g_scalars oversized"); + CHECK_AND_ASSERT_MES(h_scalars.size() <= CT::c_bpp_mn_max, false, "h_scalars oversized"); + CHECK_AND_ASSERT_MES(c < 10, false, "c is too big"); + + size_t C = 1ull << c; + + // k_max * c + (c-1) >= max_bit_idx + // + // max_bit_idx - (c - 1) max_bit_idx - (c - 1) + (c - 1) max_bit_idx + // k_max = ceil ( --------------------- ) = floor ( ------------------------------ ) = floor ( ----------- ) + // c c c + const size_t b = 253; // the maximum number of bits in x https://eprint.iacr.org/2022/999.pdf TODO: we may also scan for maximum bit used in all the scalars if all the scalars are small + const size_t max_bit_idx = b - 1; + const size_t k_max = max_bit_idx / c; + const size_t K = k_max + 1; + + std::unique_ptr buckets( new point_t[C * K] ); + std::vector buckets_inited(C * K); + + // first loop, calculate partial bucket sums + for (size_t n = 0; n < g_scalars.size(); ++n) + { + for (size_t k = 0; k < K; ++k) + { + uint64_t l = g_scalars[n].get_bits(k * c, c); // l in [0; 2^c-1] + if (l != 0) + { + size_t bucket_id = l * K + k; + if (buckets_inited[bucket_id]) + buckets[bucket_id] += CT::get_generator(false, n); + else + { + buckets[bucket_id] = CT::get_generator(false, n); + buckets_inited[bucket_id] = true; + } + } + } + } + + for (size_t n = 0; n < h_scalars.size(); ++n) + { + for (size_t k = 0; k < K; ++k) + { + uint64_t l = h_scalars[n].get_bits(k * c, c); // l in [0; 2^c-1] + if (l != 0) + { + size_t bucket_id = l * K + k; + if 
(buckets_inited[bucket_id]) + buckets[bucket_id] += CT::get_generator(true, n); + else + { + buckets[bucket_id] = CT::get_generator(true, n); + buckets_inited[bucket_id] = true; + } + } + } + } + + // the second loop + // S[l, k] = S[l-1, k] + B[l, k] + // G[k] = sum{1..C-1} S[l, k] + std::unique_ptr Sk( new point_t[K] ); + std::vector Sk_inited(K); + std::unique_ptr Gk( new point_t[K] ); + std::vector Gk_inited(K); + for (size_t l = C - 1; l > 0; --l) + { + for (size_t k = 0; k < K; ++k) + { + size_t bucket_id = l * K + k; + if (buckets_inited[bucket_id]) + { + if (Sk_inited[k]) + Sk[k] += buckets[bucket_id]; + else + { + Sk[k] = buckets[bucket_id]; + Sk_inited[k] = true; + } + } + + if (Sk_inited[k]) + { + if (Gk_inited[k]) + Gk[k] += Sk[k]; + else + { + Gk[k] = Sk[k]; + Gk_inited[k] = true; + } + } + } + } + + // the third loop: Horner’s rule + point_t result = Gk_inited[K - 1] ? Gk[K - 1] : c_point_0; + for (size_t k = K - 2; k != SIZE_MAX; --k) + { + result.modify_mul_pow_2(c); + if (Gk_inited[k]) + result += Gk[k]; + } + + result += summand; + + if (!result.is_zero()) + { + LOG_PRINT_L0("msm result is non zero: " << result); + return false; + } + + return true; +} + + + +//////////////////////////////////////////////////////////////////////////////// + +//template +//struct mes_msm_and_check_zero_pippenger_v1 +//{ +// static bool msm_and_check_zero(const scalar_vec_t& g_scalars, const scalar_vec_t& h_scalars, const point_t& summand, size_t c) +// { +// return msm_and_check_zero_pippenger_v1(g_scalars, h_scalars, summand, c); +// } +//}; +// +//template +//struct mes_msm_and_check_zero_pippenger_v2 +//{ +// static bool msm_and_check_zero(const scalar_vec_t& g_scalars, const scalar_vec_t& h_scalars, const point_t& summand, size_t c) +// { +// return msm_and_check_zero_pippenger_v2(g_scalars, h_scalars, summand, c); +// } +//}; + +template +struct mes_msm_and_check_zero_pippenger_v3 +{ + static bool msm_and_check_zero(const scalar_vec_t& g_scalars, const 
scalar_vec_t& h_scalars, const point_t& summand, size_t c) + { + return msm_and_check_zero_pippenger_v3(g_scalars, h_scalars, summand, c); + } +}; + +template +struct mes_msm_and_check_zero_pippenger_v4 +{ + static bool msm_and_check_zero(const scalar_vec_t& g_scalars, const scalar_vec_t& h_scalars, const point_t& summand, size_t c) + { + return msm_and_check_zero_pippenger_v4(g_scalars, h_scalars, summand, c); + } +}; + + + +struct pme_runner_i +{ + virtual ~pme_runner_i() {} + virtual bool iteration(bool warmup) = 0; +}; + +template typename selector_t> +struct pme_runner_t : public pme_runner_i +{ + pme_runner_t(const char* testname_, size_t pip_partition_bits_c) + : testname(testname_) + , pip_partition_bits_c(pip_partition_bits_c) + { + testname += std::string(", ") + std::string(typeid(selector_t).name()).erase(0, 11) + std::string(", c = ") + epee::string_tools::num_to_string_fast(pip_partition_bits_c); + std::cout << testname << ENDL; + } + virtual ~pme_runner_t() + { + if (timings.empty()) + return; + + uint64_t median = 0; + auto median_it = timings.begin() + timings.size() / 2; + std::nth_element(timings.begin(), median_it, timings.end()); + median = *median_it; + if (timings.size() % 2 == 0) + { + auto max_it = std::max_element(timings.begin(), median_it); + median = (median + *max_it) / 2; + } + + uint64_t total_time = std::accumulate(timings.begin(), timings.end(), uint64_t{0}); // 64-bit init value: std::accumulate sums in the type of its init argument, a plain 0 would sum in int + std::cout << std::left << std::setw(100) << testname << " : " << std::setw(5) << median << " (median), " << std::setw(5) << total_time / timings.size() << " (avg), mcs" << ENDL; + } + + virtual bool iteration(bool warmup) + { + scalar_vec_t g_scalars, h_scalars; + g_scalars.resize_and_make_random(N); + g_scalars[0] = c_scalar_Lm1; + //std::cout << "bit 251: " << g_scalars[0].get_bit(251) << ", bit 252: " << g_scalars[0].get_bit(252) << ENDL; + h_scalars.resize_and_make_random(N); + point_t sum = c_point_0; + for(size_t i = 0; i < N; ++i) + { + //g_scalars[i].m_u64[3] = 0; + 
//h_scalars[i].m_u64[3] = 0; + //g_scalars[i].m_s[31] = 0; + //h_scalars[i].m_s[31] = 0; + sum += g_scalars[i] * CT::get_generator(false, i) + h_scalars[i] * CT::get_generator(true, i); + } + + TIME_MEASURE_START(t); + bool r = typename selector_t::msm_and_check_zero(g_scalars, h_scalars, -sum, pip_partition_bits_c); + TIME_MEASURE_FINISH(t); + ASSERT_TRUE(r); + + if (!warmup) + timings.push_back(t); + return true; + } + + std::vector timings; + std::string testname; + size_t pip_partition_bits_c; +}; + + +TEST(perf, msm) +{ + bool r = false; + + std::deque> runners; + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v1> >("Zarcanum, BPPE, 128", 1)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v1> >("Zarcanum, BPPE, 128", 2)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v1> >("Zarcanum, BPPE, 128", 3)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v1> >("Zarcanum, BPPE, 128", 4)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v1> >("Zarcanum, BPPE, 128", 5)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v1> >("Zarcanum, BPPE, 128", 6)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v1> >("Zarcanum, BPPE, 128", 7)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v1> >("Zarcanum, BPPE, 128", 8)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v1> >("Zarcanum, BPPE, 128", 9)); + 
//runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v1> >("ZC out, BPP, 256", 1)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v1> >("ZC out, BPP, 256", 2)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v1> >("ZC out, BPP, 256", 3)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v1> >("ZC out, BPP, 256", 4)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v1> >("ZC out, BPP, 256", 5)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v1> >("ZC out, BPP, 256", 6)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v1> >("ZC out, BPP, 256", 7)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v1> >("ZC out, BPP, 256", 8)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v1> >("ZC out, BPP, 256", 9)); + + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v2> >("Zarcanum, BPPE, 128", 1)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v2> >("Zarcanum, BPPE, 128", 2)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v2> >("Zarcanum, BPPE, 128", 3)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v2> >("Zarcanum, BPPE, 128", 4)); + 
//runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v2> >("Zarcanum, BPPE, 128", 5)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v2> >("Zarcanum, BPPE, 128", 6)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v2> >("Zarcanum, BPPE, 128", 7)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v2> >("Zarcanum, BPPE, 128", 8)); + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v2> >("Zarcanum, BPPE, 128", 9)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v2> >("ZC out, BPP, 256", 1)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v2> >("ZC out, BPP, 256", 2)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v2> >("ZC out, BPP, 256", 3)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v2> >("ZC out, BPP, 256", 4)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v2> >("ZC out, BPP, 256", 5)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v2> >("ZC out, BPP, 256", 6)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v2> >("ZC out, BPP, 256", 7)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v2> >("ZC out, BPP, 256", 8)); + 
//runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v2> >("ZC out, BPP, 256", 9)); + + + runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v3> >("Zarcanum, BPPE, 128 +++++++++++", 7)); + runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v3> >("ZC out, BPP, 256 +++++++++++", 7)); + + runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v4> >("Zarcanum, BPPE, 128 ###########", 7)); + runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v4> >("ZC out, BPP, 256 ###########", 7)); + + //runners.emplace_front(std::make_unique< pme_runner_t<128, bpp_crypto_trait_Zarcanum, mes_msm_and_check_zero_pippenger_v1> >("Zarcanum, BPPE, 128", 7)); + //runners.emplace_front(std::make_unique< pme_runner_t<256, bpp_crypto_trait_ZC_out, mes_msm_and_check_zero_pippenger_v1> >("ZC out, BPP, 256", 7)); + + + std::cout << "warm up..." 
<< ENDL; + size_t runs_count = 30; + for(size_t k = 0; k < runs_count; ++k) + { + for(auto& runner : runners) + ASSERT_TRUE(runner->iteration(true)); + } + + runs_count = 200; + for(size_t k = 0; k < runs_count; ++k) + { + for(auto& runner : runners) + ASSERT_TRUE(runner->iteration(false)); + + size_t done_percent = 100 * k / runs_count; + if (100 * (k + 1) / runs_count > done_percent && done_percent % 5 == 0) + std::cout << done_percent << " %" << ENDL; + } + + return true; +} + + + +template +bool perf_generators_runner(const T& generator, const char* title) +{ + const size_t warmup_rounds = 20; + const size_t rounds = 500; + const size_t inner_rounds = 128; + uint64_t h = 0; + std::vector timings; + + size_t N = 1024; + scalar_vec_t scalars; + scalars.resize_and_make_random(N); + std::vector points(N); + + for(size_t i = 0; i < warmup_rounds; ++i) + for(size_t j = 0; j < inner_rounds; ++j) + points[(i + j) % N] = scalars[(i + j) % N] * generator; + + h = hash_64(points.data(), points.size() * sizeof(point_t)); + + for(size_t i = 0; i < rounds; ++i) + { + TIME_MEASURE_START(t); + for(size_t j = 0; j < inner_rounds; ++j) + points[(i + j) % N] = scalars[(i + j) % N] * generator; + TIME_MEASURE_FINISH(t); + timings.push_back(t); + } + + h ^= hash_64(points.data(), points.size() * sizeof(point_t)); + + std::cout << std::left << std::setw(20) << title << " : " << std::setw(5) << std::fixed << std::setprecision(1) << (double)epee::misc_utils::median(timings) / inner_rounds << " mcs, hash = " << h << ENDL; + + return true; +} + +TEST(perf, generators) +{ +#define TEST_GENERATOR(G) ASSERT_TRUE(perf_generators_runner(G, #G)) + + TEST_GENERATOR(c_point_0); + TEST_GENERATOR(c_point_G); + TEST_GENERATOR(c_point_H); + TEST_GENERATOR(c_point_H2); + TEST_GENERATOR(c_point_U); + TEST_GENERATOR(c_point_X); + TEST_GENERATOR(c_point_H_plus_G); + TEST_GENERATOR(c_point_H_minus_G); + return true; }