From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com
Use jacobian/harmonized representation in ecc_point structure.
This is an RFC patch for now, j_to_a/eh_to_a are not modified to produce y coordinate only, more tests are necessary most probably.
Signed-off-by: Dmitry Eremin-Solenikov dbaryshkov@gmail.com --- ecc-a-to-j.c | 12 +++++++---- ecc-ecdsa-sign.c | 2 +- ecc-ecdsa-verify.c | 4 ++-- ecc-eh-to-a.c | 17 ++++++++------- ecc-internal.h | 20 ++++++++++------- ecc-j-to-a.c | 15 +++++++------ ecc-mul-a-eh.c | 13 +++++------ ecc-mul-a.c | 18 +++++++--------- ecc-point-mul-g.c | 8 +++---- ecc-point-mul.c | 2 +- ecc-point.c | 36 +++++++++++++++++++++++++------ ecdsa-keygen.c | 7 +++--- eddsa-compress.c | 2 +- eddsa-decompress.c | 1 + eddsa-verify.c | 2 +- testsuite/ecc-add-test.c | 5 ++++- testsuite/ecc-dup-test.c | 10 ++++----- testsuite/ecc-mul-a-test.c | 22 ++++++++++++------- testsuite/ecc-mul-g-test.c | 4 ++-- testsuite/ecdsa-keygen-test.c | 38 ++++++++++++++++++++++----------- testsuite/eddsa-compress-test.c | 8 +++++-- testsuite/eddsa-verify-test.c | 2 +- testsuite/testutils.c | 2 +- 23 files changed, 152 insertions(+), 98 deletions(-)
diff --git a/ecc-a-to-j.c b/ecc-a-to-j.c index 9fb0d2b80c41..895502e0fe20 100644 --- a/ecc-a-to-j.c +++ b/ecc-a-to-j.c @@ -40,11 +40,12 @@
void ecc_a_to_j (const struct ecc_curve *ecc, - mp_limb_t *r, const mp_limb_t *p) + mp_limb_t *r, const mpz_t x, const mpz_t y) { if (ecc->use_redc) { - mpn_copyd (r + ecc->p.size, p, 2*ecc->p.size); + mpz_limbs_copy (r + ecc->p.size, x, ecc->p.size); + mpz_limbs_copy (r + 2 * ecc->p.size, y, ecc->p.size);
mpn_zero (r, ecc->p.size); ecc->p.mod (&ecc->p, r); @@ -52,8 +53,11 @@ ecc_a_to_j (const struct ecc_curve *ecc, mpn_zero (r + ecc->p.size, ecc->p.size); ecc->p.mod (&ecc->p, r + ecc->p.size); } - else if (r != p) - mpn_copyi (r, p, 2*ecc->p.size); + else + { + mpz_limbs_copy (r, x, ecc->p.size); + mpz_limbs_copy (r + ecc->p.size, y, ecc->p.size); + }
mpn_copyi (r + 2*ecc->p.size, ecc->unit, ecc->p.size); } diff --git a/ecc-ecdsa-sign.c b/ecc-ecdsa-sign.c index 3b9e9cc1a35d..87239b7cccb6 100644 --- a/ecc-ecdsa-sign.c +++ b/ecc-ecdsa-sign.c @@ -80,7 +80,7 @@ ecc_ecdsa_sign (const struct ecc_curve *ecc,
ecc->mul_g (ecc, P, kp, P + 3*ecc->p.size); /* x coordinate only, modulo q */ - ecc->h_to_a (ecc, 2, rp, P, P + 3*ecc->p.size); + ecc->h_to_a (ecc, 2, rp, NULL, P, P + 3*ecc->p.size);
/* Invert k, uses 4 * ecc->p.size including scratch */ ecc->q.invert (&ecc->q, kinv, kp, tp); /* NOTE: Also clobbers hp */ diff --git a/ecc-ecdsa-verify.c b/ecc-ecdsa-verify.c index d7f5b684841a..120b12965fd5 100644 --- a/ecc-ecdsa-verify.c +++ b/ecc-ecdsa-verify.c @@ -64,7 +64,7 @@ mp_size_t ecc_ecdsa_verify_itch (const struct ecc_curve *ecc) { /* Largest storage need is for the ecc->mul call. */ - return 5*ecc->p.size + ecc->mul_itch; + return 6*ecc->p.size + ecc->mul_itch; }
/* FIXME: Use faster primitives, not requiring side-channel silence. */ @@ -145,7 +145,7 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc, ecc->add_hhh (ecc, P1, P1, P2, P1 + 3*ecc->p.size); } /* x coordinate only, modulo q */ - ecc->h_to_a (ecc, 2, P2, P1, P1 + 3*ecc->p.size); + ecc->h_to_a (ecc, 2, P2, NULL, P1, P1 + 3*ecc->p.size);
return (mpn_cmp (rp, P2, ecc->p.size) == 0); #undef P2 diff --git a/ecc-eh-to-a.c b/ecc-eh-to-a.c index 8173b887d59d..851dcb8d592a 100644 --- a/ecc-eh-to-a.c +++ b/ecc-eh-to-a.c @@ -43,7 +43,8 @@ void ecc_eh_to_a (const struct ecc_curve *ecc, int op, - mp_limb_t *r, const mp_limb_t *p, + mp_limb_t *x, mp_limb_t *y, + const mp_limb_t *p, mp_limb_t *scratch) { #define izp scratch @@ -60,8 +61,8 @@ ecc_eh_to_a (const struct ecc_curve *ecc, ecc->p.invert (&ecc->p, izp, zp, tp + ecc->p.size);
ecc_modp_mul (ecc, tp, xp, izp); - cy = mpn_sub_n (r, tp, ecc->p.m, ecc->p.size); - cnd_copy (cy, r, tp, ecc->p.size); + cy = mpn_sub_n (x, tp, ecc->p.m, ecc->p.size); + cnd_copy (cy, x, tp, ecc->p.size);
if (op) { @@ -75,14 +76,14 @@ ecc_eh_to_a (const struct ecc_curve *ecc, unsigned shift; assert (ecc->p.bit_size == 255); shift = ecc->q.bit_size - 1 - GMP_NUMB_BITS * (ecc->p.size - 1); - cy = mpn_submul_1 (r, ecc->q.m, ecc->p.size, - r[ecc->p.size-1] >> shift); + cy = mpn_submul_1 (x, ecc->q.m, ecc->p.size, + x[ecc->p.size-1] >> shift); assert (cy < 2); - cnd_add_n (cy, r, ecc->q.m, ecc->p.size); + cnd_add_n (cy, x, ecc->q.m, ecc->p.size); } return; } ecc_modp_mul (ecc, tp, yp, izp); - cy = mpn_sub_n (r + ecc->p.size, tp, ecc->p.m, ecc->p.size); - cnd_copy (cy, r + ecc->p.size, tp, ecc->p.size); + cy = mpn_sub_n (y, tp, ecc->p.m, ecc->p.size); + cnd_copy (cy, y, tp, ecc->p.size); } diff --git a/ecc-internal.h b/ecc-internal.h index 18c1bf7d8cee..5854a3247306 100644 --- a/ecc-internal.h +++ b/ecc-internal.h @@ -132,7 +132,8 @@ typedef void ecc_mul_func (const struct ecc_curve *ecc,
typedef void ecc_h_to_a_func (const struct ecc_curve *ecc, int flags, - mp_limb_t *r, const mp_limb_t *p, + mp_limb_t *x, mp_limb_t *y, + const mp_limb_t *p, mp_limb_t *scratch);
struct ecc_modulo @@ -278,19 +279,21 @@ ecc_hash (const struct ecc_modulo *m, mp_limb_t *hp, size_t length, const uint8_t *digest);
-/* Converts a point P in affine coordinates into a point R in jacobian +/* Converts a point x,y in affine coordinates into a point R in jacobian coordinates. */ void ecc_a_to_j (const struct ecc_curve *ecc, - mp_limb_t *r, const mp_limb_t *p); + mp_limb_t *r, const mpz_t x, const mpz_t y);
/* Converts a point P in jacobian coordinates into a point R in affine coordinates. If op == 1, produce x coordinate only. If op == 2, - produce the x coordinate only, and also reduce it modulo q. */ + produce the x coordinate only, and also reduce it modulo q. If op == 3 + produce the y coordinate only. */ void ecc_j_to_a (const struct ecc_curve *ecc, int op, - mp_limb_t *r, const mp_limb_t *p, + mp_limb_t *x, mp_limb_t *y, + const mp_limb_t *p, mp_limb_t *scratch);
/* Converts a point P in homogeneous coordinates on an Edwards curve @@ -299,7 +302,8 @@ ecc_j_to_a (const struct ecc_curve *ecc, void ecc_eh_to_a (const struct ecc_curve *ecc, int op, - mp_limb_t *r, const mp_limb_t *p, + mp_limb_t *x, mp_limb_t *y, + const mp_limb_t *p, mp_limb_t *scratch);
/* Group operations */ @@ -404,13 +408,13 @@ curve25519_eh_to_x (mp_limb_t *xp, const mp_limb_t *p, #define ECC_MUL_G_ITCH(size) (9*(size)) #define ECC_MUL_G_EH_ITCH(size) (9*(size)) #if ECC_MUL_A_WBITS == 0 -#define ECC_MUL_A_ITCH(size) (12*(size)) +#define ECC_MUL_A_ITCH(size) (3*(size) + ECC_ADD_JJJ_ITCH(size)) #else #define ECC_MUL_A_ITCH(size) \ (((3 << ECC_MUL_A_WBITS) + 11) * (size)) #endif #if ECC_MUL_A_EH_WBITS == 0 -#define ECC_MUL_A_EH_ITCH(size) (13*(size)) +#define ECC_MUL_A_EH_ITCH(size) (3*(size) + ECC_ADD_EHH_ITCH(size)) #else #define ECC_MUL_A_EH_ITCH(size) \ (((3 << ECC_MUL_A_EH_WBITS) + 10) * (size)) diff --git a/ecc-j-to-a.c b/ecc-j-to-a.c index eca10f0fac9e..1ce2e43a5b31 100644 --- a/ecc-j-to-a.c +++ b/ecc-j-to-a.c @@ -41,7 +41,8 @@ void ecc_j_to_a (const struct ecc_curve *ecc, int op, - mp_limb_t *r, const mp_limb_t *p, + mp_limb_t *x, mp_limb_t *y, + const mp_limb_t *p, mp_limb_t *scratch) { #define izp scratch @@ -89,8 +90,8 @@ ecc_j_to_a (const struct ecc_curve *ecc, ecc_modp_mul (ecc, iz3p, iz2p, p); /* ecc_modp (and ecc_modp_mul) may return a value up to 2p - 1, so do a conditional subtraction. */ - cy = mpn_sub_n (r, iz3p, ecc->p.m, ecc->p.size); - cnd_copy (cy, r, iz3p, ecc->p.size); + cy = mpn_sub_n (x, iz3p, ecc->p.m, ecc->p.size); + cnd_copy (cy, x, iz3p, ecc->p.size);
if (op) { @@ -100,16 +101,16 @@ ecc_j_to_a (const struct ecc_curve *ecc, /* Also reduce the x coordinate mod ecc->q. It should already be < 2*ecc->q, so one subtraction should suffice. */ - cy = mpn_sub_n (scratch, r, ecc->q.m, ecc->p.size); - cnd_copy (cy == 0, r, scratch, ecc->p.size); + cy = mpn_sub_n (scratch, x, ecc->q.m, ecc->p.size); + cnd_copy (cy == 0, x, scratch, ecc->p.size); } return; } ecc_modp_mul (ecc, iz3p, iz2p, izp); ecc_modp_mul (ecc, tp, iz3p, p + ecc->p.size); /* And a similar subtraction. */ - cy = mpn_sub_n (r + ecc->p.size, tp, ecc->p.m, ecc->p.size); - cnd_copy (cy, r + ecc->p.size, tp, ecc->p.size); + cy = mpn_sub_n (y, tp, ecc->p.m, ecc->p.size); + cnd_copy (cy, y, tp, ecc->p.size);
#undef izp #undef up diff --git a/ecc-mul-a-eh.c b/ecc-mul-a-eh.c index e9b22cd4c1e7..73657c87d58e 100644 --- a/ecc-mul-a-eh.c +++ b/ecc-mul-a-eh.c @@ -38,7 +38,7 @@ #include "ecc.h" #include "ecc-internal.h"
-/* Binary algorithm needs 6*ecc->p.size + scratch for ecc_add_ehh, +/* Binary algorithm needs 3*ecc->p.size + scratch for ecc_add_ehh, total 13 ecc->p.size
Window algorithm needs (3<<w) * ecc->p.size for the table, @@ -52,14 +52,11 @@ ecc_mul_a_eh (const struct ecc_curve *ecc, const mp_limb_t *np, const mp_limb_t *p, mp_limb_t *scratch) { -#define pe scratch -#define tp (scratch + 3*ecc->p.size) -#define scratch_out (scratch + 6*ecc->p.size) +#define tp (scratch) +#define scratch_out (scratch + 3*ecc->p.size)
unsigned i;
- ecc_a_to_j (ecc, pe, p); - /* x = 0, y = 1, z = 1 */ mpn_zero (r, 3*ecc->p.size); r[ecc->p.size] = r[2*ecc->p.size] = 1; @@ -76,7 +73,7 @@ ecc_mul_a_eh (const struct ecc_curve *ecc, int digit;
ecc->dup (ecc, r, r, scratch_out); - ecc->add_hhh (ecc, tp, r, pe, scratch_out); + ecc->add_hhh (ecc, tp, r, p, scratch_out);
digit = (w & bit) > 0; /* If we had a one-bit, use the sum. */ @@ -103,7 +100,7 @@ table_init (const struct ecc_curve *ecc, mpn_zero (TABLE(0), 3*ecc->p.size); TABLE(0)[ecc->p.size] = TABLE(0)[2*ecc->p.size] = 1;
- ecc_a_to_j (ecc, TABLE(1), p); + mpn_copyi (TABLE(1), p, 3*ecc->p.size);
for (j = 2; j < size; j += 2) { diff --git a/ecc-mul-a.c b/ecc-mul-a.c index cb9c7d418633..6f0c19bcd004 100644 --- a/ecc-mul-a.c +++ b/ecc-mul-a.c @@ -40,8 +40,8 @@ #include "ecc.h" #include "ecc-internal.h"
-/* Binary algorithm needs 6*ecc->p.size + scratch for ecc_add_jja. - Current total is 12 ecc->p.size, at most 864 bytes. +/* Binary algorithm needs 3*ecc->p.size + scratch for ecc_add_jjj. + Current total is 11 ecc->p.size, at most 792 bytes.
Window algorithm needs (3<<w) * ecc->p.size for the table, 3*ecc->p.size for a temporary point, and scratch for @@ -55,14 +55,12 @@ ecc_mul_a (const struct ecc_curve *ecc, mp_limb_t *scratch) { #define tp scratch -#define pj (scratch + 3*ecc->p.size) -#define scratch_out (scratch + 6*ecc->p.size) +#define scratch_out (scratch + 3*ecc->p.size)
int is_zero;
unsigned i;
- ecc_a_to_j (ecc, pj, p); mpn_zero (r, 3*ecc->p.size);
for (i = ecc->p.size, is_zero = 1; i-- > 0; ) @@ -77,12 +75,12 @@ ecc_mul_a (const struct ecc_curve *ecc, int digit;
ecc_dup_jj (ecc, r, r, scratch_out); - ecc_add_jja (ecc, tp, r, pj, scratch_out); + ecc_add_jjj (ecc, tp, r, p, scratch_out);
digit = (w & bit) > 0; /* If is_zero is set, r is the zero point, - and ecc_add_jja produced garbage. */ - cnd_copy (is_zero, tp, pj, 3*ecc->p.size); + and ecc_add_jjj produced garbage. */ + cnd_copy (is_zero, tp, p, 3*ecc->p.size); is_zero &= ~digit; /* If we had a one-bit, use the sum. */ cnd_copy (digit, r, tp, 3*ecc->p.size); @@ -106,12 +104,12 @@ table_init (const struct ecc_curve *ecc, unsigned j;
mpn_zero (TABLE(0), 3*ecc->p.size); - ecc_a_to_j (ecc, TABLE(1), p); + mpn_copyi (TABLE(1), p, 3*ecc->p.size);
for (j = 2; j < size; j += 2) { ecc_dup_jj (ecc, TABLE(j), TABLE(j/2), scratch); - ecc_add_jja (ecc, TABLE(j+1), TABLE(j), TABLE(1), scratch); + ecc_add_jjj (ecc, TABLE(j+1), TABLE(j), TABLE(1), scratch); } }
diff --git a/ecc-point-mul-g.c b/ecc-point-mul-g.c index 46fceb81ea45..e38f161bcdff 100644 --- a/ecc-point-mul-g.c +++ b/ecc-point-mul-g.c @@ -44,15 +44,13 @@ void ecc_point_mul_g (struct ecc_point *r, const struct ecc_scalar *n) { - TMP_DECL(scratch, mp_limb_t, 3*ECC_MAX_SIZE + ECC_MUL_G_ITCH (ECC_MAX_SIZE)); + TMP_DECL(scratch, mp_limb_t, ECC_MUL_G_ITCH (ECC_MAX_SIZE)); const struct ecc_curve *ecc = r->ecc; - mp_limb_t size = ecc->p.size; - mp_size_t itch = 3*size + ecc->mul_g_itch; + mp_size_t itch = ecc->mul_g_itch;
assert (n->ecc == ecc);
TMP_ALLOC (scratch, itch);
- ecc->mul_g (ecc, scratch, n->p, scratch + 3*size); - ecc->h_to_a (ecc, 0, r->p, scratch, scratch + 3*size); + ecc->mul_g (ecc, r->p, n->p, scratch); } diff --git a/ecc-point-mul.c b/ecc-point-mul.c index 2be1c5c41d3d..97755ea9b8ec 100644 --- a/ecc-point-mul.c +++ b/ecc-point-mul.c @@ -53,6 +53,6 @@ ecc_point_mul (struct ecc_point *r, const struct ecc_scalar *n, assert (p->ecc == ecc);
ecc->mul (ecc, scratch, n->p, p->p, scratch + 3*size); - ecc->h_to_a (ecc, 0, r->p, scratch, scratch + 3*size); + mpn_copyi (r->p, scratch, 3*size); gmp_free_limbs (scratch, itch); } diff --git a/ecc-point.c b/ecc-point.c index 31e3115abbbc..e4feb129927b 100644 --- a/ecc-point.c +++ b/ecc-point.c @@ -42,13 +42,13 @@ void ecc_point_init (struct ecc_point *p, const struct ecc_curve *ecc) { p->ecc = ecc; - p->p = gmp_alloc_limbs (2*ecc->p.size); + p->p = gmp_alloc_limbs (3*ecc->p.size); }
void ecc_point_clear (struct ecc_point *p) { - gmp_free_limbs (p->p, 2*p->ecc->p.size); + gmp_free_limbs (p->p, 3*p->ecc->p.size); }
int @@ -102,8 +102,7 @@ ecc_point_set (struct ecc_point *p, const mpz_t x, const mpz_t y) if (!res) return 0;
- mpz_limbs_copy (p->p, x, size); - mpz_limbs_copy (p->p + size, y, size); + ecc_a_to_j (p->ecc, p->p, x, y);
return 1; } @@ -112,8 +111,33 @@ void ecc_point_get (const struct ecc_point *p, mpz_t x, mpz_t y) { mp_size_t size = p->ecc->p.size; + mp_limb_t *xp = NULL, *yp = NULL; + mp_limb_t *scratch = gmp_alloc_limbs(p->ecc->h_to_a_itch); + int op; + + if (x) + { + if (y) + op = 0; + else + op = 1; + } + else + { + if (y) + op = 3; + else + return; + } + if (x) + xp = mpz_limbs_write (x, size); + if (y) + yp = mpz_limbs_write (y, size); + p->ecc->h_to_a (p->ecc, op, xp, yp, p->p, scratch); + gmp_free_limbs (scratch, p->ecc->h_to_a_itch); + if (x) - mpz_set_n (x, p->p, size); + mpz_limbs_finish (x, size); if (y) - mpz_set_n (y, p->p + size, size); + mpz_limbs_finish (y, size); } diff --git a/ecdsa-keygen.c b/ecdsa-keygen.c index fa559a9e3b43..3cf0bd3c485a 100644 --- a/ecdsa-keygen.c +++ b/ecdsa-keygen.c @@ -47,15 +47,14 @@ ecdsa_generate_keypair (struct ecc_point *pub, struct ecc_scalar *key, void *random_ctx, nettle_random_func *random) { - TMP_DECL(p, mp_limb_t, 3*ECC_MAX_SIZE + ECC_MUL_G_ITCH (ECC_MAX_SIZE)); + TMP_DECL(p, mp_limb_t, ECC_MUL_G_ITCH (ECC_MAX_SIZE)); const struct ecc_curve *ecc = pub->ecc; - mp_size_t itch = 3*ecc->p.size + ecc->mul_g_itch; + mp_size_t itch = ecc->mul_g_itch;
assert (key->ecc == ecc);
TMP_ALLOC (p, itch);
ecc_mod_random (&ecc->q, key->p, random_ctx, random, p); - ecc->mul_g (ecc, p, key->p, p + 3*ecc->p.size); - ecc->h_to_a (ecc, 0, pub->p, p, p + 3*ecc->p.size); + ecc->mul_g (ecc, pub->p, key->p, p); } diff --git a/eddsa-compress.c b/eddsa-compress.c index 547ba736dc7b..3fe2a9bdbc34 100644 --- a/eddsa-compress.c +++ b/eddsa-compress.c @@ -53,7 +53,7 @@ _eddsa_compress (const struct ecc_curve *ecc, uint8_t *r, mp_limb_t *p, #define yp (scratch + ecc->p.size) #define scratch_out (scratch + 2*ecc->p.size)
- ecc->h_to_a (ecc, 0, xp, p, scratch_out); + ecc->h_to_a (ecc, 0, xp, yp, p, scratch_out); /* Encoding is the y coordinate and an appended "sign" bit, which is the low bit of x. Bit order is not specified explicitly, but for little-endian encoding, it makes most sense to append the bit diff --git a/eddsa-decompress.c b/eddsa-decompress.c index f114b576fffe..0b3119acc890 100644 --- a/eddsa-decompress.c +++ b/eddsa-decompress.c @@ -80,5 +80,6 @@ _eddsa_decompress (const struct ecc_curve *ecc, mp_limb_t *p, sign ^= xp[0] & 1; mpn_sub_n (tp, ecc->p.m, xp, ecc->p.size); cnd_copy (sign, xp, tp, ecc->p.size); + mpn_copyi (p + 2*ecc->p.size, ecc->unit, ecc->p.size); return res; } diff --git a/eddsa-verify.c b/eddsa-verify.c index 7718a1260463..9b596c517fb9 100644 --- a/eddsa-verify.c +++ b/eddsa-verify.c @@ -97,7 +97,7 @@ _eddsa_verify (const struct ecc_curve *ecc,
/* Could maybe save some storage by delaying the R and S operations, but it makes sense to check them for validity up front. */ - if (!_eddsa_decompress (ecc, R, signature, R+2*ecc->p.size)) + if (!_eddsa_decompress (ecc, R, signature, R+3*ecc->p.size)) return 0;
mpn_set_base256_le (sp, ecc->q.size, signature + nbytes, nbytes); diff --git a/testsuite/ecc-add-test.c b/testsuite/ecc-add-test.c index ad2bd29230ee..144111695a27 100644 --- a/testsuite/ecc-add-test.c +++ b/testsuite/ecc-add-test.c @@ -5,6 +5,7 @@ void test_main (void) { unsigned i; + mpz_t x, y;
for (i = 0; ecc_curves[i]; i++) { @@ -17,7 +18,9 @@ test_main (void)
ASSERT (ecc->dup_itch <= ecc->add_hhh_itch);
- ecc_a_to_j (ecc, g, ecc->g); + ecc_a_to_j (ecc, g, + mpz_roinit_n (x, ecc->g, ecc->p.size), + mpz_roinit_n (y, ecc->g + ecc->p.size, ecc->p.size));
if (ecc->p.bit_size == 255) { diff --git a/testsuite/ecc-dup-test.c b/testsuite/ecc-dup-test.c index 0ae4444a7cb0..eede76f24487 100644 --- a/testsuite/ecc-dup-test.c +++ b/testsuite/ecc-dup-test.c @@ -3,17 +3,15 @@ void test_main (void) { + mpz_t x, y; unsigned i;
for (i = 0; ecc_curves[i]; i++) { const struct ecc_curve *ecc = ecc_curves[i]; - mp_limb_t *g = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *p = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *scratch = xalloc_limbs (ecc->dup_itch);
- ecc_a_to_j (ecc, g, ecc->g); - if (ecc->p.bit_size == 255) { mp_limb_t *z = xalloc_limbs (ecc_size_j (ecc)); @@ -32,14 +30,16 @@ test_main (void) else ASSERT (ecc->dup == ecc_dup_jj);
- ecc->dup (ecc, p, g, scratch); + ecc_a_to_j (ecc, p, + mpz_roinit_n (x, ecc->g, ecc->p.size), + mpz_roinit_n (y, ecc->g + ecc->p.size, ecc->p.size)); + ecc->dup (ecc, p, p, scratch); test_ecc_mul_h (i, 2, p);
ecc->dup (ecc, p, p, scratch); test_ecc_mul_h (i, 4, p);
free (p); - free (g); free (scratch); } } diff --git a/testsuite/ecc-mul-a-test.c b/testsuite/ecc-mul-a-test.c index 245016aafc78..af62e7d5c498 100644 --- a/testsuite/ecc-mul-a-test.c +++ b/testsuite/ecc-mul-a-test.c @@ -14,6 +14,8 @@ test_main (void) { const struct ecc_curve *ecc = ecc_curves[i]; mp_size_t size = ecc_size (ecc); + mpz_t x, y; + mp_limb_t *g = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *p = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *q = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *n = xalloc_limbs (size); @@ -22,23 +24,26 @@ test_main (void)
mpn_zero (n, size);
+ ecc_a_to_j (ecc, g, + mpz_roinit_n (x, ecc->g, size), + mpz_roinit_n (y, ecc->g + size, size)); n[0] = 1; - ecc->mul (ecc, p, n, ecc->g, scratch); - ecc->h_to_a (ecc, 0, p, p, scratch); + ecc->mul (ecc, p, n, g, scratch); + ecc->h_to_a (ecc, 0, p, p + size, p, scratch);
if (mpn_cmp (p, ecc->g, 2*size) != 0) die ("curve %d: ecc->mul with n = 1 failed.\n", ecc->p.bit_size);
for (n[0] = 2; n[0] <= 4; n[0]++) { - ecc->mul (ecc, p, n, ecc->g, scratch); + ecc->mul (ecc, p, n, g, scratch); test_ecc_mul_h (i, n[0], p); }
/* (order - 1) * g = - g */ mpn_sub_1 (n, ecc->q.m, size, 1); - ecc->mul (ecc, p, n, ecc->g, scratch); - ecc->h_to_a (ecc, 0, p, p, scratch); + ecc->mul (ecc, p, n, g, scratch); + ecc->h_to_a (ecc, 0, p, p + size, p, scratch); if (ecc->p.bit_size == 255) /* For edwards curves, - (x,y ) == (-x, y). FIXME: Swap x and y, to get identical negation? */ @@ -64,11 +69,11 @@ test_main (void) mpz_limbs_copy (n, r, size); n[size - 1] %= ecc->q.m[size - 1];
- ecc->mul (ecc, p, n, ecc->g, scratch); - ecc->h_to_a (ecc, 0, p, p, scratch); + ecc->mul (ecc, p, n, g, scratch); + ecc->h_to_a (ecc, 0, p, p + size, p, scratch);
ecc->mul_g (ecc, q, n, scratch); - ecc->h_to_a (ecc, 0, q, q, scratch); + ecc->h_to_a (ecc, 0, q, q + size, q, scratch);
if (mpn_cmp (p, q, 2*size)) { @@ -92,6 +97,7 @@ test_main (void) abort (); } } + free (g); free (n); free (p); free (q); diff --git a/testsuite/ecc-mul-g-test.c b/testsuite/ecc-mul-g-test.c index 272394847f3a..c6b7374e82d8 100644 --- a/testsuite/ecc-mul-g-test.c +++ b/testsuite/ecc-mul-g-test.c @@ -23,7 +23,7 @@ test_main (void)
n[0] = 1; ecc->mul_g (ecc, p, n, scratch); - ecc->h_to_a (ecc, 0, p, p, scratch); + ecc->h_to_a (ecc, 0, p, p + size, p, scratch);
if (mpn_cmp (p, ecc->g, 2*size) != 0) { @@ -40,7 +40,7 @@ test_main (void) /* (order - 1) * g = - g */ mpn_sub_1 (n, ecc->q.m, size, 1); ecc->mul_g (ecc, p, n, scratch); - ecc->h_to_a (ecc, 0, p, p, scratch); + ecc->h_to_a (ecc, 0, p, p + size, p, scratch); if (ecc->p.bit_size == 255) /* For edwards curves, - (x,y ) == (-x, y). FIXME: Swap x and y, to get identical negation? */ diff --git a/testsuite/ecdsa-keygen-test.c b/testsuite/ecdsa-keygen-test.c index a96c09effeef..21b43623a8a2 100644 --- a/testsuite/ecdsa-keygen-test.c +++ b/testsuite/ecdsa-keygen-test.c @@ -12,17 +12,32 @@ ecc_valid_p (struct ecc_point *pub)
size = pub->ecc->p.size;
- /* First check range */ - if (mpn_cmp (pub->p, pub->ecc->p.m, size) >= 0 - || mpn_cmp (pub->p + size, pub->ecc->p.m, size) >= 0) - return 0; + mpz_init (x); + mpz_init (y); + + ecc_point_get (pub, x, y); + + if (verbose) + { + fprintf (stderr, "Public key:\nx = "); + mpz_out_str (stderr, 16, x); + fprintf (stderr, "\ny = "); + mpz_out_str (stderr, 16, y); + fprintf (stderr, "\n"); + } + + if (mpz_sgn (x) < 0 || mpz_limbs_cmp (x, pub->ecc->p.m, size) >= 0 + || mpz_sgn (y) < 0 || mpz_limbs_cmp (y, pub->ecc->p.m, size) >= 0) + { + mpz_clear (x); + mpz_clear (y); + + return 0; + }
mpz_init (lhs); mpz_init (rhs);
- mpz_roinit_n (x, pub->p, size); - mpz_roinit_n (y, pub->p + size, size); - mpz_mul (lhs, y, y);
if (pub->ecc->p.bit_size == 255) @@ -53,6 +68,9 @@ ecc_valid_p (struct ecc_point *pub) mpz_clear (lhs); mpz_clear (rhs);
+ mpz_clear (x); + mpz_clear (y); + return res; }
@@ -90,11 +108,7 @@ test_main (void)
if (verbose) { - fprintf (stderr, "Public key:\nx = "); - write_mpn (stderr, 16, pub.p, ecc->p.size); - fprintf (stderr, "\ny = "); - write_mpn (stderr, 16, pub.p + ecc->p.size, ecc->p.size); - fprintf (stderr, "\nPrivate key: "); + fprintf (stderr, "Private key: "); write_mpn (stderr, 16, key.p, ecc->p.size); fprintf (stderr, "\n"); } diff --git a/testsuite/eddsa-compress-test.c b/testsuite/eddsa-compress-test.c index f95da870967e..812a085c9040 100644 --- a/testsuite/eddsa-compress-test.c +++ b/testsuite/eddsa-compress-test.c @@ -44,6 +44,7 @@ void test_main (void) mpz_t zp, t; mp_limb_t *s; mp_limb_t *p; + mp_limb_t *p2; mp_limb_t *pa1; mp_limb_t *pa2; mp_limb_t *scratch; @@ -61,6 +62,7 @@ void test_main (void) mpz_init (t); s = xalloc_limbs (size); p = xalloc_limbs (ecc_size_j (ecc)); + p2 = xalloc_limbs (ecc_size_j (ecc)); pa1 = xalloc_limbs (ecc_size_a (ecc)); pa2 = xalloc_limbs (ecc_size_a (ecc)); c = xalloc (clen); @@ -79,8 +81,9 @@ void test_main (void) mpz_limbs_copy (s, t, ecc->q.size); ecc->mul_g (ecc, p, s, scratch); _eddsa_compress (ecc, c, p, scratch); - ecc->h_to_a (ecc, 0, pa1, p, scratch); - _eddsa_decompress (ecc, pa2, c, scratch); + ecc->h_to_a (ecc, 0, pa1, pa1 + size, p, scratch); + _eddsa_decompress (ecc, p2, c, scratch); + ecc->h_to_a (ecc, 0, pa2, pa2 + size, p2, scratch); mpz_roinit_n (x1, pa1, size); mpz_roinit_n (y1, pa1 + size, size); mpz_roinit_n (x2, pa2, size); @@ -105,6 +108,7 @@ void test_main (void) mpz_clear (t); free (s); free (p); + free (p2); free (c); free (pa1); free (pa2); diff --git a/testsuite/eddsa-verify-test.c b/testsuite/eddsa-verify-test.c index 770080591b13..d58095b900ed 100644 --- a/testsuite/eddsa-verify-test.c +++ b/testsuite/eddsa-verify-test.c @@ -41,7 +41,7 @@ test_eddsa (const struct ecc_curve *ecc, const struct tstring *msg, const uint8_t *signature) { - mp_limb_t *A = xalloc_limbs (ecc_size_a (ecc)); + mp_limb_t *A = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *scratch = xalloc_limbs (_eddsa_verify_itch (ecc)); size_t nbytes = 1 + ecc->p.bit_size / 8; uint8_t *cmsg = xalloc (msg->length); diff --git a/testsuite/testutils.c b/testsuite/testutils.c index c9f21bab2346..e999044614a2 100644 --- a/testsuite/testutils.c +++ b/testsuite/testutils.c @@ -1838,7 +1838,7 @@ test_ecc_mul_h (unsigned curve, unsigned n, const mp_limb_t *p) const struct ecc_curve *ecc = ecc_curves[curve]; mp_limb_t *np = xalloc_limbs (ecc_size_a (ecc)); mp_limb_t *scratch = xalloc_limbs (ecc->h_to_a_itch); - ecc->h_to_a (ecc, 0, np, p, scratch); + ecc->h_to_a (ecc, 0, np, np + ecc->p.size, p, scratch);
test_ecc_mul_a (curve, n, np);
dbaryshkov@gmail.com writes:
From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com
Use jacobian/harmonized representation in ecc_point structure.
Can you explain what benefit you see from this?
E.g., ecc_point_mul takes a const struct ecc_point as input, and calls ecc->mul, which for the standard weierstrass curves is ecc_mul_a. This one is a loop including a call to ecc_add_jja, which assumes that the z coordinate is one.
(ecc_mul_a also calls ecc_a_to_j, which appears to write a z = 1 coordinate, in montgomery representation if appropriate. Even though the z coordinate is not used in this case. Maybe no callers of ecc_a_to_j needs the z coordiante? Needs some investigation. Reading the code a few years after it was written I wish I had been more thorough with documenting details input and output assumptions).
Doing ecc_point_mul starting with a point represented as (X, Y, Z) where Z is arbitrary will be slower, since it needs to take one more coordinate into account in the point addition.
It could be useful to work exclusively with jacobian or homogeneous coordinates if you want to make a series of operations on points, and only convert back to affine at the very end. But I'd like to see a clear use case before trying to optimize for that.
diff --git a/ecc-a-to-j.c b/ecc-a-to-j.c index 9fb0d2b80c41..895502e0fe20 100644 --- a/ecc-a-to-j.c +++ b/ecc-a-to-j.c @@ -40,11 +40,12 @@
void ecc_a_to_j (const struct ecc_curve *ecc,
mp_limb_t *r, const mp_limb_t *p)
mp_limb_t *r, const mpz_t x, const mpz_t y)
{ if (ecc->use_redc) {
mpn_copyd (r + ecc->p.size, p, 2*ecc->p.size);
mpz_limbs_copy (r + ecc->p.size, x, ecc->p.size);
mpz_limbs_copy (r + 2 * ecc->p.size, y, ecc->p.size);
I don't think we should use the mpz_t type in internal ecc functions. mpz_limbs_copy is not side-channnel silent, since organization of storage inside mpz_t is different depending on the leading bits.
Regards, /Niels
nisse@lysator.liu.se (Niels Möller) writes:
E.g., ecc_point_mul takes a const struct ecc_point as input, and calls ecc->mul, which for the standard weierstrass curves is ecc_mul_a. This one is a loop including a call to ecc_add_jja, which assumes that the z coordinate is one.
Sorry, looked at the wrong implementation of ecc_mul_a. ecc_add_jja is used in the *main loop* only if
ECC_MUL_A_WBITS == 0
However, when ECC_MUL_A_WBITS > 0 (currently set to 4), ecc_add_jja is still used in the table_init function.
So being able to require that z == 1 is still beneficial.
Regards, /Niels
Hello,
чт, 5 дек. 2019 г. в 00:18, Niels Möller nisse@lysator.liu.se:
dbaryshkov@gmail.com writes:
From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com
Use jacobian/harmonized representation in ecc_point structure.
Can you explain what benefit you see from this?
Well, I've had two particular GOST curves in mind. They are defined in Weierstrass form, but have birationally equal Edwards curves that can be used for point addition. Converting to/from Edwards form during each operation doesn't look like a good solution.
E.g., ecc_point_mul takes a const struct ecc_point as input, and calls ecc->mul, which for the standard weierstrass curves is ecc_mul_a. This one is a loop including a call to ecc_add_jja, which assumes that the z coordinate is one.
(ecc_mul_a also calls ecc_a_to_j, which appears to write a z = 1 coordinate, in montgomery representation if appropriate. Even though the z coordinate is not used in this case. Maybe no callers of ecc_a_to_j needs the z coordiante? Needs some investigation. Reading the code a few years after it was written I wish I had been more thorough with documenting details input and output assumptions).
Doing ecc_point_mul starting with a point represented as (X, Y, Z) where Z is arbitrary will be slower, since it needs to take one more coordinate into account in the point addition.
It could be useful to work exclusively with jacobian or homogeneous coordinates if you want to make a series of operations on points, and only convert back to affine at the very end. But I'd like to see a clear use case before trying to optimize for that.
diff --git a/ecc-a-to-j.c b/ecc-a-to-j.c index 9fb0d2b80c41..895502e0fe20 100644 --- a/ecc-a-to-j.c +++ b/ecc-a-to-j.c @@ -40,11 +40,12 @@
void ecc_a_to_j (const struct ecc_curve *ecc,
mp_limb_t *r, const mp_limb_t *p)
mp_limb_t *r, const mpz_t x, const mpz_t y)
{ if (ecc->use_redc) {
mpn_copyd (r + ecc->p.size, p, 2*ecc->p.size);
mpz_limbs_copy (r + ecc->p.size, x, ecc->p.size);
mpz_limbs_copy (r + 2 * ecc->p.size, y, ecc->p.size);
I don't think we should use the mpz_t type in internal ecc functions. mpz_limbs_copy is not side-channnel silent, since organization of storage inside mpz_t is different depending on the leading bits.
Hmm, I don't see how mpz_t internal representation can be a threat. Also this function becomes less internal now, as it is called only from ecc_point_set().
If you wish, I can change a_to_j to copy available limbs and zero-fill rest of limbs instead of using mpz_t/mpz_limbs_copy().
Dmitry Eremin-Solenikov dbaryshkov@gmail.com writes:
Well, I've had two particular GOST curves in mind. They are defined in Weierstrass form, but have birationally equal Edwards curves that can be used for point addition.
And to do that conversion without an expensive modular inversion, you get a Z != 1?
It might make sense to to more work in ecc_point_set, but I'd prefer to not introduce a Z cooordinate for the current weierstrass curves.
Converting to/from Edwards form during each operation doesn't look like a good solution.
And what are those operations? ecdsa_verify? I agree it's not ideal to have that function do the coordinate conversion every time. On the other hand, conversion may be very cheap compared to the two scalar multiplications done by ecdsa_verify. I think I'd prefer to postpone that optimization.
(For your curves, you may need a slightly different ecc_mul function to support Z != 1, same loop but different table_init).
Hmm, I don't see how mpz_t internal representation can be a threat.
In this case, maybe not (in particular if it's for ecdsa_verify, with no secret inputs). But in general, practically any mpz_t operation on a number of nominally n bits will leak information on whether or not most significant bits are all zero.
That's why I prefer to use mpz_t only in the public api (and even there, it might be preferable to use octet strings of predetermined length, just like for the curve25519 operations). Depending on applications' needs.
Regards, /Niels
Hello,
чт, 5 дек. 2019 г., 8:15 Niels Möller nisse@lysator.liu.se:
Dmitry Eremin-Solenikov dbaryshkov@gmail.com writes:
Well, I've had two particular GOST curves in mind. They are defined in Weierstrass form, but have birationally equal Edwards curves that can be used for point addition.
And to do that conversion without an expensive modular inversion, you get a Z != 1?
This conversion will take place at each a_to_h call. More importantly the conversion at h_to_a will include modular inversion. So I wanted to lower the amount of such conversions.
It might make sense to to more work in ecc_point_set, but I'd prefer to not introduce a Z cooordinate for the current weierstrass curves.
Would it be ok to change ecc_point size to become a per curve option?
Converting to/from Edwards form during each operation doesn't look like a good solution.
And what are those operations? ecdsa_verify? I agree it's not ideal to
A close rival to ecdsa sign/verify (see rfc 7091 and 5832).
have that function do the coordinate conversion every time. On the other
hand, conversion may be very cheap compared to the two scalar multiplications done by ecdsa_verify. I think I'd prefer to postpone that optimization.
No problem, I'll submit then the next round of patches without this one.
(For your curves, you may need a slightly different ecc_mul function to support Z != 1, same loop but different table_init).
And ecc point memory size. And adjusting tests that use low-level representation.
Hmm, I don't see how mpz_t internal representation can be a threat.
In this case, maybe not (in particular if it's for ecdsa_verify, with no secret inputs). But in general, practically any mpz_t operation on a number of nominally n bits will leak information on whether or not most significant bits are all zero.
Again, in this case it concerns only public key being converted to an internal representation.
That's why I prefer to use mpz_t only in the public api (and even there, it might be preferable to use octet strings of predetermined length, just like for the curve25519 operations). Depending on applications' needs.
Dmitry Eremin-Solenikov dbaryshkov@gmail.com writes:
Would it be ok to change ecc_point size to become a per curve option?
If needed, yes.
(I've also been considering switching to using extended X,Y,Z,T coordinates as internal representation for twisted edwards curves, but unclear if it's worth the effort. See the section on cooordinate choice on https://www.lysator.liu.se/~nisse/misc/ed25519-msp430.html for some details).
A close rival to ecdsa sign/verify (see rfc 7091 and 5832).
Ok, I'll have to look that up (but not at the moment).
Regards, /Niels
nettle-bugs@lists.lysator.liu.se