From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Add common implementations for functions doing XOR over nettle_block16/nettle_block8.
Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
---
 Makefile.in      |  3 +-
 block-internal.h | 93 ++++++++++++++++++++++++++++++++++++++++++++++++
 cmac.c           | 11 +++---
 cmac64.c         | 12 +++----
 eax.c            |  9 +----
 gcm.c            | 20 ++++-------
 siv-cmac.c       |  9 ++---
 7 files changed, 120 insertions(+), 37 deletions(-)
 create mode 100644 block-internal.h

diff --git a/Makefile.in b/Makefile.in
index af4f6e46ee9b..f6658c86341c 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -230,7 +230,8 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt.h getopt_int.h \
 	INSTALL NEWS ChangeLog \
 	nettle.pc.in hogweed.pc.in \
 	$(des_headers) descore.README desdata.stamp \
-	aes-internal.h camellia-internal.h cmac-internal.h serpent-internal.h \
+	aes-internal.h block-internal.h \
+	camellia-internal.h cmac-internal.h serpent-internal.h \
 	cast128_sboxes.h desinfo.h desCode.h \
 	ripemd160-internal.h sha2-internal.h \
 	memxor-internal.h nettle-internal.h nettle-write.h \
diff --git a/block-internal.h b/block-internal.h
new file mode 100644
index 000000000000..ab3a6a79b8cb
--- /dev/null
+++ b/block-internal.h
@@ -0,0 +1,93 @@
+/* block-internal.h
+
+   Internal implementations of nettle_blockZ-related functions.
+
+   Copyright (C) 2011 Katholieke Universiteit Leuven
+   Copyright (C) 2011, 2013, 2018 Niels Möller
+   Copyright (C) 2018 Red Hat, Inc.
+   Copyright (C) 2019 Dmitry Eremin-Solenikov
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef NETTLE_BLOCK_INTERNAL_H_INCLUDED
+#define NETTLE_BLOCK_INTERNAL_H_INCLUDED
+
+#include <assert.h>
+
+#include "nettle-types.h"
+#include "memxor.h"
+
+static inline void
+block16_xor (union nettle_block16 *r,
+             const union nettle_block16 *x)
+{
+  r->u64[0] ^= x->u64[0];
+  r->u64[1] ^= x->u64[1];
+}
+
+static inline void
+block16_xor3 (union nettle_block16 *r,
+              const union nettle_block16 *x,
+              const union nettle_block16 *y)
+{
+  r->u64[0] = x->u64[0] ^ y->u64[0];
+  r->u64[1] = x->u64[1] ^ y->u64[1];
+}
+
+static inline void
+block16_xor_bytes (union nettle_block16 *r,
+                   const union nettle_block16 *x,
+                   const uint8_t *bytes)
+{
+  memxor3 (r->b, x->b, bytes, 16);
+}
+
+static inline void
+block8_xor (union nettle_block8 *r,
+            const union nettle_block8 *x)
+{
+  r->u64 ^= x->u64;
+}
+
+static inline void
+block8_xor3 (union nettle_block8 *r,
+             const union nettle_block8 *x,
+             const union nettle_block8 *y)
+{
+  r->u64 = x->u64 ^ y->u64;
+}
+
+static inline void
+block8_xor_bytes (union nettle_block8 *r,
+                  const union nettle_block8 *x,
+                  const uint8_t *bytes)
+{
+  memxor3 (r->b, x->b, bytes, 8);
+}
+
+#endif /* NETTLE_BLOCK_INTERNAL_H_INCLUDED */
diff --git a/cmac.c b/cmac.c
index 70ce8132d9d1..194324421c58 100644
--- a/cmac.c
+++ b/cmac.c
@@ -45,6 +45,7 @@
 #include "memxor.h"
 #include "nettle-internal.h"
 #include "cmac-internal.h"
+#include "block-internal.h"
 #include "macros.h"
 
 /* shift one and XOR with 0x87. */
@@ -119,12 +120,12 @@ cmac128_update(struct cmac128_ctx *ctx, const void *cipher,
   /*
    * now checksum everything but the last block
    */
-  memxor3(Y.b, ctx->X.b, ctx->block.b, 16);
+  block16_xor3(&Y, &ctx->X, &ctx->block);
   encrypt(cipher, 16, ctx->X.b, Y.b);
 
   while (msg_len > 16)
     {
-      memxor3(Y.b, ctx->X.b, msg, 16);
+      block16_xor_bytes (&Y, &ctx->X, msg);
       encrypt(cipher, 16, ctx->X.b, Y.b);
       msg += 16;
       msg_len -= 16;
@@ -151,14 +152,14 @@ cmac128_digest(struct cmac128_ctx *ctx, const struct cmac128_key *key,
       ctx->block.b[ctx->index] = 0x80;
       memset(ctx->block.b + ctx->index + 1, 0, 16 - 1 - ctx->index);
 
-      memxor(ctx->block.b, key->K2.b, 16);
+      block16_xor (&ctx->block, &key->K2);
     }
   else
     {
-      memxor(ctx->block.b, key->K1.b, 16);
+      block16_xor (&ctx->block, &key->K1);
     }
 
-  memxor3(Y.b, ctx->block.b, ctx->X.b, 16);
+  block16_xor3 (&Y, &ctx->block, &ctx->X);
 
   assert(length <= 16);
   if (length == 16)
diff --git a/cmac64.c b/cmac64.c
index 636635ba478b..e7bb438580d6 100644
--- a/cmac64.c
+++ b/cmac64.c
@@ -43,8 +43,8 @@
 
 #include "cmac.h"
 
-#include "memxor.h"
 #include "nettle-internal.h"
+#include "block-internal.h"
 #include "macros.h"
 
 /* shift one and XOR with 0x87. */
@@ -119,12 +119,12 @@ cmac64_update(struct cmac64_ctx *ctx, const void *cipher,
   /*
    * now checksum everything but the last block
    */
-  memxor3(Y.b, ctx->X.b, ctx->block.b, 8);
+  block8_xor3(&Y, &ctx->X, &ctx->block);
   encrypt(cipher, 8, ctx->X.b, Y.b);
 
   while (msg_len > 8)
     {
-      memxor3(Y.b, ctx->X.b, msg, 8);
+      block8_xor_bytes(&Y, &ctx->X, msg);
       encrypt(cipher, 8, ctx->X.b, Y.b);
       msg += 8;
       msg_len -= 8;
@@ -151,14 +151,14 @@ cmac64_digest(struct cmac64_ctx *ctx, const struct cmac64_key *key,
   if (ctx->index < 8)
     {
       ctx->block.b[ctx->index] = 0x80;
-      memxor(ctx->block.b, key->K2.b, 8);
+      block8_xor(&ctx->block, &key->K2);
     }
   else
     {
-      memxor(ctx->block.b, key->K1.b, 8);
+      block8_xor(&ctx->block, &key->K1);
     }
 
-  memxor3(Y.b, ctx->block.b, ctx->X.b, 8);
+  block8_xor3(&Y, &ctx->block, &ctx->X);
 
   assert(length <= 8);
   if (length == 8)
diff --git a/eax.c b/eax.c
index 4b8b5117746e..63f3ff82fe65 100644
--- a/eax.c
+++ b/eax.c
@@ -40,6 +40,7 @@
 
 #include "eax.h"
 
+#include "block-internal.h"
 #include "ctr.h"
 #include "memxor.h"
 
@@ -50,14 +51,6 @@ omac_init (union nettle_block16 *state, unsigned t)
   state->b[EAX_BLOCK_SIZE - 1] = t;
 }
 
-/* Almost the same as gcm_gf_add */
-static void
-block16_xor (union nettle_block16 *dst, const union nettle_block16 *src)
-{
-  dst->u64[0] ^= src->u64[0];
-  dst->u64[1] ^= src->u64[1];
-}
-
 static void
 omac_update (union nettle_block16 *state, const struct eax_key *key,
              const void *cipher, nettle_cipher_func *f,
diff --git a/gcm.c b/gcm.c
index 627097b24218..4a04a0a10842 100644
--- a/gcm.c
+++ b/gcm.c
@@ -53,16 +53,10 @@
 #include "nettle-internal.h"
 #include "macros.h"
 #include "ctr-internal.h"
+#include "block-internal.h"
 
 #define GHASH_POLYNOMIAL 0xE1UL
 
-static void
-gcm_gf_add (union nettle_block16 *r,
-            const union nettle_block16 *x, const union nettle_block16 *y)
-{
-  r->u64[0] = x->u64[0] ^ y->u64[0];
-  r->u64[1] = x->u64[1] ^ y->u64[1];
-}
 /* Multiplication by 010...0; a big-endian shift right. If the bit
    shifted out is one, the defining polynomial is added to cancel it
    out. r == x is allowed. */
@@ -108,7 +102,7 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *y)
	  for (j = 0; j < 8; j++, b <<= 1)
	    {
	      if (b & 0x80)
-		gcm_gf_add(&Z, &Z, &V);
+		block16_xor(&Z, &V);
	      gcm_gf_shift(&V, &V);
	    }
@@ -165,9 +159,9 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table)
       uint8_t b = x->b[i];
 
       gcm_gf_shift_4(&Z);
-      gcm_gf_add(&Z, &Z, &table[b & 0xf]);
+      block16_xor(&Z, &table[b & 0xf]);
       gcm_gf_shift_4(&Z);
-      gcm_gf_add(&Z, &Z, &table[b >> 4]);
+      block16_xor(&Z, &table[b >> 4]);
     }
   memcpy (x->b, Z.b, sizeof(Z));
 }
@@ -243,10 +237,10 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table)
   for (i = GCM_BLOCK_SIZE-2; i > 0; i--)
     {
       gcm_gf_shift_8(&Z);
-      gcm_gf_add(&Z, &Z, &table[x->b[i]]);
+      block16_xor(&Z, &table[x->b[i]]);
     }
   gcm_gf_shift_8(&Z);
-  gcm_gf_add(x, &Z, &table[x->b[0]]);
+  block16_xor3(x, &Z, &table[x->b[0]]);
 }
 # endif /* ! HAVE_NATIVE_gcm_hash8 */
 # else /* GCM_TABLE_BITS != 8 */
@@ -286,7 +280,7 @@ gcm_set_key(struct gcm_key *key,
     {
       unsigned j;
       for (j = 1; j < i; j++)
-	gcm_gf_add(&key->h[i+j], &key->h[i],&key->h[j]);
+	block16_xor3(&key->h[i+j], &key->h[i],&key->h[j]);
     }
 #endif
 }
diff --git a/siv-cmac.c b/siv-cmac.c
index f498cb863f5a..42f740cddf5d 100644
--- a/siv-cmac.c
+++ b/siv-cmac.c
@@ -46,6 +46,7 @@
 #include "memops.h"
 #include "cmac-internal.h"
 #include "nettle-internal.h"
+#include "block-internal.h"
 
 /* This is an implementation of S2V for the AEAD case where
  * vectors if zero, are considered as S empty components */
@@ -69,12 +70,12 @@ _siv_s2v (const struct nettle_cipher *nc,
   _cmac128_block_mulx (&D, &D);
   cmac128_update (&cmac_ctx, cmac_cipher, nc->encrypt, alength, adata);
   cmac128_digest (&cmac_ctx, cmac_key, cmac_cipher, nc->encrypt, 16, S.b);
-  memxor (D.b, S.b, 16);
+  block16_xor (&D, &S);
 
   _cmac128_block_mulx (&D, &D);
   cmac128_update (&cmac_ctx, cmac_cipher, nc->encrypt, nlength, nonce);
   cmac128_digest (&cmac_ctx, cmac_key, cmac_cipher, nc->encrypt, 16, S.b);
-  memxor (D.b, S.b, 16);
+  block16_xor (&D, &S);
 
   /* Sn */
   if (plength >= 16)
     {
@@ -83,7 +84,7 @@ _siv_s2v (const struct nettle_cipher *nc,
 
       pdata += plength - 16;
 
-      memxor3 (T.b, pdata, D.b, 16);
+      block16_xor_bytes (&T, &D, pdata);
     }
   else
     {
@@ -95,7 +96,7 @@ _siv_s2v (const struct nettle_cipher *nc,
       if (plength + 1 < 16)
	memset (&pad.b[plength + 1], 0, 16 - plength - 1);
 
-      memxor (T.b, pad.b, 16);
+      block16_xor (&T, &pad);
     }
 
   cmac128_update (&cmac_ctx, cmac_cipher, nc->encrypt, 16, T.b);
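
The new helpers are drop-in equivalents of the memxor()/memxor3() calls
they replace, just operating a 64-bit word at a time. A quick standalone
check along the following lines (a sketch, not part of the patch; it
assumes only the union layout from nettle-types.h) confirms the intended
equivalence:

  #include <assert.h>
  #include <string.h>
  #include "nettle-types.h"
  #include "memxor.h"
  #include "block-internal.h"

  int
  main (void)
  {
    union nettle_block16 r, ref, x, y;
    memset (x.b, 0xa5, 16);
    memset (y.b, 0x3c, 16);
    block16_xor3 (&r, &x, &y);      /* word-at-a-time XOR */
    memxor3 (ref.b, x.b, y.b, 16);  /* byte-wise reference */
    assert (memcmp (r.b, ref.b, 16) == 0);
    return 0;
  }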
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Move the Galois polynomial shifts to block-internal.h, simplifying common code. GCM is left unconverted for now; this will be fixed later.
Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
---
 Makefile.in       |  2 +-
 block-internal.h  | 72 +++++++++++++++++++++++++++++++++++++++++++++++
 cmac-internal.h   | 54 -----------------------------------
 cmac.c            | 28 ++----------------
 cmac64.c          | 27 ++----------------
 eax.c             | 18 ++----------
 siv-cmac-aes128.c |  1 -
 siv-cmac-aes256.c |  1 -
 siv-cmac.c        |  7 ++---
 xts.c             | 34 ++++------------------
 10 files changed, 87 insertions(+), 157 deletions(-)
 delete mode 100644 cmac-internal.h

diff --git a/Makefile.in b/Makefile.in
index f6658c86341c..ae9c8a7563f9 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -231,7 +231,7 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt.h getopt_int.h \
 	nettle.pc.in hogweed.pc.in \
 	$(des_headers) descore.README desdata.stamp \
 	aes-internal.h block-internal.h \
-	camellia-internal.h cmac-internal.h serpent-internal.h \
+	camellia-internal.h serpent-internal.h \
 	cast128_sboxes.h desinfo.h desCode.h \
 	ripemd160-internal.h sha2-internal.h \
 	memxor-internal.h nettle-internal.h nettle-write.h \
diff --git a/block-internal.h b/block-internal.h
index ab3a6a79b8cb..8972d0ac2b5b 100644
--- a/block-internal.h
+++ b/block-internal.h
@@ -90,4 +90,76 @@ block8_xor_bytes (union nettle_block8 *r,
   memxor3 (r->b, x->b, bytes, 8);
 }
 
+/* Do a foreign-endianness shift of data */
+
+#define LSHIFT_ALIEN_UINT64(x) \
+  ((((x) & UINT64_C(0x7f7f7f7f7f7f7f7f)) << 1) | \
+   (((x) & UINT64_C(0x8080808080808080)) >> 15))
+
+/* Two typical defining polynomials */
+
+#define BLOCK16_POLY (UINT64_C(0x87))
+#define BLOCK8_POLY (UINT64_C(0x1b))
+
+/* Galois multiplications by 2:
+ * functions differ in shifting right or left, big- or little-endianness
+ * and by defining polynomial.
+ * r == x is allowed. */
+
+#if WORDS_BIGENDIAN
+static inline void
+block16_mulx_be (union nettle_block16 *dst,
+                 const union nettle_block16 *src)
+{
+  uint64_t carry = src->u64[0] >> 63;
+  dst->u64[0] = (src->u64[0] << 1) | (src->u64[1] >> 63);
+  dst->u64[1] = (src->u64[1] << 1) ^ (BLOCK16_POLY & -carry);
+}
+
+static inline void
+block16_mulx_le (union nettle_block16 *dst,
+                 const union nettle_block16 *src)
+{
+  uint64_t carry = (src->u64[1] & 0x80) >> 7;
+  dst->u64[1] = LSHIFT_ALIEN_UINT64(src->u64[1]) | ((src->u64[0] & 0x80) << 49);
+  dst->u64[0] = LSHIFT_ALIEN_UINT64(src->u64[0]) ^ ((BLOCK16_POLY << 56) & -carry);
+}
+
+static inline void
+block8_mulx_be (union nettle_block8 *dst,
+                const union nettle_block8 *src)
+{
+  uint64_t carry = src->u64 >> 63;
+
+  dst->u64 = (src->u64 << 1) ^ (BLOCK8_POLY & -carry);
+}
+#else /* !WORDS_BIGENDIAN */
+static inline void
+block16_mulx_be (union nettle_block16 *dst,
+                 const union nettle_block16 *src)
+{
+  uint64_t carry = (src->u64[0] & 0x80) >> 7;
+  dst->u64[0] = LSHIFT_ALIEN_UINT64(src->u64[0]) | ((src->u64[1] & 0x80) << 49);
+  dst->u64[1] = LSHIFT_ALIEN_UINT64(src->u64[1]) ^ ((BLOCK16_POLY << 56) & -carry);
+}
+
+static inline void
+block16_mulx_le (union nettle_block16 *dst,
+                 const union nettle_block16 *src)
+{
+  uint64_t carry = src->u64[1] >> 63;
+  dst->u64[1] = (src->u64[1] << 1) | (src->u64[0] >> 63);
+  dst->u64[0] = (src->u64[0] << 1) ^ (BLOCK16_POLY & -carry);
+}
+
+static inline void
+block8_mulx_be (union nettle_block8 *dst,
+                const union nettle_block8 *src)
+{
+  uint64_t carry = (src->u64 & 0x80) >> 7;
+
+  dst->u64 = LSHIFT_ALIEN_UINT64(src->u64) ^ ((BLOCK8_POLY << 56) & -carry);
+}
+#endif /* !WORDS_BIGENDIAN */
+
 #endif /* NETTLE_BLOCK_INTERNAL_H_INCLUDED */
diff --git a/cmac-internal.h b/cmac-internal.h
deleted file mode 100644
index 80db7fcc58cd..000000000000
--- a/cmac-internal.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* cmac-internal.h
-
-   CMAC mode internal functions
-
-   Copyright (C) 2017 Red Hat, Inc.
-
-   Contributed by Nikos Mavrogiannopoulos
-
-   This file is part of GNU Nettle.
-
-   GNU Nettle is free software: you can redistribute it and/or
-   modify it under the terms of either:
-
-     * the GNU Lesser General Public License as published by the Free
-       Software Foundation; either version 3 of the License, or (at your
-       option) any later version.
-
-   or
-
-     * the GNU General Public License as published by the Free
-       Software Foundation; either version 2 of the License, or (at your
-       option) any later version.
-
-   or both in parallel, as here.
-
-   GNU Nettle is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received copies of the GNU General Public License and
-   the GNU Lesser General Public License along with this program.  If
-   not, see http://www.gnu.org/licenses/.
-*/
-
-#ifndef NETTLE_CMAC_INTERNAL_H_INCLUDED
-#define NETTLE_CMAC_INTERNAL_H_INCLUDED
-
-#include "cmac.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define _cmac128_block_mulx _nettle_cmac128_block_mulx
-
-void _cmac128_block_mulx(union nettle_block16 *out,
-                         const union nettle_block16 *in);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* CMAC_INTERNAL_H_INCLUDED */
diff --git a/cmac.c b/cmac.c
index 194324421c58..af9b3c2c0cf4 100644
--- a/cmac.c
+++ b/cmac.c
@@ -44,33 +44,9 @@
 
 #include "memxor.h"
 #include "nettle-internal.h"
-#include "cmac-internal.h"
 #include "block-internal.h"
 #include "macros.h"
 
-/* shift one and XOR with 0x87. */
-#if WORDS_BIGENDIAN
-void
-_cmac128_block_mulx(union nettle_block16 *dst,
-                    const union nettle_block16 *src)
-{
-  uint64_t carry = src->u64[0] >> 63;
-  dst->u64[0] = (src->u64[0] << 1) | (src->u64[1] >> 63);
-  dst->u64[1] = (src->u64[1] << 1) ^ (0x87 & -carry);
-}
-#else /* !WORDS_BIGENDIAN */
-#define LE_SHIFT(x) ((((x) & 0x7f7f7f7f7f7f7f7f) << 1) | \
-                     (((x) & 0x8080808080808080) >> 15))
-void
-_cmac128_block_mulx(union nettle_block16 *dst,
-                    const union nettle_block16 *src)
-{
-  uint64_t carry = (src->u64[0] & 0x80) >> 7;
-  dst->u64[0] = LE_SHIFT(src->u64[0]) | ((src->u64[1] & 0x80) << 49);
-  dst->u64[1] = LE_SHIFT(src->u64[1]) ^ (0x8700000000000000 & -carry);
-}
-#endif /* !WORDS_BIGENDIAN */
-
 void
 cmac128_set_key(struct cmac128_key *key, const void *cipher,
                 nettle_cipher_func *encrypt)
@@ -81,8 +57,8 @@ cmac128_set_key(struct cmac128_key *key, const void *cipher,
   /* step 1 - generate subkeys k1 and k2 */
   encrypt(cipher, 16, L.b, zero_block.b);
 
-  _cmac128_block_mulx(&key->K1, &L);
-  _cmac128_block_mulx(&key->K2, &key->K1);
+  block16_mulx_be(&key->K1, &L);
+  block16_mulx_be(&key->K2, &key->K1);
 }
 
 void
diff --git a/cmac64.c b/cmac64.c
index e7bb438580d6..e4be1b547c50 100644
--- a/cmac64.c
+++ b/cmac64.c
@@ -47,29 +47,6 @@
 #include "block-internal.h"
 #include "macros.h"
 
-/* shift one and XOR with 0x87. */
-#if WORDS_BIGENDIAN
-static void
-_cmac64_block_mulx(union nettle_block8 *dst,
-                   const union nettle_block8 *src)
-{
-  uint64_t carry = src->u64 >> 63;
-
-  dst->u64 = (src->u64 << 1) ^ (0x1b & -carry);
-}
-#else /* !WORDS_BIGENDIAN */
-#define LE_SHIFT(x) ((((x) & 0x7f7f7f7f7f7f7f7f) << 1) | \
-                     (((x) & 0x8080808080808080) >> 15))
-static void
-_cmac64_block_mulx(union nettle_block8 *dst,
-                   const union nettle_block8 *src)
-{
-  uint64_t carry = (src->u64 & 0x80) >> 7;
-
-  dst->u64 = LE_SHIFT(src->u64) ^ (0x1b00000000000000 & -carry);
-}
-#endif /* !WORDS_BIGENDIAN */
-
 void
 cmac64_set_key(struct cmac64_key *key, const void *cipher,
                nettle_cipher_func *encrypt)
@@ -80,8 +57,8 @@ cmac64_set_key(struct cmac64_key *key, const void *cipher,
   /* step 1 - generate subkeys k1 and k2 */
   encrypt(cipher, 8, L.b, zero_block.b);
 
-  _cmac64_block_mulx(&key->K1, &L);
-  _cmac64_block_mulx(&key->K2, &key->K1);
+  block8_mulx_be(&key->K1, &L);
+  block8_mulx_be(&key->K2, &key->K1);
 }
 
 void
diff --git a/eax.c b/eax.c
index 63f3ff82fe65..f0b6fac5c5b1 100644
--- a/eax.c
+++ b/eax.c
@@ -82,27 +82,13 @@ omac_final (union nettle_block16 *state, const struct eax_key *key,
   f (cipher, EAX_BLOCK_SIZE, state->b, state->b);
 }
 
-/* Allows r == a */
-static void
-gf2_double (uint8_t *r, const uint8_t *a)
-{
-  unsigned high = - (a[0] >> 7);
-  unsigned i;
-  /* Shift left */
-  for (i = 0; i < EAX_BLOCK_SIZE - 1; i++)
-    r[i] = (a[i] << 1) + (a[i+1] >> 7);
-
-  /* Wrap around for x^{128} = x^7 + x^2 + x + 1 */
-  r[EAX_BLOCK_SIZE - 1] = (a[EAX_BLOCK_SIZE - 1] << 1) ^ (high & 0x87);
-}
-
 void
 eax_set_key (struct eax_key *key, const void *cipher, nettle_cipher_func *f)
 {
   static const union nettle_block16 zero_block;
   f (cipher, EAX_BLOCK_SIZE, key->pad_block.b, zero_block.b);
-  gf2_double (key->pad_block.b, key->pad_block.b);
-  gf2_double (key->pad_partial.b, key->pad_block.b);
+  block16_mulx_be (&key->pad_block, &key->pad_block);
+  block16_mulx_be (&key->pad_partial, &key->pad_block);
   block16_xor (&key->pad_partial, &key->pad_block);
 }
diff --git a/siv-cmac-aes128.c b/siv-cmac-aes128.c
index 82ac16e91992..fd2e23a4513e 100644
--- a/siv-cmac-aes128.c
+++ b/siv-cmac-aes128.c
@@ -44,7 +44,6 @@
 #include "ctr.h"
 #include "memxor.h"
 #include "memops.h"
-#include "cmac-internal.h"
 
 void
 siv_cmac_aes128_set_key(struct siv_cmac_aes128_ctx *ctx, const uint8_t *key)
diff --git a/siv-cmac-aes256.c b/siv-cmac-aes256.c
index 9401bbf119c5..eda7f1c27a55 100644
--- a/siv-cmac-aes256.c
+++ b/siv-cmac-aes256.c
@@ -44,7 +44,6 @@
 #include "ctr.h"
 #include "memxor.h"
 #include "memops.h"
-#include "cmac-internal.h"
 
 void
 siv_cmac_aes256_set_key(struct siv_cmac_aes256_ctx *ctx, const uint8_t *key)
diff --git a/siv-cmac.c b/siv-cmac.c
index 42f740cddf5d..8205c320f544 100644
--- a/siv-cmac.c
+++ b/siv-cmac.c
@@ -44,7 +44,6 @@
 #include "ctr.h"
 #include "memxor.h"
 #include "memops.h"
-#include "cmac-internal.h"
 #include "nettle-internal.h"
 #include "block-internal.h"
@@ -67,12 +66,12 @@ _siv_s2v (const struct nettle_cipher *nc,
   cmac128_update (&cmac_ctx, cmac_cipher, nc->encrypt, 16, const_zero.b);
   cmac128_digest (&cmac_ctx, cmac_key, cmac_cipher, nc->encrypt, 16, D.b);
 
-  _cmac128_block_mulx (&D, &D);
+  block16_mulx_be (&D, &D);
   cmac128_update (&cmac_ctx, cmac_cipher, nc->encrypt, alength, adata);
   cmac128_digest (&cmac_ctx, cmac_key, cmac_cipher, nc->encrypt, 16, S.b);
   block16_xor (&D, &S);
 
-  _cmac128_block_mulx (&D, &D);
+  block16_mulx_be (&D, &D);
   cmac128_update (&cmac_ctx, cmac_cipher, nc->encrypt, nlength, nonce);
   cmac128_digest (&cmac_ctx, cmac_key, cmac_cipher, nc->encrypt, 16, S.b);
   block16_xor (&D, &S);
@@ -90,7 +89,7 @@ _siv_s2v (const struct nettle_cipher *nc,
     {
       union nettle_block16 pad;
 
-      _cmac128_block_mulx (&T, &D);
+      block16_mulx_be (&T, &D);
       memcpy (pad.b, pdata, plength);
       pad.b[plength] = 0x80;
       if (plength + 1 < 16)
diff --git a/xts.c b/xts.c
index 6730b3ad76ff..7b023748ddd4 100644
--- a/xts.c
+++ b/xts.c
@@ -44,31 +44,7 @@
 #include "macros.h"
 #include "memxor.h"
 #include "nettle-internal.h"
-
-/* shift left one and XOR with 0x87 if there is carry. */
-/* the algorithm reads this as a 128bit Little Endian number */
-/* src and dest can point to the same buffer for in-place operations */
-#if WORDS_BIGENDIAN
-#define BE_SHIFT(x) ((((x) & 0x7f7f7f7f7f7f7f7f) << 1) | \
-                     (((x) & 0x8080808080808080) >> 15))
-static void
-xts_shift(union nettle_block16 *dst,
-          const union nettle_block16 *src)
-{
-  uint64_t carry = (src->u64[1] & 0x80) >> 7;
-  dst->u64[1] = BE_SHIFT(src->u64[1]) | ((src->u64[0] & 0x80) << 49);
-  dst->u64[0] = BE_SHIFT(src->u64[0]) ^ (0x8700000000000000 & -carry);
-}
-#else /* !WORDS_BIGENDIAN */
-static void
-xts_shift(union nettle_block16 *dst,
-          const union nettle_block16 *src)
-{
-  uint64_t carry = src->u64[1] >> 63;
-  dst->u64[1] = (src->u64[1] << 1) | (src->u64[0] >> 63);
-  dst->u64[0] = (src->u64[0] << 1) ^ (0x87 & -carry);
-}
-#endif /* !WORDS_BIGNDIAN */
+#include "block-internal.h"
 
 static void
 check_length(size_t length, uint8_t *dst)
@@ -107,7 +83,7 @@ xts_encrypt_message(const void *enc_ctx, const void *twk_ctx,
 
       /* shift T for next block if any */
       if (length > XTS_BLOCK_SIZE)
-	xts_shift(&T, &T);
+	block16_mulx_le(&T, &T);
     }
 
   /* if the last block is partial, handle via stealing */
@@ -121,7 +97,7 @@ xts_encrypt_message(const void *enc_ctx, const void *twk_ctx,
      memxor(S.b, T.b, XTS_BLOCK_SIZE);	/* CC -> S */
 
      /* shift T for next block */
-     xts_shift(&T, &T);
+     block16_mulx_le(&T, &T);
 
      length -= XTS_BLOCK_SIZE;
     src += XTS_BLOCK_SIZE;
@@ -162,7 +138,7 @@ xts_decrypt_message(const void *dec_ctx, const void *twk_ctx,
 
      /* shift T for next block if any */
      if (length > XTS_BLOCK_SIZE)
-	xts_shift(&T, &T);
+	block16_mulx_le(&T, &T);
     }
 
   /* if the last block is partial, handle via stealing */
@@ -173,7 +149,7 @@ xts_decrypt_message(const void *dec_ctx, const void *twk_ctx,
      union nettle_block16 S;
 
      /* we need the last T(n) and save the T(n-1) for later */
-     xts_shift(&T1, &T);
+     block16_mulx_le(&T1, &T);
 
     memxor3(C.b, src, T1.b, XTS_BLOCK_SIZE);	/* C -> CC */
    decf(dec_ctx, XTS_BLOCK_SIZE, S.b, C.b);	/* PP */
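
Since xts_shift() is now the shared block16_mulx_le(), the helper can be
cross-checked against a byte-wise formulation of XTS's multiply-by-x:
read the block as a little-endian 128-bit number, shift left by one, and
fold 0x87 into the low byte on carry. mulx_le_ref() below is a
hypothetical reference for such a check, not part of the patch:

  #include <assert.h>
  #include <string.h>
  #include "block-internal.h"

  /* Byte-wise reference: little-endian shift left by one bit,
     XOR 0x87 into the low byte when the top bit falls out. */
  static void
  mulx_le_ref (uint8_t *r, const uint8_t *a)
  {
    unsigned i, carry = a[15] >> 7;
    for (i = 15; i > 0; i--)
      r[i] = (a[i] << 1) | (a[i-1] >> 7);
    r[0] = (a[0] << 1) ^ (0x87 & -carry);
  }

  int
  main (void)
  {
    union nettle_block16 t, u;
    uint8_t ref[16];
    memset (t.b, 0xfe, 16);
    mulx_le_ref (ref, t.b);
    block16_mulx_le (&u, &t);
    assert (memcmp (u.b, ref, 16) == 0);
    return 0;
  }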
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Move GCM's block shift function to block-internal.h. This concludes the move of all Galois mul-by-2 functions into a single header.
Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
---
 block-internal.h | 30 +++++++++++++++++++++++++++++-
 gcm.c            | 30 ++----------------------------
 2 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/block-internal.h b/block-internal.h
index 8972d0ac2b5b..88e19be333c8 100644
--- a/block-internal.h
+++ b/block-internal.h
@@ -95,11 +95,15 @@ block8_xor_bytes (union nettle_block8 *r,
 #define LSHIFT_ALIEN_UINT64(x) \
   ((((x) & UINT64_C(0x7f7f7f7f7f7f7f7f)) << 1) | \
    (((x) & UINT64_C(0x8080808080808080)) >> 15))
+#define RSHIFT_ALIEN_UINT64(x) \
+  ((((x) & UINT64_C(0xfefefefefefefefe)) >> 1) | \
+   (((x) & UINT64_C(0x0001010101010101)) << 15))
 
 /* Two typical defining polynomials */
 
 #define BLOCK16_POLY (UINT64_C(0x87))
 #define BLOCK8_POLY (UINT64_C(0x1b))
+#define GHASH_POLY (UINT64_C(0xE1))
 
 /* Galois multiplications by 2:
  * functions differ in shifting right or left, big- or little-endianness
@@ -133,6 +137,18 @@ block8_mulx_be (union nettle_block8 *dst,
 
   dst->u64 = (src->u64 << 1) ^ (BLOCK8_POLY & -carry);
 }
+
+static inline void
+block16_mulx_ghash (union nettle_block16 *r,
+                    const union nettle_block16 *x)
+{
+  uint64_t mask;
+
+  /* Shift uses big-endian representation. */
+  mask = - (x->u64[1] & 1);
+  r->u64[1] = (x->u64[1] >> 1) | ((x->u64[0] & 1) << 63);
+  r->u64[0] = (x->u64[0] >> 1) ^ (mask & (GHASH_POLY << 56));
+}
 #else /* !WORDS_BIGENDIAN */
 static inline void
 block16_mulx_be (union nettle_block16 *dst,
@@ -160,6 +176,18 @@ block8_mulx_be (union nettle_block8 *dst,
 
   dst->u64 = LSHIFT_ALIEN_UINT64(src->u64) ^ ((BLOCK8_POLY << 56) & -carry);
 }
-#endif /* !WORDS_BIGENDIAN */
+
+static inline void
+block16_mulx_ghash (union nettle_block16 *r,
+                    const union nettle_block16 *x)
+{
+  uint64_t mask;
+
+  /* Shift uses big-endian representation. */
+  mask = - ((x->u64[1] >> 56) & 1);
+  r->u64[1] = RSHIFT_ALIEN_UINT64(x->u64[1]) | ((x->u64[0] >> 49) & 0x80);
+  r->u64[0] = RSHIFT_ALIEN_UINT64(x->u64[0]) ^ (mask & GHASH_POLY);
+}
+#endif /* ! WORDS_BIGENDIAN */
 
 #endif /* NETTLE_BLOCK_INTERNAL_H_INCLUDED */
diff --git a/gcm.c b/gcm.c
index 4a04a0a10842..cf615daf18bd 100644
--- a/gcm.c
+++ b/gcm.c
@@ -55,32 +55,6 @@
 #include "ctr-internal.h"
 #include "block-internal.h"
 
-#define GHASH_POLYNOMIAL 0xE1UL
-
-/* Multiplication by 010...0; a big-endian shift right. If the bit
-   shifted out is one, the defining polynomial is added to cancel it
-   out. r == x is allowed. */
-static void
-gcm_gf_shift (union nettle_block16 *r, const union nettle_block16 *x)
-{
-  uint64_t mask;
-
-  /* Shift uses big-endian representation. */
-#if WORDS_BIGENDIAN
-  mask = - (x->u64[1] & 1);
-  r->u64[1] = (x->u64[1] >> 1) | ((x->u64[0] & 1) << 63);
-  r->u64[0] = (x->u64[0] >> 1) ^ (mask & ((uint64_t) GHASH_POLYNOMIAL << 56));
-#else /* ! WORDS_BIGENDIAN */
-#define RSHIFT_WORD(x) \
-  ((((x) & 0xfefefefefefefefeUL) >> 1) \
-   | (((x) & 0x0001010101010101UL) << 15))
-  mask = - ((x->u64[1] >> 56) & 1);
-  r->u64[1] = RSHIFT_WORD(x->u64[1]) | ((x->u64[0] >> 49) & 0x80);
-  r->u64[0] = RSHIFT_WORD(x->u64[0]) ^ (mask & GHASH_POLYNOMIAL);
-# undef RSHIFT_WORD
-#endif /* ! WORDS_BIGENDIAN */
-}
-
 #if GCM_TABLE_BITS == 0
 /* Sets x <- x * y mod r, using the plain bitwise algorithm from the
    specification. y may be shorter than a full block, missing bytes
@@ -104,7 +78,7 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *y)
	      if (b & 0x80)
		block16_xor(&Z, &V);
-	      gcm_gf_shift(&V, &V);
+	      block16_mulx_ghash(&V, &V);
	    }
	}
      memcpy (x->b, Z.b, sizeof(Z));
@@ -275,7 +249,7 @@ gcm_set_key(struct gcm_key *key,
   /* Algorithm 3 from the gcm paper. First do powers of two, then do
      the rest by adding. */
   while (i /= 2)
-    gcm_gf_shift(&key->h[i], &key->h[2*i]);
+    block16_mulx_ghash(&key->h[i], &key->h[2*i]);
   for (i = 2; i < 1<<GCM_TABLE_BITS; i *= 2)
     {
       unsigned j;
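
The replacement block16_mulx_ghash() keeps GCM's reflected convention:
multiplying by x is a right shift of the big-endian byte string, folding
the polynomial 0xE1 into the top byte when the low bit of byte 15 falls
out. It can be cross-checked byte-wise; mulx_ghash_ref() below is a
hypothetical reference, not part of the patch:

  #include <assert.h>
  #include <string.h>
  #include "block-internal.h"

  /* Byte-wise reference for GHASH's multiply-by-x: shift the 16-byte
     big-endian string right by one bit; if a one bit falls out of
     byte 15, XOR the polynomial 0xE1 into byte 0. */
  static void
  mulx_ghash_ref (uint8_t *r, const uint8_t *x)
  {
    unsigned i, carry = x[15] & 1;
    for (i = 15; i > 0; i--)
      r[i] = (x[i] >> 1) | (x[i-1] << 7);
    r[0] = (x[0] >> 1) ^ (0xE1 & -carry);
  }

  int
  main (void)
  {
    union nettle_block16 v, w;
    uint8_t ref[16];
    memset (v.b, 0x5b, 16);
    mulx_ghash_ref (ref, v.b);
    block16_mulx_ghash (&w, &v);
    assert (memcmp (w.b, ref, 16) == 0);
    return 0;
  }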
dbaryshkov@gmail.com writes:

> From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
>
> Move GCM's block shift function to block-internal.h. This concludes
> the move of all Galois mul-by-2 functions into a single header.

I've merged patches 1-3 to the master-updates branch. Thanks!

Regards,
/Niels
On Wed, 4 Sep 2019 at 23:25, Niels Möller <nisse@lysator.liu.se> wrote:

> dbaryshkov@gmail.com writes:
>
>> From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
>>
>> Move GCM's block shift function to block-internal.h. This concludes
>> the move of all Galois mul-by-2 functions into a single header.
>
> I've merged patches 1-3 to the master-updates branch. Thanks!

Thank you! What about the gosthash v2 patches I sent in July?
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
It makes little sense to keep an intermediate solution with GCM_TABLE_BITS == 4. One will either use the unoptimized GCM_TABLE_BITS == 0 case, or switch to the fully optimized case (8), as the memory usage difference between 4 and 8 is quite low. So drop GCM_TABLE_BITS == 4 support.
Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
---
 gcm.c | 67 +++++++----------------------------------------------------
 1 file changed, 8 insertions(+), 59 deletions(-)

diff --git a/gcm.c b/gcm.c
index cf615daf18bd..3a6f04a7671b 100644
--- a/gcm.c
+++ b/gcm.c
@@ -83,62 +83,7 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *y)
     }
   memcpy (x->b, Z.b, sizeof(Z));
 }
-#else /* GCM_TABLE_BITS != 0 */
-
-# if WORDS_BIGENDIAN
-#  define W(left,right) (0x##left##right)
-# else
-#  define W(left,right) (0x##right##left)
-# endif
-
-# if GCM_TABLE_BITS == 4
-static const uint16_t
-shift_table[0x10] = {
-  W(00,00),W(1c,20),W(38,40),W(24,60),W(70,80),W(6c,a0),W(48,c0),W(54,e0),
-  W(e1,00),W(fd,20),W(d9,40),W(c5,60),W(91,80),W(8d,a0),W(a9,c0),W(b5,e0),
-};
-
-static void
-gcm_gf_shift_4(union nettle_block16 *x)
-{
-  uint64_t *u64 = x->u64;
-  uint64_t reduce;
-
-  /* Shift uses big-endian representation. */
-#if WORDS_BIGENDIAN
-  reduce = shift_table[u64[1] & 0xf];
-  u64[1] = (u64[1] >> 4) | ((u64[0] & 0xf) << 60);
-  u64[0] = (u64[0] >> 4) ^ (reduce << 48);
-#else /* ! WORDS_BIGENDIAN */
-#define RSHIFT_WORD_4(x) \
-  ((((x) & UINT64_C(0xf0f0f0f0f0f0f0f0)) >> 4) \
-   | (((x) & UINT64_C(0x000f0f0f0f0f0f0f)) << 12))
-  reduce = shift_table[(u64[1] >> 56) & 0xf];
-  u64[1] = RSHIFT_WORD_4(u64[1]) | ((u64[0] >> 52) & 0xf0);
-  u64[0] = RSHIFT_WORD_4(u64[0]) ^ reduce;
-# undef RSHIFT_WORD_4
-#endif /* ! WORDS_BIGENDIAN */
-}
-
-static void
-gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table)
-{
-  union nettle_block16 Z;
-  unsigned i;
-
-  memset(Z.b, 0, sizeof(Z));
-
-  for (i = GCM_BLOCK_SIZE; i-- > 0;)
-    {
-      uint8_t b = x->b[i];
-
-      gcm_gf_shift_4(&Z);
-      block16_xor(&Z, &table[b & 0xf]);
-      gcm_gf_shift_4(&Z);
-      block16_xor(&Z, &table[b >> 4]);
-    }
-  memcpy (x->b, Z.b, sizeof(Z));
-}
 # elif GCM_TABLE_BITS == 8
 #  if HAVE_NATIVE_gcm_hash8
@@ -147,6 +92,13 @@ void
 _nettle_gcm_hash8 (const struct gcm_key *key, union nettle_block16 *x,
                    size_t length, const uint8_t *data);
 #  else /* !HAVE_NATIVE_gcm_hash8 */
+
+#  if WORDS_BIGENDIAN
+#   define W(left,right) (0x##left##right)
+#  else
+#   define W(left,right) (0x##right##left)
+#  endif
+
 static const uint16_t
 shift_table[0x100] = {
   W(00,00),W(01,c2),W(03,84),W(02,46),W(07,08),W(06,ca),W(04,8c),W(05,4e),
@@ -182,6 +134,7 @@ shift_table[0x100] = {
   W(b5,e0),W(b4,22),W(b6,64),W(b7,a6),W(b2,e8),W(b3,2a),W(b1,6c),W(b0,ae),
   W(bb,f0),W(ba,32),W(b8,74),W(b9,b6),W(bc,f8),W(bd,3a),W(bf,7c),W(be,be),
 };
+#undef W
 
 static void
 gcm_gf_shift_8(union nettle_block16 *x)
@@ -221,10 +174,6 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table)
 # error Unsupported table size.
 # endif /* GCM_TABLE_BITS != 8 */
 
-#undef W
-
-#endif /* GCM_TABLE_BITS */
-
 /* Increment the rightmost 32 bits. */
 #define INC32(block) INCREMENT(4, (block.b) + GCM_BLOCK_SIZE - 4)
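
For anyone auditing the surviving 8-bit path: each shift_table entry is
just the reduction produced by eight single-bit GHASH shifts of the byte
that falls out. A small generator along these lines (a sketch; the W()
macro in gcm.c merely swaps the two bytes for host endianness)
reproduces the table:

  #include <stdint.h>
  #include <stdio.h>

  /* Entry for byte b: run eight 1-bit big-endian right shifts on a
     16-bit window, folding the polynomial 0xE1 in at the top each
     time a one bit falls out. */
  static uint16_t
  table_entry (uint8_t b)
  {
    uint16_t v = b;
    int i;
    for (i = 0; i < 8; i++)
      {
        int carry = v & 1;
        v >>= 1;
        if (carry)
          v ^= 0xE100;
      }
    return v;
  }

  int
  main (void)
  {
    /* Expect 01c2, matching the table's W(01,c2) entry. */
    printf ("%04x\n", table_entry (1));
    return 0;
  }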
dbaryshkov@gmail.com writes:

> It makes little sense to keep an intermediate solution with
> GCM_TABLE_BITS == 4. One will either use the unoptimized
> GCM_TABLE_BITS == 0 case, or switch to the fully optimized case (8),
> as the memory usage difference between 4 and 8 is quite low. So drop
> GCM_TABLE_BITS == 4 support.
For the const shift_table, the size difference is 32 bytes vs 512 bytes, which may not be a big deal.
I'm more concerned with the size of struct gcm_key, 256 bytes vs 4096 bytes. GCM_TABLE_BITS == 4 seems like a reasonable tradeoff if you have an embedded system with little RAM.
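
(Those figures follow directly from the layout, assuming struct gcm_key is essentially just the precomputed table of 2^GCM_TABLE_BITS 16-byte blocks: 2^4 * 16 = 256 bytes against 2^8 * 16 = 4096 bytes.)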
It's unfortunate that that configuration is poorly tested. It would make some sense to add an (ABI-breaking) configure flag --enable-small-size to reduce the size of various tables, which could then be tested more regularly. Besides gcm, AES and DES use large constant tables of somewhat configurable size, and for ecc there are both constant tables, used by ecc_mul_g, and run-time tables, used by ecc_mul_a.
Is there any interest in improving support for low-end embedded devices?
Regards, /Niels