[PATCH 1/5] gcm: use uint64_t member of nettle_block16

List overview All Threads
Download

newer

older

[PATCH v2 1/4] block-internal: add...

PPC64le GHASH Vector Acceleration

dbaryshkov＠gmail.com

26 Aug 2019 26 Aug '19

6:20 p.m.

From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com

Remove last usage of unsigned long member of nettle_block16.

Signed-off-by: Dmitry Eremin-Solenikov dbaryshkov@gmail.com --- gcm.c | 47 ++++++++++++----------------------------------- 1 file changed, 12 insertions(+), 35 deletions(-)

diff --git a/gcm.c b/gcm.c index a55f603f66d5..627097b24218 100644 --- a/gcm.c +++ b/gcm.c @@ -133,45 +133,22 @@ shift_table[0x10] = { static void gcm_gf_shift_4(union nettle_block16 *x) { - unsigned long *w = x->w; - unsigned long reduce; + uint64_t *u64 = x->u64; + uint64_t reduce;

/* Shift uses big-endian representation. */ #if WORDS_BIGENDIAN -# if SIZEOF_LONG == 4 - reduce = shift_table[w[3] & 0xf]; - w[3] = (w[3] >> 4) | ((w[2] & 0xf) << 28); - w[2] = (w[2] >> 4) | ((w[1] & 0xf) << 28); - w[1] = (w[1] >> 4) | ((w[0] & 0xf) << 28); - w[0] = (w[0] >> 4) ^ (reduce << 16); -# elif SIZEOF_LONG == 8 - reduce = shift_table[w[1] & 0xf]; - w[1] = (w[1] >> 4) | ((w[0] & 0xf) << 60); - w[0] = (w[0] >> 4) ^ (reduce << 48); -# else -# error Unsupported word size. */ -#endif + reduce = shift_table[u64[1] & 0xf]; + u64[1] = (u64[1] >> 4) | ((u64[0] & 0xf) << 60); + u64[0] = (u64[0] >> 4) ^ (reduce << 48); #else /* ! WORDS_BIGENDIAN */ -# if SIZEOF_LONG == 4 -#define RSHIFT_WORD(x) \ - ((((x) & 0xf0f0f0f0UL) >> 4) \ - | (((x) & 0x000f0f0f) << 12)) - reduce = shift_table[(w[3] >> 24) & 0xf]; - w[3] = RSHIFT_WORD(w[3]) | ((w[2] >> 20) & 0xf0); - w[2] = RSHIFT_WORD(w[2]) | ((w[1] >> 20) & 0xf0); - w[1] = RSHIFT_WORD(w[1]) | ((w[0] >> 20) & 0xf0); - w[0] = RSHIFT_WORD(w[0]) ^ reduce; -# elif SIZEOF_LONG == 8 -#define RSHIFT_WORD(x) \ - ((((x) & 0xf0f0f0f0f0f0f0f0UL) >> 4) \ - | (((x) & 0x000f0f0f0f0f0f0fUL) << 12)) - reduce = shift_table[(w[1] >> 56) & 0xf]; - w[1] = RSHIFT_WORD(w[1]) | ((w[0] >> 52) & 0xf0); - w[0] = RSHIFT_WORD(w[0]) ^ reduce; -# else -# error Unsupported word size. */ -# endif -# undef RSHIFT_WORD +#define RSHIFT_WORD_4(x) \ + ((((x) & UINT64_C(0xf0f0f0f0f0f0f0f0)) >> 4) \ + | (((x) & UINT64_C(0x000f0f0f0f0f0f0f)) << 12)) + reduce = shift_table[(u64[1] >> 56) & 0xf]; + u64[1] = RSHIFT_WORD_4(u64[1]) | ((u64[0] >> 52) & 0xf0); + u64[0] = RSHIFT_WORD_4(u64[0]) ^ reduce; +# undef RSHIFT_WORD_4 #endif /* ! WORDS_BIGENDIAN */ }

-- 2.23.0.rc1

Show replies by date

dbaryshkov＠gmail.com

26 Aug 26 Aug

6:20 p.m.

New subject: [PATCH 2/5] cmac64: fix nettle_block16 usage

From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com

CMAC64 uses block8, rather than block16.

Signed-off-by: Dmitry Eremin-Solenikov dbaryshkov@gmail.com --- cmac64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmac64.c b/cmac64.c index 2fbffc9b5ea6..636635ba478b 100644 --- a/cmac64.c +++ b/cmac64.c @@ -98,7 +98,7 @@ cmac64_update(struct cmac64_ctx *ctx, const void *cipher, nettle_cipher_func *encrypt, size_t msg_len, const uint8_t *msg) { - union nettle_block16 Y; + union nettle_block8 Y; /* * check if we expand the block */

-- 2.23.0.rc1

dbaryshkov＠gmail.com

6:20 p.m.

New subject: [PATCH 3/5] block-internal: add block XORing functions

From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com

Add common implementations for functions doing XOR over nettle_block16/nettle_block8.

diff --git a/Makefile.in b/Makefile.in index af4f6e46ee9b..f6658c86341c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -230,7 +230,8 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt.h getopt_int.h \ INSTALL NEWS ChangeLog \ nettle.pc.in hogweed.pc.in \ $(des_headers) descore.README desdata.stamp \ - aes-internal.h camellia-internal.h cmac-internal.h serpent-internal.h \ + aes-internal.h block-internal.h \ + camellia-internal.h cmac-internal.h serpent-internal.h \ cast128_sboxes.h desinfo.h desCode.h \ ripemd160-internal.h sha2-internal.h \ memxor-internal.h nettle-internal.h nettle-write.h \ diff --git a/block-internal.h b/block-internal.h new file mode 100644 index 000000000000..84839c872f63 --- /dev/null +++ b/block-internal.h @@ -0,0 +1,93 @@ +/* block-internal.h + + Internal implementations of nettle_blockZ-related functions. + + Copyright (C) 2011 Katholieke Universiteit Leuven + Copyright (C) 2011, 2013, 2018 Niels Möller + Copyright (C) 2018 Red Hat, Inc. + Copyright (C) 2019 Dmitry Eremin-Solenikov + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+*/ + +#ifndef NETTLE_BLOCK_INTERNAL_H_INCLUDED +#define NETTLE_BLOCK_INTERNAL_H_INCLUDED + +#include <assert.h> + +#include "nettle-types.h" +#include "memxor.h" + +static inline void +block16_xor (union nettle_block16 *r, + const union nettle_block16 *x) +{ + r->u64[0] ^= x->u64[0]; + r->u64[1] ^= x->u64[1]; +} + +static inline void +block16_xor3 (union nettle_block16 *r, + const union nettle_block16 *x, + const union nettle_block16 *y) +{ + r->u64[0] = x->u64[0] ^ y->u64[0]; + r->u64[1] = x->u64[1] ^ y->u64[1]; +} + +static inline void +block16_xor_bytes (union nettle_block16 *r, + const union nettle_block16 *x, + const uint8_t *bytes) +{ + memxor3 (r->b, x->b, bytes, 16); +} + +static inline void +block8_xor (union nettle_block8 *r, + const union nettle_block8 *x) +{ + r->u64 ^= x->u64; +} + +static inline void +block8_xor3 (union nettle_block8 *r, + const union nettle_block8 *x, + const union nettle_block8 *y) +{ + r->u64 = x->u64 ^ y->u64; +} + +static inline void +block8_xor_bytes (union nettle_block8 *r, + const union nettle_block8 *x, + const uint8_t *bytes) +{ + memxor3 (r->b, x->b, bytes, 8); +} + +#endif /* NETTLE_BLOCK_INTERNAL_H_INCLUDED */ diff --git a/cmac.c b/cmac.c index 70ce8132d9d1..194324421c58 100644 --- a/cmac.c +++ b/cmac.c @@ -45,6 +45,7 @@ #include "memxor.h" #include "nettle-internal.h" #include "cmac-internal.h" +#include "block-internal.h" #include "macros.h"

/* shift one and XOR with 0x87. */ @@ -119,12 +120,12 @@ cmac128_update(struct cmac128_ctx *ctx, const void *cipher, /* * now checksum everything but the last block */ - memxor3(Y.b, ctx->X.b, ctx->block.b, 16); + block16_xor3(&Y, &ctx->X, &ctx->block); encrypt(cipher, 16, ctx->X.b, Y.b);

while (msg_len > 16) { - memxor3(Y.b, ctx->X.b, msg, 16); + block16_xor_bytes (&Y, &ctx->X, msg); encrypt(cipher, 16, ctx->X.b, Y.b); msg += 16; msg_len -= 16; @@ -151,14 +152,14 @@ cmac128_digest(struct cmac128_ctx *ctx, const struct cmac128_key *key, ctx->block.b[ctx->index] = 0x80; memset(ctx->block.b + ctx->index + 1, 0, 16 - 1 - ctx->index);

- memxor(ctx->block.b, key->K2.b, 16); + block16_xor (&ctx->block, &key->K2); } else { - memxor(ctx->block.b, key->K1.b, 16); + block16_xor (&ctx->block, &key->K1); }

- memxor3(Y.b, ctx->block.b, ctx->X.b, 16); + block16_xor3 (&Y, &ctx->block, &ctx->X);

assert(length <= 16); if (length == 16) diff --git a/cmac64.c b/cmac64.c index 636635ba478b..e7bb438580d6 100644 --- a/cmac64.c +++ b/cmac64.c @@ -43,8 +43,8 @@

#include "cmac.h"

-#include "memxor.h" #include "nettle-internal.h" +#include "block-internal.h" #include "macros.h"

/* shift one and XOR with 0x87. */ @@ -119,12 +119,12 @@ cmac64_update(struct cmac64_ctx *ctx, const void *cipher, /* * now checksum everything but the last block */ - memxor3(Y.b, ctx->X.b, ctx->block.b, 8); + block8_xor3(&Y, &ctx->X, &ctx->block); encrypt(cipher, 8, ctx->X.b, Y.b);

while (msg_len > 8) { - memxor3(Y.b, ctx->X.b, msg, 8); + block8_xor_bytes(&Y, &ctx->X, msg); encrypt(cipher, 8, ctx->X.b, Y.b); msg += 8; msg_len -= 8; @@ -151,14 +151,14 @@ cmac64_digest(struct cmac64_ctx *ctx, const struct cmac64_key *key, if (ctx->index < 8) { ctx->block.b[ctx->index] = 0x80; - memxor(ctx->block.b, key->K2.b, 8); + block8_xor(&ctx->block, &key->K2); } else { - memxor(ctx->block.b, key->K1.b, 8); + block8_xor(&ctx->block, &key->K1); }

- memxor3(Y.b, ctx->block.b, ctx->X.b, 8); + block8_xor3(&Y, &ctx->block, &ctx->X);

assert(length <= 8); if (length == 8) diff --git a/eax.c b/eax.c index 4b8b5117746e..63f3ff82fe65 100644 --- a/eax.c +++ b/eax.c @@ -40,6 +40,7 @@

#include "eax.h"

+#include "block-internal.h" #include "ctr.h" #include "memxor.h"

@@ -50,14 +51,6 @@ omac_init (union nettle_block16 *state, unsigned t) state->b[EAX_BLOCK_SIZE - 1] = t; }

-/* Almost the same as gcm_gf_add */ -static void -block16_xor (union nettle_block16 *dst, const union nettle_block16 *src) -{ - dst->u64[0] ^= src->u64[0]; - dst->u64[1] ^= src->u64[1]; -} - static void omac_update (union nettle_block16 *state, const struct eax_key *key, const void *cipher, nettle_cipher_func *f, diff --git a/gcm.c b/gcm.c index 627097b24218..c550c5d810ef 100644 --- a/gcm.c +++ b/gcm.c @@ -53,16 +53,10 @@ #include "nettle-internal.h" #include "macros.h" #include "ctr-internal.h" +#include "block-internal.h"

#define GHASH_POLYNOMIAL 0xE1UL

-static void -gcm_gf_add (union nettle_block16 *r, - const union nettle_block16 *x, const union nettle_block16 *y) -{ - r->u64[0] = x->u64[0] ^ y->u64[0]; - r->u64[1] = x->u64[1] ^ y->u64[1]; -} /* Multiplication by 010...0; a big-endian shift right. If the bit shifted out is one, the defining polynomial is added to cancel it out. r == x is allowed. */ @@ -108,7 +102,7 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *y) for (j = 0; j < 8; j++, b <<= 1) { if (b & 0x80) - gcm_gf_add(&Z, &Z, &V); + block16_xor3(&Z, &Z, &V); gcm_gf_shift(&V, &V); } @@ -165,9 +159,9 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table) uint8_t b = x->b[i];

gcm_gf_shift_4(&Z); - gcm_gf_add(&Z, &Z, &table[b & 0xf]); + block16_xor3(&Z, &Z, &table[b & 0xf]); gcm_gf_shift_4(&Z); - gcm_gf_add(&Z, &Z, &table[b >> 4]); + block16_xor3(&Z, &Z, &table[b >> 4]); } memcpy (x->b, Z.b, sizeof(Z)); } @@ -243,10 +237,10 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table) for (i = GCM_BLOCK_SIZE-2; i > 0; i--) { gcm_gf_shift_8(&Z); - gcm_gf_add(&Z, &Z, &table[x->b[i]]); + block16_xor3(&Z, &Z, &table[x->b[i]]); } gcm_gf_shift_8(&Z); - gcm_gf_add(x, &Z, &table[x->b[0]]); + block16_xor3(x, &Z, &table[x->b[0]]); } # endif /* ! HAVE_NATIVE_gcm_hash8 */ # else /* GCM_TABLE_BITS != 8 */ @@ -286,7 +280,7 @@ gcm_set_key(struct gcm_key *key, { unsigned j; for (j = 1; j < i; j++) - gcm_gf_add(&key->h[i+j], &key->h[i],&key->h[j]); + block16_xor3(&key->h[i+j], &key->h[i],&key->h[j]); } #endif } diff --git a/siv-cmac.c b/siv-cmac.c index f498cb863f5a..42f740cddf5d 100644 --- a/siv-cmac.c +++ b/siv-cmac.c @@ -46,6 +46,7 @@ #include "memops.h" #include "cmac-internal.h" #include "nettle-internal.h" +#include "block-internal.h"

/* This is an implementation of S2V for the AEAD case where * vectors if zero, are considered as S empty components */ @@ -69,12 +70,12 @@ _siv_s2v (const struct nettle_cipher *nc, _cmac128_block_mulx (&D, &D); cmac128_update (&cmac_ctx, cmac_cipher, nc->encrypt, alength, adata); cmac128_digest (&cmac_ctx, cmac_key, cmac_cipher, nc->encrypt, 16, S.b); - memxor (D.b, S.b, 16); + block16_xor (&D, &S);

_cmac128_block_mulx (&D, &D); cmac128_update (&cmac_ctx, cmac_cipher, nc->encrypt, nlength, nonce); cmac128_digest (&cmac_ctx, cmac_key, cmac_cipher, nc->encrypt, 16, S.b); - memxor (D.b, S.b, 16); + block16_xor (&D, &S);

/* Sn */ if (plength >= 16) @@ -83,7 +84,7 @@ _siv_s2v (const struct nettle_cipher *nc,

pdata += plength - 16;

- memxor3 (T.b, pdata, D.b, 16); + block16_xor_bytes (&T, &D, pdata); } else { @@ -95,7 +96,7 @@ _siv_s2v (const struct nettle_cipher *nc, if (plength + 1 < 16) memset (&pad.b[plength + 1], 0, 16 - plength - 1);

- memxor (T.b, pad.b, 16); + block16_xor (&T, &pad); }

cmac128_update (&cmac_ctx, cmac_cipher, nc->encrypt, 16, T.b);

-- 2.23.0.rc1

nisse＠lysator.liu.se

3 Sep 3 Sep

5:05 p.m.

New subject: [PATCH 3/5] block-internal: add block XORing functions

dbaryshkov@gmail.com writes:

...

From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com

Add common implementations for functions doing XOR over nettle_block16/nettle_block8.

I've merged the first two patches. Thanks! Do you know if anyone is using GCM_TABLE_BITS 4? I've tested that it still works, both before and after your change, but I don't test it regularly.

...

+static inline void +block16_xor_bytes (union nettle_block16 *r,
   const union nettle_block16 *x,
   const uint8_t *bytes)
+{

memxor3 (r->b, x->b, bytes, 16);

+}

[...]

...

+static inline void +block8_xor_bytes (union nettle_block8 *r,
   const union nettle_block8 *x,
   const uint8_t *bytes)
+{

memxor3 (r->b, x->b, bytes, 8);

+}

Not sure these two wrappers are that helpful. Do you have a good reason to add them?

The rest of the patch looks like a nice consolidation.

...

--- a/gcm.c +++ b/gcm.c @@ -53,16 +53,10 @@ #include "nettle-internal.h" #include "macros.h" #include "ctr-internal.h" +#include "block-internal.h"

#define GHASH_POLYNOMIAL 0xE1UL

-static void -gcm_gf_add (union nettle_block16 *r,
   const union nettle_block16 *x, const union nettle_block16 *y)
-{

r->u64[0] = x->u64[0] ^ y->u64[0];

r->u64[1] = x->u64[1] ^ y->u64[1];

-} /* Multiplication by 010...0; a big-endian shift right. If the bit shifted out is one, the defining polynomial is added to cancel it out. r == x is allowed. */ @@ -108,7 +102,7 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *y) for (j = 0; j < 8; j++, b <<= 1) { if (b & 0x80)
   gcm_gf_add(&Z, &Z, &V);
   block16_xor3(&Z, &Z, &V);

This and a few other calls below can be block16_xor rather than block16_xor3.

Regards, /Niels

-- Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677. Internet email is subject to wholesale government surveillance.

Dmitry Eremin-Solenikov

7:31 p.m.

New subject: [PATCH 3/5] block-internal: add block XORing functions

Hello,

вт, 3 сент. 2019 г. в 20:05, Niels Möller nisse@lysator.liu.se:

...

dbaryshkov@gmail.com writes:

...
From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com

Add common implementations for functions doing XOR over nettle_block16/nettle_block8.

I've merged the first two patches. Thanks! Do you know if anyone is using GCM_TABLE_BITS 4? I've tested that it still works, both before and after your change, but I don't test it regularly.

I don't know. As the size difference between GCM_TABLE_BITS being 4 and 8 is not that big, maybe we can drop it altogether. I can send a patch ;-)

...

...
+static inline void +block16_xor_bytes (union nettle_block16 *r,
           const union nettle_block16 *x,
           const uint8_t *bytes)
+{

memxor3 (r->b, x->b, bytes, 16);

+}
[...]

...
+static inline void +block8_xor_bytes (union nettle_block8 *r,
           const union nettle_block8 *x,
           const uint8_t *bytes)
+{

memxor3 (r->b, x->b, bytes, 8);

+}
Not sure these two wrappers are that helpful. Do you have a good reason to add them?

They fit into cmac128/cmac64/siv-cmac code, as they simplify code there a bit. Using them you just say that Block1 = Block2 ^ bytestring, rather than XORing Block.b fields.

If you'd like, I can drop them, but from my point of view they look like good encapsulation.

...

The rest of the patch looks like a nice consolidation.

...
--- a/gcm.c +++ b/gcm.c @@ -53,16 +53,10 @@ #include "nettle-internal.h" #include "macros.h" #include "ctr-internal.h" +#include "block-internal.h"

#define GHASH_POLYNOMIAL 0xE1UL

-static void -gcm_gf_add (union nettle_block16 *r,
    const union nettle_block16 *x, const union nettle_block16 *y)
-{

r->u64[0] = x->u64[0] ^ y->u64[0];

r->u64[1] = x->u64[1] ^ y->u64[1];

-} /* Multiplication by 010...0; a big-endian shift right. If the bit shifted out is one, the defining polynomial is added to cancel it out. r == x is allowed. */ @@ -108,7 +102,7 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *y) for (j = 0; j < 8; j++, b <<= 1) { if (b & 0x80)
    gcm_gf_add(&Z, &Z, &V);
    block16_xor3(&Z, &Z, &V);
This and a few other calls below can be block16_xor rather than block16_xor3.

Will fix in next iteration.

-- With best wishes Dmitry

dbaryshkov＠gmail.com

26 Aug 26 Aug

6:20 p.m.

New subject: [PATCH 4/5] block modes: move Galois shifts to block-internal.h

From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com

Move Galois polynomial shifts to block-internal.h, simplifying common code. GCM is left unconverted for now, this will be fixed later.

diff --git a/Makefile.in b/Makefile.in index f6658c86341c..ae9c8a7563f9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -231,7 +231,7 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt.h getopt_int.h \ nettle.pc.in hogweed.pc.in \ $(des_headers) descore.README desdata.stamp \ aes-internal.h block-internal.h \ - camellia-internal.h cmac-internal.h serpent-internal.h \ + camellia-internal.h serpent-internal.h \ cast128_sboxes.h desinfo.h desCode.h \ ripemd160-internal.h sha2-internal.h \ memxor-internal.h nettle-internal.h nettle-write.h \ diff --git a/block-internal.h b/block-internal.h index 84839c872f63..8cc30f6f5a02 100644 --- a/block-internal.h +++ b/block-internal.h @@ -90,4 +90,80 @@ block8_xor_bytes (union nettle_block8 *r, memxor3 (r->b, x->b, bytes, 8); }

+#define LSHIFT_WORD(x) ((((x) & 0x7f7f7f7f7f7f7f7f) << 1) | \ + (((x) & 0x8080808080808080) >> 15)) +#define RSHIFT_WORD(x) ((((x) & 0xfefefefefefefefe) >> 1) | \ + (((x) & 0x0001010101010101) << 15)) + +/* Galois multiplications by 2: + * functions differ in shifting right or left, big- or little- endianness + * and by defininy polynom. + * r == x is allowed. */ + +#if WORDS_BIGENDIAN +static inline void +block16_lshift_be (union nettle_block16 *dst, + const union nettle_block16 *src, + uint64_t poly) +{ + uint64_t carry = src->u64[0] >> 63; + dst->u64[0] = (src->u64[0] << 1) | (src->u64[1] >> 63); + dst->u64[1] = (src->u64[1] << 1) ^ (poly & -carry); +} +#else /* !WORDS_BIGENDIAN */ +static inline void +block16_lshift_be (union nettle_block16 *dst, + const union nettle_block16 *src, + uint64_t poly) +{ + uint64_t carry = (src->u64[0] & 0x80) >> 7; + dst->u64[0] = LSHIFT_WORD(src->u64[0]) | ((src->u64[1] & 0x80) << 49); + dst->u64[1] = LSHIFT_WORD(src->u64[1]) ^ ((poly << 56) & -carry); +} +#endif /* !WORDS_BIGENDIAN */ + +#if WORDS_BIGENDIAN +static inline void +block16_lshift_le (union nettle_block16 *dst, + const union nettle_block16 *src, + uint64_t poly) +{ + uint64_t carry = (src->u64[1] & 0x80) >> 7; + dst->u64[1] = LSHIFT_WORD(src->u64[1]) | ((src->u64[0] & 0x80) << 49); + dst->u64[0] = LSHIFT_WORD(src->u64[0]) ^ ((poly << 56) & -carry); +} +#else /* !WORDS_BIGENDIAN */ +static inline void +block16_lshift_le (union nettle_block16 *dst, + const union nettle_block16 *src, + uint64_t poly) +{ + uint64_t carry = src->u64[1] >> 63; + dst->u64[1] = (src->u64[1] << 1) | (src->u64[0] >> 63); + dst->u64[0] = (src->u64[0] << 1) ^ (poly & -carry); +} +#endif /* !WORDS_BIGNDIAN */ + +#if WORDS_BIGENDIAN +static inline void +block8_lshift_be (union nettle_block8 *dst, + const union nettle_block8 *src, + uint64_t poly) +{ + uint64_t carry = src->u64 >> 63; + + dst->u64 = (src->u64 << 1) ^ (poly & -carry); +} +#else /* !WORDS_BIGENDIAN */ +static inline void 
+block8_lshift_be (union nettle_block8 *dst, + const union nettle_block8 *src, + uint64_t poly) +{ + uint64_t carry = (src->u64 & 0x80) >> 7; + + dst->u64 = LSHIFT_WORD(src->u64) ^ ((poly << 56) & -carry); +} +#endif /* !WORDS_BIGENDIAN */ + #endif /* NETTLE_BLOCK_INTERNAL_H_INCLUDED */ diff --git a/cmac-internal.h b/cmac-internal.h deleted file mode 100644 index 80db7fcc58cd..000000000000 --- a/cmac-internal.h +++ /dev/null @@ -1,54 +0,0 @@ -/* cmac-internal.h - - CMAC mode internal functions - - Copyright (C) 2017 Red Hat, Inc. - - Contributed by Nikos Mavrogiannopoulos - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. -*/ - -#ifndef NETTLE_CMAC_INTERNAL_H_INCLUDED -#define NETTLE_CMAC_INTERNAL_H_INCLUDED - -#include "cmac.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define _cmac128_block_mulx _nettle_cmac128_block_mulx - -void _cmac128_block_mulx(union nettle_block16 *out, - const union nettle_block16 *in); - -#ifdef __cplusplus -} -#endif - -#endif /* CMAC_INTERNAL_H_INCLUDED */ diff --git a/cmac.c b/cmac.c index 194324421c58..9b745daaaf1b 100644 --- a/cmac.c +++ b/cmac.c @@ -44,32 +44,16 @@

#include "memxor.h" #include "nettle-internal.h" -#include "cmac-internal.h" #include "block-internal.h" #include "macros.h"

/* shift one and XOR with 0x87. */ -#if WORDS_BIGENDIAN -void -_cmac128_block_mulx(union nettle_block16 *dst, - const union nettle_block16 *src) -{ - uint64_t carry = src->u64[0] >> 63; - dst->u64[0] = (src->u64[0] << 1) | (src->u64[1] >> 63); - dst->u64[1] = (src->u64[1] << 1) ^ (0x87 & -carry); -} -#else /* !WORDS_BIGENDIAN */ -#define LE_SHIFT(x) ((((x) & 0x7f7f7f7f7f7f7f7f) << 1) | \ - (((x) & 0x8080808080808080) >> 15)) -void +static inline void _cmac128_block_mulx(union nettle_block16 *dst, const union nettle_block16 *src) { - uint64_t carry = (src->u64[0] & 0x80) >> 7; - dst->u64[0] = LE_SHIFT(src->u64[0]) | ((src->u64[1] & 0x80) << 49); - dst->u64[1] = LE_SHIFT(src->u64[1]) ^ (0x8700000000000000 & -carry); + block16_lshift_be(dst, src, 0x87); } -#endif /* !WORDS_BIGENDIAN */

void cmac128_set_key(struct cmac128_key *key, const void *cipher, diff --git a/cmac64.c b/cmac64.c index e7bb438580d6..c423a804145e 100644 --- a/cmac64.c +++ b/cmac64.c @@ -47,28 +47,13 @@ #include "block-internal.h" #include "macros.h"

-/* shift one and XOR with 0x87. */ -#if WORDS_BIGENDIAN -static void +/* shift one and XOR with 0x1b. */ +static inline void _cmac64_block_mulx(union nettle_block8 *dst, const union nettle_block8 *src) { - uint64_t carry = src->u64 >> 63; - - dst->u64 = (src->u64 << 1) ^ (0x1b & -carry); -} -#else /* !WORDS_BIGENDIAN */ -#define LE_SHIFT(x) ((((x) & 0x7f7f7f7f7f7f7f7f) << 1) | \ - (((x) & 0x8080808080808080) >> 15)) -static void -_cmac64_block_mulx(union nettle_block8 *dst, - const union nettle_block8 *src) -{ - uint64_t carry = (src->u64 & 0x80) >> 7; - - dst->u64 = LE_SHIFT(src->u64) ^ (0x1b00000000000000 & -carry); + block8_lshift_be(dst, src, 0x1b); } -#endif /* !WORDS_BIGENDIAN */

void cmac64_set_key(struct cmac64_key *key, const void *cipher, diff --git a/eax.c b/eax.c index 63f3ff82fe65..24f5b4f592fe 100644 --- a/eax.c +++ b/eax.c @@ -82,27 +82,13 @@ omac_final (union nettle_block16 *state, const struct eax_key *key, f (cipher, EAX_BLOCK_SIZE, state->b, state->b); }

-/* Allows r == a */ -static void -gf2_double (uint8_t *r, const uint8_t *a) -{ - unsigned high = - (a[0] >> 7); - unsigned i; - /* Shift left */ - for (i = 0; i < EAX_BLOCK_SIZE - 1; i++) - r[i] = (a[i] << 1) + (a[i+1] >> 7); - - /* Wrap around for x^{128} = x^7 + x^2 + x + 1 */ - r[EAX_BLOCK_SIZE - 1] = (a[EAX_BLOCK_SIZE - 1] << 1) ^ (high & 0x87); -} - void eax_set_key (struct eax_key *key, const void *cipher, nettle_cipher_func *f) { static const union nettle_block16 zero_block; f (cipher, EAX_BLOCK_SIZE, key->pad_block.b, zero_block.b); - gf2_double (key->pad_block.b, key->pad_block.b); - gf2_double (key->pad_partial.b, key->pad_block.b); + block16_lshift_be (&key->pad_block, &key->pad_block, 0x87); + block16_lshift_be (&key->pad_partial, &key->pad_block, 0x87); block16_xor (&key->pad_partial, &key->pad_block); }

diff --git a/gcm.c b/gcm.c index c550c5d810ef..17c889e67553 100644 --- a/gcm.c +++ b/gcm.c @@ -71,13 +71,9 @@ gcm_gf_shift (union nettle_block16 *r, const union nettle_block16 *x) r->u64[1] = (x->u64[1] >> 1) | ((x->u64[0] & 1) << 63); r->u64[0] = (x->u64[0] >> 1) ^ (mask & ((uint64_t) GHASH_POLYNOMIAL << 56)); #else /* ! WORDS_BIGENDIAN */ -#define RSHIFT_WORD(x) \ - ((((x) & 0xfefefefefefefefeUL) >> 1) \ - | (((x) & 0x0001010101010101UL) << 15)) mask = - ((x->u64[1] >> 56) & 1); r->u64[1] = RSHIFT_WORD(x->u64[1]) | ((x->u64[0] >> 49) & 0x80); r->u64[0] = RSHIFT_WORD(x->u64[0]) ^ (mask & GHASH_POLYNOMIAL); -# undef RSHIFT_WORD #endif /* ! WORDS_BIGENDIAN */ }

diff --git a/siv-cmac-aes128.c b/siv-cmac-aes128.c index 82ac16e91992..fd2e23a4513e 100644 --- a/siv-cmac-aes128.c +++ b/siv-cmac-aes128.c @@ -44,7 +44,6 @@ #include "ctr.h" #include "memxor.h" #include "memops.h" -#include "cmac-internal.h"

void siv_cmac_aes128_set_key(struct siv_cmac_aes128_ctx *ctx, const uint8_t *key) diff --git a/siv-cmac-aes256.c b/siv-cmac-aes256.c index 9401bbf119c5..eda7f1c27a55 100644 --- a/siv-cmac-aes256.c +++ b/siv-cmac-aes256.c @@ -44,7 +44,6 @@ #include "ctr.h" #include "memxor.h" #include "memops.h" -#include "cmac-internal.h"

void siv_cmac_aes256_set_key(struct siv_cmac_aes256_ctx *ctx, const uint8_t *key) diff --git a/siv-cmac.c b/siv-cmac.c index 42f740cddf5d..c0c8fff900dd 100644 --- a/siv-cmac.c +++ b/siv-cmac.c @@ -44,10 +44,17 @@ #include "ctr.h" #include "memxor.h" #include "memops.h" -#include "cmac-internal.h" #include "nettle-internal.h" #include "block-internal.h"

+/* shift one and XOR with 0x87. */ +static inline void +_cmac128_block_mulx(union nettle_block16 *dst, + const union nettle_block16 *src) +{ + block16_lshift_be(dst, src, 0x87); +} + /* This is an implementation of S2V for the AEAD case where * vectors if zero, are considered as S empty components */ static void diff --git a/xts.c b/xts.c index 6730b3ad76ff..8f565751cac1 100644 --- a/xts.c +++ b/xts.c @@ -44,31 +44,17 @@ #include "macros.h" #include "memxor.h" #include "nettle-internal.h" +#include "block-internal.h"

/* shift left one and XOR with 0x87 if there is carry. */ /* the algorithm reads this as a 128bit Little Endian number */ /* src and dest can point to the same buffer for in-place operations */ -#if WORDS_BIGENDIAN -#define BE_SHIFT(x) ((((x) & 0x7f7f7f7f7f7f7f7f) << 1) | \ - (((x) & 0x8080808080808080) >> 15)) static void xts_shift(union nettle_block16 *dst, const union nettle_block16 *src) { - uint64_t carry = (src->u64[1] & 0x80) >> 7; - dst->u64[1] = BE_SHIFT(src->u64[1]) | ((src->u64[0] & 0x80) << 49); - dst->u64[0] = BE_SHIFT(src->u64[0]) ^ (0x8700000000000000 & -carry); + block16_lshift_le(dst, src, 0x87); } -#else /* !WORDS_BIGENDIAN */ -static void -xts_shift(union nettle_block16 *dst, - const union nettle_block16 *src) -{ - uint64_t carry = src->u64[1] >> 63; - dst->u64[1] = (src->u64[1] << 1) | (src->u64[0] >> 63); - dst->u64[0] = (src->u64[0] << 1) ^ (0x87 & -carry); -} -#endif /* !WORDS_BIGNDIAN */

static void check_length(size_t length, uint8_t *dst)

-- 2.23.0.rc1

nisse＠lysator.liu.se

3 Sep 3 Sep

5:26 p.m.

New subject: [PATCH 4/5] block modes: move Galois shifts to block-internal.h

dbaryshkov@gmail.com writes:

...

From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com

Move Galois polynomial shifts to block-internal.h, simplifying common code. GCM is left unconverted for now, this will be fixed later.

Thanks for cleaning this up! Some comments below.

...

--- a/block-internal.h +++ b/block-internal.h @@ -90,4 +90,80 @@ block8_xor_bytes (union nettle_block8 *r, memxor3 (r->b, x->b, bytes, 8); }

+#define LSHIFT_WORD(x) ((((x) & 0x7f7f7f7f7f7f7f7f) << 1) | \
	(((x) & 0x8080808080808080) >> 15))
+#define RSHIFT_WORD(x) ((((x) & 0xfefefefefefefefe) >> 1) | \
	(((x) & 0x0001010101010101) << 15))

Names of these macros should say U64 or UINT64 rather than WORD. And something to suggest that they're for alien endianness. Maybe "LSHIFT_ALIEN_UINT64".

And UINT64_C for the constants.

...

+/* Galois multiplications by 2:

functions differ in shifting right or left, big- or little- endianness

and by defininy polynom.

r == x is allowed. */

This is a bit complex, perhaps it can be clarified a bit. We have both the issue of big or little byte order within words. And the bit order used for representing the polynomial: usually a less significant bit within a byte represents a coefficient for a smaller power of the polynomial variable x, but one of the algorithms (I can't recall which one) uses opposite bit order.

And if I remember correctly, they all use the same polynomial, but due to bit-order differences, there are two different ways to represent it. Which of the functions are called with more than one constant for the poly argument?

And "defining" is misspelled.

...

+#if WORDS_BIGENDIAN +static inline void +block16_lshift_be (union nettle_block16 *dst,
   const union nettle_block16 *src,
   uint64_t poly)
+{

uint64_t carry = src->u64[0] >> 63;

dst->u64[0] = (src->u64[0] << 1) | (src->u64[1] >> 63);

dst->u64[1] = (src->u64[1] << 1) ^ (poly & -carry);

+} +#else /* !WORDS_BIGENDIAN */

There will be less clutter if all code for #if WORDS_BIGENDIAN is grouped together. And I think I prefer "mulx" rather than "shift" somewhere in the name, to indicate that it's not a plain shift.

...

--- a/cmac.c +++ b/cmac.c @@ -44,32 +44,16 @@

#include "memxor.h" #include "nettle-internal.h" -#include "cmac-internal.h" #include "block-internal.h" #include "macros.h"

/* shift one and XOR with 0x87. */ -#if WORDS_BIGENDIAN -void -_cmac128_block_mulx(union nettle_block16 *dst,
    const union nettle_block16 *src)
-{

uint64_t carry = src->u64[0] >> 63;

dst->u64[0] = (src->u64[0] << 1) | (src->u64[1] >> 63);

dst->u64[1] = (src->u64[1] << 1) ^ (0x87 & -carry);

-} -#else /* !WORDS_BIGENDIAN */ -#define LE_SHIFT(x) ((((x) & 0x7f7f7f7f7f7f7f7f) << 1) | \
                (((x) & 0x8080808080808080) >> 15))
-void +static inline void _cmac128_block_mulx(union nettle_block16 *dst, const union nettle_block16 *src) {

uint64_t carry = (src->u64[0] & 0x80) >> 7;

dst->u64[0] = LE_SHIFT(src->u64[0]) | ((src->u64[1] & 0x80) << 49);

dst->u64[1] = LE_SHIFT(src->u64[1]) ^ (0x8700000000000000 & -carry);

block16_lshift_be(dst, src, 0x87);

} -#endif /* !WORDS_BIGENDIAN */

I think it's clearer to delete this and similar wrappers.

Regards, /Niels

-- Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677. Internet email is subject to wholesale government surveillance.

Dmitry Eremin-Solenikov

11:18 p.m.

New subject: [PATCH 4/5] block modes: move Galois shifts to block-internal.h

вт, 3 сент. 2019 г. в 20:26, Niels Möller nisse@lysator.liu.se:

...

dbaryshkov@gmail.com writes:

...
From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com

Move Galois polynomial shifts to block-internal.h, simplifying common code. GCM is left unconverted for now, this will be fixed later.

Thanks for cleaning this up! Some comments below.

...
--- a/block-internal.h +++ b/block-internal.h @@ -90,4 +90,80 @@ block8_xor_bytes (union nettle_block8 *r, memxor3 (r->b, x->b, bytes, 8); }

+#define LSHIFT_WORD(x) ((((x) & 0x7f7f7f7f7f7f7f7f) << 1) | \
                (((x) & 0x8080808080808080) >> 15))
+#define RSHIFT_WORD(x) ((((x) & 0xfefefefefefefefe) >> 1) | \
                (((x) & 0x0001010101010101) << 15))
Names of these macros should say U64 or UINT64 rather than WORD. And something to suggest that they're for alien endianness. Maybe "LSHIFT_ALIEN_UINT64".

Ack

...

And UINT64_C for the constants.

Ack

...

...
+/* Galois multiplications by 2:

functions differ in shifting right or left, big- or little- endianness

and by defininy polynom.

r == x is allowed. */

This is a bit complex; perhaps it can be clarified a bit. We have both the issue of big or little byte order within words. And the bit order used for representing the polynomial: usually a less significant bit within a byte represents a coefficient for a smaller power of the polynomial variable x, but one of the algorithms (I can't recall which one) uses opposite bit order.

For GCM. This is why I left it unconverted in this step.

...

And if I remember correctly, they all use the same polynomial, but due to bit-order differences, there are two different ways to represent it. Which of the functions are called with more than one constant for the poly argument?

They take 0x87 for block16 functions and 0x1b for block8. Except GCM, which uses 0xE1. I will probably inline these values.

...

And "defining" is misspelled.

Ack

...

...
+#if WORDS_BIGENDIAN +static inline void +block16_lshift_be (union nettle_block16 *dst,
           const union nettle_block16 *src,
           uint64_t poly)
+{

uint64_t carry = src->u64[0] >> 63;

dst->u64[0] = (src->u64[0] << 1) | (src->u64[1] >> 63);

dst->u64[1] = (src->u64[1] << 1) ^ (poly & -carry);

+} +#else /* !WORDS_BIGENDIAN */
There will be less clutter if all code for #if WORDS_BIGENDIAN is grouped together. And I think I prefer "mulx" rather than "shift" somewhere in the name, to indicate that it's not a plain shift.

Ack

...

...
--- a/cmac.c +++ b/cmac.c @@ -44,32 +44,16 @@

#include "memxor.h" #include "nettle-internal.h" -#include "cmac-internal.h" #include "block-internal.h" #include "macros.h"

/* shift one and XOR with 0x87. */ -#if WORDS_BIGENDIAN -void -_cmac128_block_mulx(union nettle_block16 *dst,
            const union nettle_block16 *src)
-{

uint64_t carry = src->u64[0] >> 63;

dst->u64[0] = (src->u64[0] << 1) | (src->u64[1] >> 63);

dst->u64[1] = (src->u64[1] << 1) ^ (0x87 & -carry);

-} -#else /* !WORDS_BIGENDIAN */ -#define LE_SHIFT(x) ((((x) & 0x7f7f7f7f7f7f7f7f) << 1) | \
                (((x) & 0x8080808080808080) >> 15))
-void +static inline void _cmac128_block_mulx(union nettle_block16 *dst, const union nettle_block16 *src) {

uint64_t carry = (src->u64[0] & 0x80) >> 7;

dst->u64[0] = LE_SHIFT(src->u64[0]) | ((src->u64[1] & 0x80) << 49);

dst->u64[1] = LE_SHIFT(src->u64[1]) ^ (0x8700000000000000 & -carry);

block16_lshift_be(dst, src, 0x87);

} -#endif /* !WORDS_BIGENDIAN */
I think it's clearer to delete this and similar wrappers.

Ack

-- With best wishes Dmitry

dbaryshkov＠gmail.com

26 Aug 26 Aug

6:20 p.m.

New subject: [PATCH 5/5] gcm: move block shifting function to block-internal.h

From: Dmitry Eremin-Solenikov dbaryshkov@gmail.com

Move GCM's block shift function to block-internal.h. This concludes moving all Galois mul-by-2 functions to a single header.

Signed-off-by: Dmitry Eremin-Solenikov dbaryshkov@gmail.com --- block-internal.h | 29 +++++++++++++++++++++++++++++ gcm.c | 15 ++------------- 2 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/block-internal.h b/block-internal.h index 8cc30f6f5a02..874e4dbe1929 100644 --- a/block-internal.h +++ b/block-internal.h @@ -166,4 +166,33 @@ block8_lshift_be (union nettle_block8 *dst, } #endif /* !WORDS_BIGENDIAN */

+#if WORDS_BIGENDIAN +static inline void +block16_rshift_be (union nettle_block16 *r, + const union nettle_block16 *x, + uint64_t poly) +{ + uint64_t mask; + + /* Shift uses big-endian representation. */ + mask = - (x->u64[1] & 1); + r->u64[1] = (x->u64[1] >> 1) | ((x->u64[0] & 1) << 63); + r->u64[0] = (x->u64[0] >> 1) ^ (mask & (poly << 56)); +} +#else /* ! WORDS_BIGENDIAN */ +static inline void +block16_rshift_be (union nettle_block16 *r, + const union nettle_block16 *x, + uint64_t poly) +{ + uint64_t mask; + + /* Shift uses big-endian representation. */ + mask = - ((x->u64[1] >> 56) & 1); + r->u64[1] = RSHIFT_WORD(x->u64[1]) | ((x->u64[0] >> 49) & 0x80); + r->u64[0] = RSHIFT_WORD(x->u64[0]) ^ (mask & poly); +} +#endif /* ! WORDS_BIGENDIAN */ + +/* shift one and XOR with 0x87. */ #endif /* NETTLE_BLOCK_INTERNAL_H_INCLUDED */ diff --git a/gcm.c b/gcm.c index 17c889e67553..eca6ab6cab25 100644 --- a/gcm.c +++ b/gcm.c @@ -60,21 +60,10 @@ /* Multiplication by 010...0; a big-endian shift right. If the bit shifted out is one, the defining polynomial is added to cancel it out. r == x is allowed. */ -static void +static inline void gcm_gf_shift (union nettle_block16 *r, const union nettle_block16 *x) { - uint64_t mask; - - /* Shift uses big-endian representation. */ -#if WORDS_BIGENDIAN - mask = - (x->u64[1] & 1); - r->u64[1] = (x->u64[1] >> 1) | ((x->u64[0] & 1) << 63); - r->u64[0] = (x->u64[0] >> 1) ^ (mask & ((uint64_t) GHASH_POLYNOMIAL << 56)); -#else /* ! WORDS_BIGENDIAN */ - mask = - ((x->u64[1] >> 56) & 1); - r->u64[1] = RSHIFT_WORD(x->u64[1]) | ((x->u64[0] >> 49) & 0x80); - r->u64[0] = RSHIFT_WORD(x->u64[0]) ^ (mask & GHASH_POLYNOMIAL); -#endif /* ! WORDS_BIGENDIAN */ + block16_rshift_be (r, x, GHASH_POLYNOMIAL); }

#if GCM_TABLE_BITS == 0

-- 2.23.0.rc1

2155

Age (days ago)

2163

Last active (days ago)

nettle-bugs@lists.lysator.liu.se

8 comments

3 participants

tags (0)

participants (3)

dbaryshkov＠gmail.com
Dmitry Eremin-Solenikov
nisse＠lysator.liu.se