nettle-bugs - July 2020
nettle-bugs@lists.lysator.liu.se (6 participants, 23 discussions)
[PATCH] Add bcrypt tests to testsuite.
by Stephen R. van den Berg
06 Jul '20
---
 testsuite/blowfish-test.c | 47 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/testsuite/blowfish-test.c b/testsuite/blowfish-test.c
index cadeda5f..c495b301 100644
--- a/testsuite/blowfish-test.c
+++ b/testsuite/blowfish-test.c
@@ -45,6 +45,22 @@ test_blowfish(const struct tstring *key,
   free(data);
 }

+static void
+test_bcrypt(int succeed, const struct tstring *key,
+            const struct tstring *hash)
+{
+  if (succeed != blowfish_bcrypt_verify(key->length, key->data,
+                                        hash->length, hash->data))
+    {
+      fprintf(stderr, "blowfish_bcrypt_verify failed:\nKey:");
+      tstring_print_hex(key);
+      fprintf(stderr, "\nHash: ");
+      tstring_print_hex(hash);
+      fprintf(stderr, "\n");
+      FAIL();
+    }
+}
+
 void
 test_main(void)
 {
@@ -52,4 +68,35 @@ test_main(void)
   test_blowfish(SDATA("abcdefghijklmnopqrstuvwxyz"),
                 SDATA("BLOWFISH"),
                 SHEX("32 4E D0 FE F4 13 A2 03"));
+  /* Tests for BSD-style bcrypt.
+     From John the Ripper 1.7.9 via Phpass */
+  test_bcrypt(1, SDATA("U*U"), SDATA("$2a$05$CCCCCCCCCCCCCCCCCCCCC.E5YPO9kmyuRGyh0XouQYb4YMJKvyOeW"));
+  test_bcrypt(1, SDATA("U*U*"), SDATA("$2a$05$CCCCCCCCCCCCCCCCCCCCC.VGOzA784oUp/Z0DY336zx7pLYAy0lwK"));
+  test_bcrypt(1, SDATA("U*U*U"), SDATA("$2a$05$XXXXXXXXXXXXXXXXXXXXXOAcXxm9kjPGEMsLznoKqmqw7tc8WCx4a"));
+  test_bcrypt(1, SDATA(""), SDATA("$2a$05$CCCCCCCCCCCCCCCCCCCCC.7uG0VCzI2bS7j6ymqJi9CdcdxiRTWNy"));
+  test_bcrypt(1, SDATA("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789chars after 72 are ignored"), SDATA("$2a$05$abcdefghijklmnopqrstuu5s2v8.iXieOjg/.AySBTTZIIVFJeBui"));
+  test_bcrypt(1, SDATA("\xa3"), SDATA("$2x$05$/OK.fbVrR/bpIqNJ5ianF.CE5elHaaO4EbggVDjb8P19RukzXSM3e"));
+  test_bcrypt(1, SDATA("\xa3"), SDATA("$2y$05$/OK.fbVrR/bpIqNJ5ianF.Sa7shbm4.OzKpvFnX1pQLmQW96oUlCq"));
+  test_bcrypt(1, SDATA("\xd1\x91"), SDATA("$2x$05$6bNw2HLQYeqHYyBfLMsv/OiwqTymGIGzFsA4hOTWebfehXHNprcAS"));
+  test_bcrypt(1, SDATA("\xd0\xc1\xd2\xcf\xcc\xd8"), SDATA("$2x$05$6bNw2HLQYeqHYyBfLMsv/O9LIGgn8OMzuDoHfof8AQimSGfcSWxnS"));
+  test_bcrypt(1, SDATA("\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa""chars after 72 are ignored as usual"), SDATA("$2a$05$/OK.fbVrR/bpIqNJ5ianF.swQOIzjOiJ9GHEPuhEkvqrUyvWhEMx6"));
+  test_bcrypt(1, SDATA("\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55\xaa\x55"), SDATA("$2a$05$/OK.fbVrR/bpIqNJ5ianF.R9xrDjiycxMbQE2bp.vgqlYpW5wx2yy"));
+  test_bcrypt(1, SDATA(""), SDATA("$2a$05$CCCCCCCCCCCCCCCCCCCCC.7uG0VCzI2bS7j6ymqJi9CdcdxiRTWNy"));
+  test_bcrypt(1, SDATA("\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff\x55\xaa\xff"), SDATA("$2a$05$/OK.fbVrR/bpIqNJ5ianF.9tQZzcJfm3uj2NvJ/n5xkhpqLrMpWCe"));
+  /* From Openwall's crypt v1.2 via Phpass */
+  test_bcrypt(0, SDATA(""), SDATA("$2a$03$CCCCCCCCCCCCCCCCCCCCC."));
+  test_bcrypt(0, SDATA(""), SDATA("$2a$32$CCCCCCCCCCCCCCCCCCCCC."));
+  test_bcrypt(0, SDATA(""), SDATA("$2z$05$CCCCCCCCCCCCCCCCCCCCC."));
+  test_bcrypt(0, SDATA(""), SDATA("$2`$05$CCCCCCCCCCCCCCCCCCCCC."));
+  test_bcrypt(0, SDATA(""), SDATA("$2{$05$CCCCCCCCCCCCCCCCCCCCC."));
+  /* Stephen's personal tests */
+  test_bcrypt(1, SDATA("yawinpassword"),
+              SDATA("$2a$04$MzVXtd4o0y4DOlyHMMLMDeE4/eezrsT5Xad.2lmGr/NkCpwBgvn3e"));
+  test_bcrypt(0, SDATA("xawinpassword"),
+              SDATA("$2a$04$MzVXtd4o0y4DOlyHMMLMDeE4/eezrsT5Xad.2lmGr/NkCpwBgvn3e"));
+  test_bcrypt(1, SDATA("Bootq9sH5"),
+              SDATA("$2y$10$1b2lPgo4XumibnJGN3r3sOsXFfVVYlebFjlw47qpaslC4KIwu9dAK"));
+  test_bcrypt(0, SDATA("Bootq9sH6"),
+              SDATA("$2y$10$1b2lPgo4XumibnJGN3r3sOsXFfVVYlebFjlw47qpaslC4KIwu9dAK"));
+  test_bcrypt(0, SDATA("1234"), SDATA("$2y$"));
 }
-- 
2.20.1
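As an illustration of the interface these tests exercise, here is a minimal, hypothetical standalone program (outside the nettle testsuite, so without the SDATA/SHEX/FAIL harness macros) that checks the first vector above against the length-based blowfish_bcrypt_verify() signature used by this patch series. It assumes the patched headers are installed as <nettle/blowfish.h>; it is a sketch, not part of the patch, and the signature may differ from whatever nettle eventually releases.

  /* Hypothetical check of one test vector, using the verify signature
     from this patch series (not necessarily the final nettle API). */
  #include <stdio.h>
  #include <string.h>
  #include <nettle/blowfish.h>

  int
  main(void)
  {
    const char *key = "U*U";
    const char *hash =
      "$2a$05$CCCCCCCCCCCCCCCCCCCCC.E5YPO9kmyuRGyh0XouQYb4YMJKvyOeW";

    /* Lengths are passed explicitly; here the terminating NUL is not
       counted, matching the string-literal tests above. */
    if (blowfish_bcrypt_verify(strlen(key), key, strlen(hash), hash))
      printf("vector verified\n");
    else
      printf("verification failed\n");
    return 0;
  }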
[PATCH 3/3] v4.0 Blowfish: Supply lengths instead of C-strings.
by Stephen R. van den Berg
01 Jul '20
---
 blowfish-bcrypt.c | 56 ++++++++++++++++++++++++-----------------------
 blowfish.h        | 14 +++++++-----
 nettle.texinfo    | 27 +++++++++++++----------
 3 files changed, 52 insertions(+), 45 deletions(-)

diff --git a/blowfish-bcrypt.c b/blowfish-bcrypt.c
index c06f9e90..491aab82 100644
--- a/blowfish-bcrypt.c
+++ b/blowfish-bcrypt.c
@@ -76,13 +76,14 @@ static const char radix64_encode_table[64] =
   "0123456789";

 int
-blowfish_bcrypt_verify(const char *key,
-                       const char *hashed)
+blowfish_bcrypt_verify(size_t lenkey, const char *key,
+                       size_t lenhashed, const char *hashed)
 {
   char newhash[BLOWFISH_BCRYPT_HASH_SIZE];

-  return blowfish_bcrypt_hash(sizeof newhash,
-                              newhash, key, hashed, -1, (void*)0)
+  return blowfish_bcrypt_hash(newhash,
+                              lenkey, key, lenhashed, hashed,
+                              -1, (void*)0)
       && !strcmp(newhash, hashed);
 }

@@ -159,10 +160,12 @@ static void swap32(uint32_t *x, int count)
 #endif
 }

-static void set_xkey(const char *key, bf_key expanded, bf_key initial,
-                     unsigned bug, uint32_t safety)
+static void set_xkey(size_t lenkey, const char *key,
+                     bf_key expanded, bf_key initial,
+                     unsigned bug, uint32_t safety)
 {
   const char *ptr = key;
+  size_t n = lenkey;
   unsigned i, j;
   uint32_t sign, diff, tmp[2];

@@ -219,10 +222,10 @@ static void set_xkey(const char *key, bf_key expanded, bf_key initial,
        */
       if (j)
        sign |= tmp[1] & 0x80;
-      if (!*ptr)
-       ptr = key;
-      else
+      if (n--)
        ptr++;
+      else
+       ptr = key, n = lenkey;
     }

   diff |= tmp[0] ^ tmp[1]; /* Non-zero on any differences */

@@ -259,8 +262,9 @@ static void set_xkey(const char *key, bf_key expanded, bf_key initial,
   initial[0] ^= sign;
 }

-static int ibcrypt(size_t length, char *dst,
-                   const char *key, const char *scheme,
+static int ibcrypt(char *dst,
+                   size_t lenkey, const char *key,
+                   size_t lenscheme, const char *scheme,
                    int minlog2rounds, int log2rounds,
                    const uint8_t *salt)
 {
@@ -277,12 +281,10 @@ static int ibcrypt(size_t length, char *dst,
   uint32_t *ptr;
   uint32_t count;
   int i;
-  size_t lenscheme = strlen(scheme);
   unsigned cscheme;
   unsigned bug = 0;
   uint32_t safety = 0;
-  if (length < BLOWFISH_BCRYPT_HASH_SIZE ||
-      lenscheme < 2)
+  if (lenscheme < 2)
     return 0;

   if (lenscheme >= 3 && *scheme++ != '$')
@@ -336,7 +338,7 @@ static int ibcrypt(size_t length, char *dst,
     return 0;

   count = (uint32_t)1 << log2rounds;
-  set_xkey(key, data.expanded_key, data.ctx.p, bug, safety);
+  set_xkey(lenkey, key, data.expanded_key, data.ctx.p, bug, safety);
   memcpy(data.ctx.s, _nettle_blowfish_initial_ctx.s, sizeof(data.ctx.s));

   L = R = 0;
@@ -461,12 +463,13 @@ static int ibcrypt(size_t length, char *dst,
  * The performance cost of this quick self-test is around 0.6% at the "$2a$08"
  * setting.
  */
-int blowfish_bcrypt_hash(size_t length, char *dst,
-                         const char *key, const char *scheme,
+int blowfish_bcrypt_hash(char *dst,
+                         size_t lenkey, const char *key,
+                         size_t lenscheme, const char *scheme,
                          int log2rounds, const uint8_t *salt)
 {
-  const char *test_pw = "8b \xd0\xc1\xd2\xcf\xcc\xd8";
-  const char *test_scheme = "$2a$00$abcdefghijklmnopqrstuu";
+  const char test_pw[] = "8b \xd0\xc1\xd2\xcf\xcc\xd8";
+  const char test_scheme[] = "$2a$00$abcdefghijklmnopqrstuu";
   static const char * const test_hashes[2] =
     {"i1D709vfamulimlGcq0qq3UvuUasvEa\0\x55", /* 'a', 'b', 'y' */
      "VUrPmXD6q/nVSSp7pNDhCR9071IfIRe\0\x55"}; /* 'x' */
@@ -478,10 +481,9 @@ int blowfish_bcrypt_hash(size_t length, char *dst,
     char o[HASHOFFSET + 31 + 1 + 1 + 1];
   } buf;

-  if (length)
-    *dst = '\0';
+  *dst = '\0';
   /* Hash the supplied password */
-  cscheme = ibcrypt(length, dst, key, scheme, 4, log2rounds, salt);
+  cscheme = ibcrypt(dst, lenkey, key, lenscheme, scheme, 4, log2rounds, salt);

   /*
    * Do a quick self-test.  It is important that we make both calls to ibcrypt()
@@ -497,18 +499,18 @@ int blowfish_bcrypt_hash(size_t length, char *dst,
   memset(buf.o, 0x55, sizeof(buf.o));
   buf.o[sizeof(buf.o) - 1] = 0;
-  ok = ibcrypt(sizeof(buf.o) - (1 + 1), buf.o, test_pw,
-               buf.s, 0, -1, (void*)0);
+  ok = ibcrypt(buf.o, sizeof(test_pw), test_pw,
+               sizeof(buf.s), buf.s, 0, -1, (void*)0);

   ok = (ok &&
         !memcmp(buf.o, buf.s, HASHOFFSET) &&
        !memcmp(buf.o + HASHOFFSET, test_hash, 31 + 1 + 1 + 1));

   {
-    const char *k = "\xff\xa3" "34" "\xff\xff\xff\xa3" "345";
+    const char k[] = "\xff\xa3" "34" "\xff\xff\xff\xa3" "345";
     bf_key ae, ai, ye, yi;
-    set_xkey(k, ae, ai, 0, 0x10000); /* $2a$ */
-    set_xkey(k, ye, yi, 0, 0); /* $2y$ */
+    set_xkey(sizeof(k), k, ae, ai, 0, 0x10000); /* $2a$ */
+    set_xkey(sizeof(k), k, ye, yi, 0, 0); /* $2y$ */
     ai[0] ^= 0x10000; /* undo the safety (for comparison) */
     ok = ok && ai[0] == 0xdb9c59bc && ye[17] == 0x33343500 &&
        !memcmp(ae, ye, sizeof(ae)) &&

diff --git a/blowfish.h b/blowfish.h
index af48e20f..1c5bdbe1 100644
--- a/blowfish.h
+++ b/blowfish.h
@@ -86,16 +86,18 @@ void
 blowfish_decrypt(const struct blowfish_ctx *ctx,
                  size_t length, uint8_t *dst,
                  const uint8_t *src);
+
+/* dst parameter must point to a buffer of minimally
+ * BLOWFISH_BCRYPT_HASH_SIZE bytes */
 int
-blowfish_bcrypt_hash(size_t length,
-                     char *dst,
-                     const char *key,
-                     const char *scheme,
+blowfish_bcrypt_hash(char *dst,
+                     size_t lenkey, const char *key,
+                     size_t lenscheme, const char *scheme,
                      int log2rounds, const uint8_t *salt);

 int
-blowfish_bcrypt_verify(const char *key,
-                       const char *hashed);
+blowfish_bcrypt_verify(size_t lenkey, const char *key,
+                       size_t lenhashed, const char *hashed);

 #ifdef __cplusplus
 }

diff --git a/nettle.texinfo b/nettle.texinfo
index 75e18b58..2269e11d 100644
--- a/nettle.texinfo
+++ b/nettle.texinfo
@@ -1513,7 +1513,7 @@ in any other way.
 Analogous to @code{blowfish_encrypt}
 @end deftypefun

-@deftypefun int blowfish_bcrypt_hash (size_t @var{length}, char *@var{dst}, const char *@var{key}, const char *@var{scheme}, int @var{log2rounds}, const uint8_t *@var{salt})
+@deftypefun int blowfish_bcrypt_hash (char *@var{dst}, size_t @var{lenkey}, const char *@var{key}, size_t @var{lenscheme}, const char *@var{scheme}, int @var{log2rounds}, const uint8_t *@var{salt})
 Compute the bcrypt password hash.
 The function will return @code{0} if the hash cannot be computed due to
 invalid input.
@@ -1522,13 +1522,13 @@ in the array pointed to by @var{dst}.
 The hash is computed based on the chosen @var{scheme}, number of rounds
 @var{log2rounds} and specified @var{salt}.

-@var{length} must be at least @code{BLOWFISH_BCRYPT_HASH_SIZE}.
+@var{dst} must point to a character array of at least
+@code{BLOWFISH_BCRYPT_HASH_SIZE} bytes.

-@var{dst} must point to a character array of the specified @var{length}.
+@var{key} contains the plaintext password string of size @var{lenkey}.

-@var{key} contains the zero terminated plaintext password string.
-
-@var{scheme} contains either just the chosen scheme (valid schemes
+@var{scheme} is of size @var{lenscheme} and contains either just the
+chosen scheme (valid schemes
 are: @code{2a}, @code{2b}, @code{2x} or @code{2y}), or (the prefix of)
 an existing hashed password (typically @code{$2b$10$...}).

@@ -1543,26 +1543,28 @@ the salt will be extracted from @var{scheme}.
 Sample code to generate a bcrypt hash:
 @example
 char cleartxtpassword[] = "ExamplePassword";
+char scheme[] = "2b";
 uint8_t salt[BLOWFISH_BCRYPT_BINSALT_SIZE];
 @dots{} /* Make sure that salt is filled with random bytes */
 @dots{}
 char hashedresult[BLOWFISH_BCRYPT_HASH_SIZE];
-int result = blowfish_bcrypt(sizeof(hashedresult), hashedresult,
-                             cleartxtpassword, "2b", 10, salt);
+int result = blowfish_bcrypt(hashedresult,
+                             sizeof(cleartxtpassword), cleartxtpassword,
+                             sizeof(scheme), scheme, 10, salt);
 if (result)
   printf("%s\n", hashedresult);
 @end example
 @end deftypefun

-@deftypefun int blowfish_bcrypt_verify (const char *@var{key}, const char *@var{hashed})
+@deftypefun int blowfish_bcrypt_verify (size_t @var{lenkey}, const char *@var{key}, size_t @var{lenhashed}, const char *@var{hashed})
 Verifies the bcrypt password hash against the supplied plaintext password.
 The function will return @code{0} if the password does not match.
 The function will return @code{1} if the password matches.

-@var{key} contains the zero terminated plaintext password string.
+@var{key} contains the plaintext password string of size @var{lenkey}.

-@var{hashed} contains the zero terminated hashed string to compare with.
+@var{hashed} contains the hashed string of size @var{lenhashed} to compare with.

 Sample code to verify a bcrypt hash:
 @example
@@ -1573,7 +1575,8 @@ char existinghashed[] =
   "$"                      /* separator */
   "1b2lPgo4XumibnJGN3r3sO" /* base64 encoded 16-byte salt */
   "u7wE7xNfYDKlAxZffJDCJdVfFTAyevu"; /* Hashedpart */

-if (blowfish_bcrypt_verify(cleartxtpassword, existinghashed))
+if (blowfish_bcrypt_verify(sizeof(cleartxtpassword), cleartxtpassword,
+                           sizeof(existinghashed), existinghashed))
   printf("Password is correct.");
 else
   printf("Password is incorrect.");
-- 
2.20.1
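To make the signature change easier to review, the sketch below shows what a call site looks like under the prototypes proposed in this patch. It is illustrative only: the function and macro names come from the diff above, the helper functions are made up, and the snippet passes strlen() because its parameters are pointers, whereas the patch's own texinfo example passes sizeof() on char arrays (which additionally counts the terminating NUL); which convention is intended is worth confirming with the patch.

  /* Call-site sketch for the length-based API proposed in this patch.
     Previously the hash call was roughly
       blowfish_bcrypt_hash(sizeof(dst), dst, password, "2b", 10, salt);
     with NUL-terminated key and scheme strings. */
  #include <stdint.h>
  #include <string.h>
  #include <nettle/blowfish.h>

  int
  make_hash(const char *password,
            const uint8_t salt[BLOWFISH_BCRYPT_BINSALT_SIZE],
            char dst[BLOWFISH_BCRYPT_HASH_SIZE])
  {
    const char scheme[] = "2b";   /* or an existing "$2b$10$..." prefix */

    /* dst must provide BLOWFISH_BCRYPT_HASH_SIZE bytes; key and scheme
       lengths are now explicit parameters. */
    return blowfish_bcrypt_hash(dst,
                                strlen(password), password,
                                strlen(scheme), scheme,
                                10, salt);
  }

  int
  check_hash(const char *password, const char *stored)
  {
    return blowfish_bcrypt_verify(strlen(password), password,
                                  strlen(stored), stored);
  }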
[Patch] Optimize AES and GHASH for PowerPC64 (support little-endian and big-endian)
by Maamoun TK
01 Jul '20
Patch implementation benchmark for GCM_AES (tested on POWER8):

little-endian:
- Encrypt: x~17.5 of nettle C implementation
- Decrypt: x~17.5 of nettle C implementation
- Update:  x~30 of nettle C implementation

big-endian:
- Encrypt: x~18.5 of nettle C implementation
- Decrypt: x~18.5 of nettle C implementation
- Update:  x~28.5 of nettle C implementation

I investigated falling back to processing 4 blocks instead of 8; the 8x loop
outperforms the 4x loop by x~1.22 on POWER8. Processing 4 blocks leaves plenty
of registers on the table, and those spare registers can be used to overlap
instructions inside the loop to reduce the performance margin. The GHASH loop
has two separate operations:
- Digest calculation
- Reduction

The reduction procedure is hard to parallelize. To overcome that, the two
procedures can be overlapped as follows:

|`````````````````````````````````````````|
| Head:                                    |
| Digest calculation [0..3]                |
|`````````````````````````````````````````|
| Loop:                                    |
| Overlapped Reduction [i+0..i+3] and      |
| Digest calculation [i+4..i+7]            |
|`````````````````````````````````````````|
| Tail:                                    |
| Reduction [i+4..i+7]                     |
````````````````````````````````````````````

With that implemented, the performance margin is reduced to x~1.15, but the
code gets messier. I can add more documentation so the reader can still keep
track of what is going on, but the remaining margin is still considerable, so
I think it is better to stick with the 8x loop.

- The patch passed the testsuite for both ppc64 and ppc64el.
---
 Makefile.in                        |    2 +-
 asm.m4                             |   17 +-
 config.m4.in                       |    1 +
 configure.ac                       |   12 +
 gcm.c                              |   19 +-
 powerpc64/aes-decrypt-internal.asm |  584 +++++++++++++++++++++
 powerpc64/aes-encrypt-internal.asm |  545 +++++++++++++++++++
 powerpc64/gcm-hash8.asm            | 1015 ++++++++++++++++++++++++++++++++++++
 powerpc64/machine.m4               |    0
 testsuite/gcm-test.c               |   23 +
 10 files changed, 2214 insertions(+), 4 deletions(-)
 create mode 100644 powerpc64/aes-decrypt-internal.asm
 create mode 100644 powerpc64/aes-encrypt-internal.asm
 create mode 100644 powerpc64/gcm-hash8.asm
 create mode 100644 powerpc64/machine.m4

diff --git a/Makefile.in b/Makefile.in
index 64ff1001..fdc819f1 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -603,7 +603,7 @@ distdir: $(DISTFILES)
 	done
 	set -e; for d in sparc32 sparc64 x86 \
 		x86_64 x86_64/aesni x86_64/sha_ni x86_64/fat \
-		arm arm/neon arm/v6 arm/fat ; do \
+		arm arm/neon arm/v6 arm/fat powerpc64 ; do \
 	  mkdir "$(distdir)/$$d" ; \
 	  find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' ')' \
 	    -exec cp '{}' "$(distdir)/$$d" ';' ; \

diff --git a/asm.m4 b/asm.m4
index 59d64098..1033a326 100644
--- a/asm.m4
+++ b/asm.m4
@@ -30,13 +30,26 @@ COFF_STYLE, yes,
 define(<GMP_NUMB_BITS>,<>)dnl

 define(<PROLOGUE>,
+<ifelse(ASM_POWERPC64,no,
 <.globl C_NAME($1)
 DECLARE_FUNC(C_NAME($1))
-C_NAME($1): ASM_X86_ENDBR>)
+C_NAME($1): ASM_X86_ENDBR>,
+<.globl C_NAME($1)
+DECLARE_FUNC(C_NAME($1))
+.section ".opd","aw"
+.align 3
+C_NAME($1):
+.quad .C_NAME($1),.TOC.@tocbase,0
+.previous
+.align 5
+.C_NAME($1):>)>)

 define(<EPILOGUE>,
 <ifelse(ELF_STYLE,yes,
-<.size C_NAME($1), . - C_NAME($1)>,<>)>)
+<ifelse(ASM_POWERPC64,no,
+<.size C_NAME($1), . - C_NAME($1)>,
+<.size .C_NAME($1), . - .C_NAME($1)
+.size C_NAME($1), . 
- .C_NAME($1)>)>,<>)>) define(<m4_log2>, <m4_log2_internal($1,1,0)>) define(<m4_log2_internal>, diff --git a/config.m4.in b/config.m4.in index f7f5f283..6265a679 100644 --- a/config.m4.in +++ b/config.m4.in @@ -10,6 +10,7 @@ define(<RODATA>, <@ASM_RODATA@>)dnl define(<WORDS_BIGENDIAN>, <@ASM_WORDS_BIGENDIAN@>)dnl define(<ASM_X86_ENDBR>,<@ASM_X86_ENDBR@>)dnl define(<ASM_X86_MARK_CET_ALIGN>,<@ASM_X86_MARK_CET_ALIGN@>)dnl +define(<ASM_POWERPC64>,<@ASM_POWERPC64@>)dnl divert(1) @ASM_X86_MARK_CET@ @ASM_MARK_NOEXEC_STACK@ diff --git a/configure.ac b/configure.ac index 90ea1ea8..d7baface 100644 --- a/configure.ac +++ b/configure.ac @@ -435,6 +435,9 @@ if test "x$enable_assembler" = xyes ; then esac fi ;; + *powerpc64*) + asm_path=powerpc64 + ;; *) enable_assembler=no ;; @@ -572,7 +575,9 @@ AH_VERBATIM([HAVE_NATIVE], #undef HAVE_NATIVE_ecc_secp384r1_redc #undef HAVE_NATIVE_ecc_secp521r1_modp #undef HAVE_NATIVE_ecc_secp521r1_redc +#undef HAVE_NATIVE_gcm_init_key8 #undef HAVE_NATIVE_gcm_hash8 +#undef HAVE_NATIVE_gcm_fill #undef HAVE_NATIVE_salsa20_core #undef HAVE_NATIVE_sha1_compress #undef HAVE_NATIVE_sha256_compress @@ -866,6 +871,12 @@ if test "$nettle_cv_asm_x86_gnu_property" = yes; then .popsection' fi +if test "$host_cpu" = "powerpc64" ; then + ASM_POWERPC64='yes' +else + ASM_POWERPC64='no' +fi + AC_SUBST(ASM_SYMBOL_PREFIX) AC_SUBST(ASM_ELF_STYLE) AC_SUBST(ASM_COFF_STYLE) @@ -879,6 +890,7 @@ AC_SUBST(EMULATOR) AC_SUBST(ASM_X86_ENDBR) AC_SUBST(ASM_X86_MARK_CET) AC_SUBST(ASM_X86_MARK_CET_ALIGN) +AC_SUBST(ASM_POWERPC64) AC_SUBST(LIBNETTLE_MAJOR) AC_SUBST(LIBNETTLE_MINOR) diff --git a/gcm.c b/gcm.c index cf615daf..809c03bc 100644 --- a/gcm.c +++ b/gcm.c @@ -140,6 +140,12 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table) memcpy (x->b, Z.b, sizeof(Z)); } # elif GCM_TABLE_BITS == 8 +# if HAVE_NATIVE_gcm_init_key8 + +#define gcm_init_key _nettle_gcm_init_key8 +void +_nettle_gcm_init_key8 (union nettle_block16 *table); +# endif /* HAVE_NATIVE_gcm_init_key8 */ # if HAVE_NATIVE_gcm_hash8 #define gcm_hash _nettle_gcm_hash8 @@ -225,6 +231,13 @@ gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table) #endif /* GCM_TABLE_BITS */ +#if HAVE_NATIVE_gcm_fill + +#define gcm_fill _nettle_gcm_fill +void +_nettle_gcm_fill (uint8_t *ctr, size_t blocks, union nettle_block16 *buffer); +#endif /* HAVE_NATIVE_gcm_fill */ + /* Increment the rightmost 32 bits. */ #define INC32(block) INCREMENT(4, (block.b) + GCM_BLOCK_SIZE - 4) @@ -245,7 +258,9 @@ gcm_set_key(struct gcm_key *key, memset(key->h[0].b, 0, GCM_BLOCK_SIZE); f (cipher, GCM_BLOCK_SIZE, key->h[i].b, key->h[0].b); -#if GCM_TABLE_BITS +#ifdef gcm_init_key + gcm_init_key(key->h); +#elif GCM_TABLE_BITS /* Algorithm 3 from the gcm paper. First do powers of two, then do the rest by adding. 
*/ while (i /= 2) @@ -333,6 +348,7 @@ gcm_update(struct gcm_ctx *ctx, const struct gcm_key *key, ctx->auth_size += length; } +#ifndef gcm_fill static nettle_fill16_func gcm_fill; static void gcm_fill(uint8_t *ctr, size_t blocks, union nettle_block16 *buffer) @@ -349,6 +365,7 @@ gcm_fill(uint8_t *ctr, size_t blocks, union nettle_block16 *buffer) WRITE_UINT32(ctr + GCM_BLOCK_SIZE - 4, c); } +#endif /* !gcm_fill */ void gcm_encrypt (struct gcm_ctx *ctx, const struct gcm_key *key, diff --git a/powerpc64/aes-decrypt-internal.asm b/powerpc64/aes-decrypt-internal.asm new file mode 100644 index 00000000..70fd69f0 --- /dev/null +++ b/powerpc64/aes-decrypt-internal.asm @@ -0,0 +1,584 @@ +C powerpc64/aes-decrypt-internal.asm + +ifelse(< + Copyright (C) 2020 Mamone Tarsha + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see
http://www.gnu.org/licenses/
. +>) + +C Register usage: + +define(<SP>, <1>) +define(<TOCP>, <2>) + +define(<ROUNDS>, <3>) +define(<KEYS>, <4>) +define(<LENGTH>, <6>) +define(<DST>, <7>) +define(<SRC>, <8>) + +define(<swap_mask>, <0>) + +define(<K>, <1>) +define(<S0>, <2>) +define(<S1>, <3>) +define(<S2>, <4>) +define(<S3>, <5>) +define(<S4>, <6>) +define(<S5>, <7>) +define(<S6>, <8>) +define(<S7>, <9>) +define(<S8>, <10>) +define(<S9>, <11>) +define(<S10>, <12>) +define(<S11>, <13>) +define(<S12>, <14>) +define(<S13>, <15>) +define(<S14>, <16>) +define(<S15>, <17>) + +define(<KX>, <33>) +define(<S0X>, <34>) +define(<S1X>, <35>) +define(<S2X>, <36>) +define(<S3X>, <37>) +define(<S4X>, <38>) +define(<S5X>, <39>) +define(<S6X>, <40>) +define(<S7X>, <41>) +define(<S8X>, <42>) +define(<S9X>, <43>) +define(<S10X>, <44>) +define(<S11X>, <45>) +define(<S12X>, <46>) +define(<S13X>, <47>) +define(<S14X>, <48>) +define(<S15X>, <49>) + +C ZERO vector register is used in place of RoundKey +C for vncipher instruction because the order of InvMixColumns +C and Xor processes are flipped in that instruction. +C The Xor process with RoundKey is executed afterward. +define(<ZERO>, <18>) + +.file "aes-decrypt-internal.asm" + +.machine "any" + +IF_LE(<.abiversion 2>) +.text + + C _aes_decrypt(unsigned rounds, const uint32_t *keys, + C const struct aes_table *T, + C size_t length, uint8_t *dst, + C uint8_t *src) + +IF_LE(<.align 5>) +PROLOGUE(_nettle_aes_decrypt) +IF_LE(<.localentry _nettle_aes_decrypt,0>) + + vxor ZERO,ZERO,ZERO + + ld 5,.swap_mask@got(TOCP) + lvx swap_mask,0,5 + + subi ROUNDS,ROUNDS,1 + srdi LENGTH,LENGTH,4 + + srdi 5,LENGTH,4 #16x loop count + cmpldi 5,0 + beq L8x + + std 17,-120(SP); + std 18,-112(SP); + std 19,-104(SP); + std 20,-96(SP); + std 21,-88(SP); + std 22,-80(SP); + std 23,-72(SP); + std 24,-64(SP); + std 25,-56(SP); + std 26,-48(SP); + std 27,-40(SP); + std 28,-32(SP); + std 29,-24(SP); + std 30,-16(SP); + std 31,-8(SP); + + li 17,0x10 + li 18,0x20 + li 19,0x30 + li 20,0x40 + li 21,0x50 + li 22,0x60 + li 23,0x70 + li 24,0x80 + li 25,0x90 + li 26,0xA0 + li 27,0xB0 + li 28,0xC0 + li 29,0xD0 + li 30,0xE0 + li 31,0xF0 + +.align 5 +Lx16_loop: + lxvd2x KX,0,KEYS + vperm K,K,K,swap_mask + + lxvd2x S0X,0,SRC + lxvd2x S1X,17,SRC + lxvd2x S2X,18,SRC + lxvd2x S3X,19,SRC + lxvd2x S4X,20,SRC + lxvd2x S5X,21,SRC + lxvd2x S6X,22,SRC + lxvd2x S7X,23,SRC + lxvd2x S8X,24,SRC + lxvd2x S9X,25,SRC + lxvd2x S10X,26,SRC + lxvd2x S11X,27,SRC + lxvd2x S12X,28,SRC + lxvd2x S13X,29,SRC + lxvd2x S14X,30,SRC + lxvd2x S15X,31,SRC + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask + vperm S4,S4,S4,swap_mask + vperm S5,S5,S5,swap_mask + vperm S6,S6,S6,swap_mask + vperm S7,S7,S7,swap_mask + vperm S8,S8,S8,swap_mask + vperm S9,S9,S9,swap_mask + vperm S10,S10,S10,swap_mask + vperm S11,S11,S11,swap_mask + vperm S12,S12,S12,swap_mask + vperm S13,S13,S13,swap_mask + vperm S14,S14,S14,swap_mask + vperm S15,S15,S15,swap_mask>) + + vxor S0,S0,K + vxor S1,S1,K + vxor S2,S2,K + vxor S3,S3,K + vxor S4,S4,K + vxor S5,S5,K + vxor S6,S6,K + vxor S7,S7,K + vxor S8,S8,K + vxor S9,S9,K + vxor S10,S10,K + vxor S11,S11,K + vxor S12,S12,K + vxor S13,S13,K + vxor S14,S14,K + vxor S15,S15,K + + mtctr ROUNDS + li 10,0x10 +.align 5 +L16x_round_loop: + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vncipher S0,S0,ZERO + vncipher S1,S1,ZERO + vncipher S2,S2,ZERO + vncipher S3,S3,ZERO + vncipher S4,S4,ZERO + vncipher S5,S5,ZERO + vncipher S6,S6,ZERO + vncipher S7,S7,ZERO + vncipher S8,S8,ZERO + vncipher 
S9,S9,ZERO + vncipher S10,S10,ZERO + vncipher S11,S11,ZERO + vncipher S12,S12,ZERO + vncipher S13,S13,ZERO + vncipher S14,S14,ZERO + vncipher S15,S15,ZERO + vxor S0,S0,K + vxor S1,S1,K + vxor S2,S2,K + vxor S3,S3,K + vxor S4,S4,K + vxor S5,S5,K + vxor S6,S6,K + vxor S7,S7,K + vxor S8,S8,K + vxor S9,S9,K + vxor S10,S10,K + vxor S11,S11,K + vxor S12,S12,K + vxor S13,S13,K + vxor S14,S14,K + vxor S15,S15,K + addi 10,10,0x10 + bdnz L16x_round_loop + + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vncipherlast S0,S0,K + vncipherlast S1,S1,K + vncipherlast S2,S2,K + vncipherlast S3,S3,K + vncipherlast S4,S4,K + vncipherlast S5,S5,K + vncipherlast S6,S6,K + vncipherlast S7,S7,K + vncipherlast S8,S8,K + vncipherlast S9,S9,K + vncipherlast S10,S10,K + vncipherlast S11,S11,K + vncipherlast S12,S12,K + vncipherlast S13,S13,K + vncipherlast S14,S14,K + vncipherlast S15,S15,K + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask + vperm S4,S4,S4,swap_mask + vperm S5,S5,S5,swap_mask + vperm S6,S6,S6,swap_mask + vperm S7,S7,S7,swap_mask + vperm S8,S8,S8,swap_mask + vperm S9,S9,S9,swap_mask + vperm S10,S10,S10,swap_mask + vperm S11,S11,S11,swap_mask + vperm S12,S12,S12,swap_mask + vperm S13,S13,S13,swap_mask + vperm S14,S14,S14,swap_mask + vperm S15,S15,S15,swap_mask>) + + stxvd2x S0X,0,DST + stxvd2x S1X,17,DST + stxvd2x S2X,18,DST + stxvd2x S3X,19,DST + stxvd2x S4X,20,DST + stxvd2x S5X,21,DST + stxvd2x S6X,22,DST + stxvd2x S7X,23,DST + stxvd2x S8X,24,DST + stxvd2x S9X,25,DST + stxvd2x S10X,26,DST + stxvd2x S11X,27,DST + stxvd2x S12X,28,DST + stxvd2x S13X,29,DST + stxvd2x S14X,30,DST + stxvd2x S15X,31,DST + + addi SRC,SRC,0x100 + addi DST,DST,0x100 + subic. 5,5,1 + bne Lx16_loop + + ld 17,-120(SP); + ld 18,-112(SP); + ld 19,-104(SP); + ld 20,-96(SP); + ld 21,-88(SP); + ld 22,-80(SP); + ld 23,-72(SP); + ld 24,-64(SP); + ld 25,-56(SP); + ld 26,-48(SP); + ld 27,-40(SP); + ld 28,-32(SP); + ld 29,-24(SP); + ld 30,-16(SP); + ld 31,-8(SP); + + clrldi LENGTH,LENGTH,60 + +L8x: + srdi 5,LENGTH,3 + cmpldi 5,0 + beq L4x + + lxvd2x KX,0,KEYS + vperm K,K,K,swap_mask + + lxvd2x S0X,0,SRC + li 9,0x10 + lxvd2x S1X,9,SRC + addi 9,9,0x10 + lxvd2x S2X,9,SRC + addi 9,9,0x10 + lxvd2x S3X,9,SRC + addi 9,9,0x10 + lxvd2x S4X,9,SRC + addi 9,9,0x10 + lxvd2x S5X,9,SRC + addi 9,9,0x10 + lxvd2x S6X,9,SRC + addi 9,9,0x10 + lxvd2x S7X,9,SRC + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask + vperm S4,S4,S4,swap_mask + vperm S5,S5,S5,swap_mask + vperm S6,S6,S6,swap_mask + vperm S7,S7,S7,swap_mask>) + + vxor S0,S0,K + vxor S1,S1,K + vxor S2,S2,K + vxor S3,S3,K + vxor S4,S4,K + vxor S5,S5,K + vxor S6,S6,K + vxor S7,S7,K + + mtctr ROUNDS + li 10,0x10 +.align 5 +L8x_round_loop: + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vncipher S0,S0,ZERO + vncipher S1,S1,ZERO + vncipher S2,S2,ZERO + vncipher S3,S3,ZERO + vncipher S4,S4,ZERO + vncipher S5,S5,ZERO + vncipher S6,S6,ZERO + vncipher S7,S7,ZERO + vxor S0,S0,K + vxor S1,S1,K + vxor S2,S2,K + vxor S3,S3,K + vxor S4,S4,K + vxor S5,S5,K + vxor S6,S6,K + vxor S7,S7,K + addi 10,10,0x10 + bdnz L8x_round_loop + + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vncipherlast S0,S0,K + vncipherlast S1,S1,K + vncipherlast S2,S2,K + vncipherlast S3,S3,K + vncipherlast S4,S4,K + vncipherlast S5,S5,K + vncipherlast S6,S6,K + vncipherlast S7,S7,K + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask + vperm S4,S4,S4,swap_mask + 
vperm S5,S5,S5,swap_mask + vperm S6,S6,S6,swap_mask + vperm S7,S7,S7,swap_mask>) + + stxvd2x S0X,0,DST + li 9,0x10 + stxvd2x S1X,9,DST + addi 9,9,0x10 + stxvd2x S2X,9,DST + addi 9,9,0x10 + stxvd2x S3X,9,DST + addi 9,9,0x10 + stxvd2x S4X,9,DST + addi 9,9,0x10 + stxvd2x S5X,9,DST + addi 9,9,0x10 + stxvd2x S6X,9,DST + addi 9,9,0x10 + stxvd2x S7X,9,DST + + addi SRC,SRC,0x80 + addi DST,DST,0x80 + + clrldi LENGTH,LENGTH,61 + +L4x: + srdi 5,LENGTH,2 + cmpldi 5,0 + beq L2x + + lxvd2x KX,0,KEYS + vperm K,K,K,swap_mask + + lxvd2x S0X,0,SRC + li 9,0x10 + lxvd2x S1X,9,SRC + addi 9,9,0x10 + lxvd2x S2X,9,SRC + addi 9,9,0x10 + lxvd2x S3X,9,SRC + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask>) + + vxor S0,S0,K + vxor S1,S1,K + vxor S2,S2,K + vxor S3,S3,K + + mtctr ROUNDS + li 10,0x10 +.align 5 +L4x_round_loop: + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vncipher S0,S0,ZERO + vncipher S1,S1,ZERO + vncipher S2,S2,ZERO + vncipher S3,S3,ZERO + vxor S0,S0,K + vxor S1,S1,K + vxor S2,S2,K + vxor S3,S3,K + addi 10,10,0x10 + bdnz L4x_round_loop + + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vncipherlast S0,S0,K + vncipherlast S1,S1,K + vncipherlast S2,S2,K + vncipherlast S3,S3,K + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask>) + + stxvd2x S0X,0,DST + li 9,0x10 + stxvd2x S1X,9,DST + addi 9,9,0x10 + stxvd2x S2X,9,DST + addi 9,9,0x10 + stxvd2x S3X,9,DST + + addi SRC,SRC,0x40 + addi DST,DST,0x40 + + clrldi LENGTH,LENGTH,62 + +L2x: + srdi 5,LENGTH,1 + cmpldi 5,0 + beq L1x + + lxvd2x KX,0,KEYS + vperm K,K,K,swap_mask + + lxvd2x S0X,0,SRC + li 9,0x10 + lxvd2x S1X,9,SRC + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask>) + + vxor S0,S0,K + vxor S1,S1,K + + mtctr ROUNDS + li 10,0x10 +.align 5 +L2x_round_loop: + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vncipher S0,S0,ZERO + vncipher S1,S1,ZERO + vxor S0,S0,K + vxor S1,S1,K + addi 10,10,0x10 + bdnz L2x_round_loop + + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vncipherlast S0,S0,K + vncipherlast S1,S1,K + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask>) + + stxvd2x S0X,0,DST + li 9,0x10 + stxvd2x S1X,9,DST + + addi SRC,SRC,0x20 + addi DST,DST,0x20 + + clrldi LENGTH,LENGTH,63 + +L1x: + cmpldi LENGTH,0 + beq Ldone + + lxvd2x KX,0,KEYS + vperm K,K,K,swap_mask + + lxvd2x S0X,0,SRC + +IF_LE(<vperm S0,S0,S0,swap_mask>) + + vxor S0,S0,K + + mtctr ROUNDS + li 10,0x10 +.align 5 +L1x_round_loop: + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vncipher S0,S0,ZERO + vxor S0,S0,K + addi 10,10,0x10 + bdnz L1x_round_loop + + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vncipherlast S0,S0,K + +IF_LE(<vperm S0,S0,S0,swap_mask>) + + stxvd2x S0X,0,DST + +Ldone: + blr +EPILOGUE(_nettle_aes_decrypt) + + .data + .align 4 +.swap_mask: +IF_LE(<.byte 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7>) +IF_BE(<.byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12>) diff --git a/powerpc64/aes-encrypt-internal.asm b/powerpc64/aes-encrypt-internal.asm new file mode 100644 index 00000000..20711598 --- /dev/null +++ b/powerpc64/aes-encrypt-internal.asm @@ -0,0 +1,545 @@ +C powerpc64/aes-encrypt-internal.asm + +ifelse(< + Copyright (C) 2020 Mamone Tarsha + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. 
+ + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see
http://www.gnu.org/licenses/
. +>) + +C Register usage: + +define(<SP>, <1>) +define(<TOCP>, <2>) + +define(<ROUNDS>, <3>) +define(<KEYS>, <4>) +define(<LENGTH>, <6>) +define(<DST>, <7>) +define(<SRC>, <8>) + +define(<swap_mask>, <0>) + +define(<K>, <1>) +define(<S0>, <2>) +define(<S1>, <3>) +define(<S2>, <4>) +define(<S3>, <5>) +define(<S4>, <6>) +define(<S5>, <7>) +define(<S6>, <8>) +define(<S7>, <9>) +define(<S8>, <10>) +define(<S9>, <11>) +define(<S10>, <12>) +define(<S11>, <13>) +define(<S12>, <14>) +define(<S13>, <15>) +define(<S14>, <16>) +define(<S15>, <17>) + +define(<KX>, <33>) +define(<S0X>, <34>) +define(<S1X>, <35>) +define(<S2X>, <36>) +define(<S3X>, <37>) +define(<S4X>, <38>) +define(<S5X>, <39>) +define(<S6X>, <40>) +define(<S7X>, <41>) +define(<S8X>, <42>) +define(<S9X>, <43>) +define(<S10X>, <44>) +define(<S11X>, <45>) +define(<S12X>, <46>) +define(<S13X>, <47>) +define(<S14X>, <48>) +define(<S15X>, <49>) + +.file "aes-encrypt-internal.asm" + +.machine "any" + +IF_LE(<.abiversion 2>) +.text + + C _aes_encrypt(unsigned rounds, const uint32_t *keys, + C const struct aes_table *T, + C size_t length, uint8_t *dst, + C uint8_t *src) + +IF_LE(<.align 5>) +PROLOGUE(_nettle_aes_encrypt) +IF_LE(<.localentry _nettle_aes_encrypt,0>) + + ld 5,.swap_mask@got(TOCP) + lvx swap_mask,0,5 + + subi ROUNDS,ROUNDS,1 + srdi LENGTH,LENGTH,4 + + srdi 5,LENGTH,4 #16x loop count + cmpldi 5,0 + beq L8x + + std 17,-120(SP); + std 18,-112(SP); + std 19,-104(SP); + std 20,-96(SP); + std 21,-88(SP); + std 22,-80(SP); + std 23,-72(SP); + std 24,-64(SP); + std 25,-56(SP); + std 26,-48(SP); + std 27,-40(SP); + std 28,-32(SP); + std 29,-24(SP); + std 30,-16(SP); + std 31,-8(SP); + + li 17,0x10 + li 18,0x20 + li 19,0x30 + li 20,0x40 + li 21,0x50 + li 22,0x60 + li 23,0x70 + li 24,0x80 + li 25,0x90 + li 26,0xA0 + li 27,0xB0 + li 28,0xC0 + li 29,0xD0 + li 30,0xE0 + li 31,0xF0 + +.align 5 +Lx16_loop: + lxvd2x KX,0,KEYS + vperm K,K,K,swap_mask + + lxvd2x S0X,0,SRC + lxvd2x S1X,17,SRC + lxvd2x S2X,18,SRC + lxvd2x S3X,19,SRC + lxvd2x S4X,20,SRC + lxvd2x S5X,21,SRC + lxvd2x S6X,22,SRC + lxvd2x S7X,23,SRC + lxvd2x S8X,24,SRC + lxvd2x S9X,25,SRC + lxvd2x S10X,26,SRC + lxvd2x S11X,27,SRC + lxvd2x S12X,28,SRC + lxvd2x S13X,29,SRC + lxvd2x S14X,30,SRC + lxvd2x S15X,31,SRC + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask + vperm S4,S4,S4,swap_mask + vperm S5,S5,S5,swap_mask + vperm S6,S6,S6,swap_mask + vperm S7,S7,S7,swap_mask + vperm S8,S8,S8,swap_mask + vperm S9,S9,S9,swap_mask + vperm S10,S10,S10,swap_mask + vperm S11,S11,S11,swap_mask + vperm S12,S12,S12,swap_mask + vperm S13,S13,S13,swap_mask + vperm S14,S14,S14,swap_mask + vperm S15,S15,S15,swap_mask>) + + vxor S0,S0,K + vxor S1,S1,K + vxor S2,S2,K + vxor S3,S3,K + vxor S4,S4,K + vxor S5,S5,K + vxor S6,S6,K + vxor S7,S7,K + vxor S8,S8,K + vxor S9,S9,K + vxor S10,S10,K + vxor S11,S11,K + vxor S12,S12,K + vxor S13,S13,K + vxor S14,S14,K + vxor S15,S15,K + + mtctr ROUNDS + li 10,0x10 +.align 5 +L16x_round_loop: + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vcipher S0,S0,K + vcipher S1,S1,K + vcipher S2,S2,K + vcipher S3,S3,K + vcipher S4,S4,K + vcipher S5,S5,K + vcipher S6,S6,K + vcipher S7,S7,K + vcipher S8,S8,K + vcipher S9,S9,K + vcipher S10,S10,K + vcipher S11,S11,K + vcipher S12,S12,K + vcipher S13,S13,K + vcipher S14,S14,K + vcipher S15,S15,K + addi 10,10,0x10 + bdnz L16x_round_loop + + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vcipherlast S0,S0,K + vcipherlast S1,S1,K + vcipherlast S2,S2,K + vcipherlast S3,S3,K + 
vcipherlast S4,S4,K + vcipherlast S5,S5,K + vcipherlast S6,S6,K + vcipherlast S7,S7,K + vcipherlast S8,S8,K + vcipherlast S9,S9,K + vcipherlast S10,S10,K + vcipherlast S11,S11,K + vcipherlast S12,S12,K + vcipherlast S13,S13,K + vcipherlast S14,S14,K + vcipherlast S15,S15,K + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask + vperm S4,S4,S4,swap_mask + vperm S5,S5,S5,swap_mask + vperm S6,S6,S6,swap_mask + vperm S7,S7,S7,swap_mask + vperm S8,S8,S8,swap_mask + vperm S9,S9,S9,swap_mask + vperm S10,S10,S10,swap_mask + vperm S11,S11,S11,swap_mask + vperm S12,S12,S12,swap_mask + vperm S13,S13,S13,swap_mask + vperm S14,S14,S14,swap_mask + vperm S15,S15,S15,swap_mask>) + + stxvd2x S0X,0,DST + stxvd2x S1X,17,DST + stxvd2x S2X,18,DST + stxvd2x S3X,19,DST + stxvd2x S4X,20,DST + stxvd2x S5X,21,DST + stxvd2x S6X,22,DST + stxvd2x S7X,23,DST + stxvd2x S8X,24,DST + stxvd2x S9X,25,DST + stxvd2x S10X,26,DST + stxvd2x S11X,27,DST + stxvd2x S12X,28,DST + stxvd2x S13X,29,DST + stxvd2x S14X,30,DST + stxvd2x S15X,31,DST + + addi SRC,SRC,0x100 + addi DST,DST,0x100 + subic. 5,5,1 + bne Lx16_loop + + ld 17,-120(SP); + ld 18,-112(SP); + ld 19,-104(SP); + ld 20,-96(SP); + ld 21,-88(SP); + ld 22,-80(SP); + ld 23,-72(SP); + ld 24,-64(SP); + ld 25,-56(SP); + ld 26,-48(SP); + ld 27,-40(SP); + ld 28,-32(SP); + ld 29,-24(SP); + ld 30,-16(SP); + ld 31,-8(SP); + + clrldi LENGTH,LENGTH,60 + +L8x: + srdi 5,LENGTH,3 + cmpldi 5,0 + beq L4x + + lxvd2x KX,0,KEYS + vperm K,K,K,swap_mask + + lxvd2x S0X,0,SRC + li 9,0x10 + lxvd2x S1X,9,SRC + addi 9,9,0x10 + lxvd2x S2X,9,SRC + addi 9,9,0x10 + lxvd2x S3X,9,SRC + addi 9,9,0x10 + lxvd2x S4X,9,SRC + addi 9,9,0x10 + lxvd2x S5X,9,SRC + addi 9,9,0x10 + lxvd2x S6X,9,SRC + addi 9,9,0x10 + lxvd2x S7X,9,SRC + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask + vperm S4,S4,S4,swap_mask + vperm S5,S5,S5,swap_mask + vperm S6,S6,S6,swap_mask + vperm S7,S7,S7,swap_mask>) + + vxor S0,S0,K + vxor S1,S1,K + vxor S2,S2,K + vxor S3,S3,K + vxor S4,S4,K + vxor S5,S5,K + vxor S6,S6,K + vxor S7,S7,K + + mtctr ROUNDS + li 10,0x10 +.align 5 +L8x_round_loop: + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vcipher S0,S0,K + vcipher S1,S1,K + vcipher S2,S2,K + vcipher S3,S3,K + vcipher S4,S4,K + vcipher S5,S5,K + vcipher S6,S6,K + vcipher S7,S7,K + addi 10,10,0x10 + bdnz L8x_round_loop + + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vcipherlast S0,S0,K + vcipherlast S1,S1,K + vcipherlast S2,S2,K + vcipherlast S3,S3,K + vcipherlast S4,S4,K + vcipherlast S5,S5,K + vcipherlast S6,S6,K + vcipherlast S7,S7,K + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask + vperm S4,S4,S4,swap_mask + vperm S5,S5,S5,swap_mask + vperm S6,S6,S6,swap_mask + vperm S7,S7,S7,swap_mask>) + + stxvd2x S0X,0,DST + li 9,0x10 + stxvd2x S1X,9,DST + addi 9,9,0x10 + stxvd2x S2X,9,DST + addi 9,9,0x10 + stxvd2x S3X,9,DST + addi 9,9,0x10 + stxvd2x S4X,9,DST + addi 9,9,0x10 + stxvd2x S5X,9,DST + addi 9,9,0x10 + stxvd2x S6X,9,DST + addi 9,9,0x10 + stxvd2x S7X,9,DST + + addi SRC,SRC,0x80 + addi DST,DST,0x80 + + clrldi LENGTH,LENGTH,61 + +L4x: + srdi 5,LENGTH,2 + cmpldi 5,0 + beq L2x + + lxvd2x KX,0,KEYS + vperm K,K,K,swap_mask + + lxvd2x S0X,0,SRC + li 9,0x10 + lxvd2x S1X,9,SRC + addi 9,9,0x10 + lxvd2x S2X,9,SRC + addi 9,9,0x10 + lxvd2x S3X,9,SRC + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask>) + + 
vxor S0,S0,K + vxor S1,S1,K + vxor S2,S2,K + vxor S3,S3,K + + mtctr ROUNDS + li 10,0x10 +.align 5 +L4x_round_loop: + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vcipher S0,S0,K + vcipher S1,S1,K + vcipher S2,S2,K + vcipher S3,S3,K + addi 10,10,0x10 + bdnz L4x_round_loop + + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vcipherlast S0,S0,K + vcipherlast S1,S1,K + vcipherlast S2,S2,K + vcipherlast S3,S3,K + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask + vperm S2,S2,S2,swap_mask + vperm S3,S3,S3,swap_mask>) + + stxvd2x S0X,0,DST + li 9,0x10 + stxvd2x S1X,9,DST + addi 9,9,0x10 + stxvd2x S2X,9,DST + addi 9,9,0x10 + stxvd2x S3X,9,DST + + addi SRC,SRC,0x40 + addi DST,DST,0x40 + + clrldi LENGTH,LENGTH,62 + +L2x: + srdi 5,LENGTH,1 + cmpldi 5,0 + beq L1x + + lxvd2x KX,0,KEYS + vperm K,K,K,swap_mask + + lxvd2x S0X,0,SRC + li 9,0x10 + lxvd2x S1X,9,SRC + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask>) + + vxor S0,S0,K + vxor S1,S1,K + + mtctr ROUNDS + li 10,0x10 +.align 5 +L2x_round_loop: + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vcipher S0,S0,K + vcipher S1,S1,K + addi 10,10,0x10 + bdnz L2x_round_loop + + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vcipherlast S0,S0,K + vcipherlast S1,S1,K + +IF_LE(<vperm S0,S0,S0,swap_mask + vperm S1,S1,S1,swap_mask>) + + stxvd2x S0X,0,DST + li 9,0x10 + stxvd2x S1X,9,DST + + addi SRC,SRC,0x20 + addi DST,DST,0x20 + + clrldi LENGTH,LENGTH,63 + +L1x: + cmpldi LENGTH,0 + beq Ldone + + lxvd2x KX,0,KEYS + vperm K,K,K,swap_mask + + lxvd2x S0X,0,SRC + +IF_LE(<vperm S0,S0,S0,swap_mask>) + + vxor S0,S0,K + + mtctr ROUNDS + li 10,0x10 +.align 5 +L1x_round_loop: + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vcipher S0,S0,K + addi 10,10,0x10 + bdnz L1x_round_loop + + lxvd2x KX,10,KEYS + vperm K,K,K,swap_mask + vcipherlast S0,S0,K + +IF_LE(<vperm S0,S0,S0,swap_mask>) + + stxvd2x S0X,0,DST + +Ldone: + blr +EPILOGUE(_nettle_aes_encrypt) + + .data + .align 4 +.swap_mask: +IF_LE(<.byte 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7>) +IF_BE(<.byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12>) diff --git a/powerpc64/gcm-hash8.asm b/powerpc64/gcm-hash8.asm new file mode 100644 index 00000000..fc9ce739 --- /dev/null +++ b/powerpc64/gcm-hash8.asm @@ -0,0 +1,1015 @@ +C powerpc64/gcm-hash8.asm + +ifelse(< + Copyright (C) 2020 Mamone Tarsha + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see
http://www.gnu.org/licenses/
. +>) + +C Register usage: +C VSX instructions is used to load and store data to memory "lxvd2x, stxvd2x" +C instead of VR instructions "lvx, stvx" as a workaround to access unaligned data +C VSX registers are defined with "X" suffix + +define(<SP>, <1>) +define(<TOCP>, <2>) + +define(<TABLE>, <3>) +define(<X>, <4>) +define(<LENGTH>, <5>) +define(<DATA>, <6>) + +define(<zero>, <0>) +define(<swap_mask>, <1>) +define(<hidw_mask>, <2>) +define(<lodw_mask>, <3>) +define(<poly>, <4>) +define(<poly_h>, <4>) +define(<poly_l>, <5>) +define(<RP>, <6>) +define(<Mh>, <7>) +define(<Ml>, <8>) +define(<H>, <9>) +define(<Hh>, <10>) +define(<Hl>, <11>) +define(<RP2>, <9>) +define(<M2h>, <10>) +define(<M2l>, <11>) + +define(<HX>, <41>) +define(<HhX>, <42>) +define(<HlX>, <43>) +define(<H_HhX>, <44>) +define(<H_HX>, <45>) +define(<H_HlX>, <46>) + +define(<sl1>, <1>) +define(<msb>, <5>) +define(<H2>, <6>) +define(<H2h>, <7>) +define(<H2l>, <8>) +define(<H_h>, <12>) +define(<H_m>, <13>) +define(<H_l>, <14>) +define(<H_Hh>, <12>) +define(<H_H>, <13>) +define(<H_Hl>, <14>) +define(<H_t>, <15>) +define(<H2_h>, <16>) +define(<H2_m>, <17>) +define(<H2_l>, <18>) +define(<H2_t>, <19>) + +define(<C0X>, <38>) +define(<C1X>, <39>) +define(<C2X>, <40>) +define(<C3X>, <44>) +define(<C4X>, <38>) +define(<C5X>, <39>) +define(<C6X>, <40>) +define(<C7X>, <44>) + +define(<CX>, <45>) + +define(<C0>, <6>) +define(<C1>, <7>) +define(<C2>, <8>) +define(<C3>, <12>) +define(<C4>, <6>) +define(<C5>, <7>) +define(<C6>, <8>) +define(<C7>, <12>) + +define(<C>, <13>) + +define(<Ch>, <14>) +define(<Cl>, <15>) +define(<Cm>, <16>) + +define(<C01h>, <14>) +define(<C01l>, <15>) +define(<C01>, <16>) +define(<C23h>, <17>) +define(<C23l>, <18>) +define(<C23>, <19>) +define(<C45h>, <20>) +define(<C45l>, <21>) +define(<C45>, <22>) +define(<C67h>, <6>) +define(<C67l>, <7>) +define(<C67>, <8>) + +define(<H21>, <9>) +define(<H21h>, <10>) +define(<H21l>, <11>) +define(<H43>, <23>) +define(<H43h>, <24>) +define(<H43l>, <25>) +define(<H65>, <26>) +define(<H65h>, <27>) +define(<H65l>, <28>) +define(<H87>, <29>) +define(<H87h>, <30>) +define(<H87l>, <31>) + +define(<H21X>, <41>) +define(<H21hX>, <42>) +define(<H21lX>, <43>) +define(<H43X>, <55>) +define(<H43hX>, <56>) +define(<H43lX>, <57>) +define(<H65X>, <58>) +define(<H65hX>, <59>) +define(<H65lX>, <60>) +define(<H87X>, <61>) +define(<H87hX>, <62>) +define(<H87lX>, <63>) + +# gcm_fill registers: + +define(<CTR>, <3>) +define(<BLOCKS>, <4>) +define(<BUFFER>, <5>) + +define(<CTR0>, <2>) +define(<CTR0S>, <3>) +define(<CTR1>, <4>) +define(<CTR2>, <5>) +define(<CTR3>, <6>) +define(<CTR4>, <7>) +define(<CTR5>, <8>) +define(<CTR6>, <9>) +define(<CTR7>, <10>) + +define(<CTR0X>, <34>) +define(<CTR0SX>, <35>) +define(<CTR1X>, <36>) +define(<CTR2X>, <37>) +define(<CTR3X>, <38>) +define(<CTR4X>, <39>) +define(<CTR5X>, <40>) +define(<CTR6X>, <41>) +define(<CTR7X>, <42>) + +define(<I1>, <11>) +define(<I2>, <12>) +define(<I3>, <13>) +define(<I4>, <14>) +define(<I5>, <15>) +define(<I6>, <16>) +define(<I7>, <17>) +define(<I8>, <18>) + +.file "gcm-hash8.asm" + +.machine "any" + +IF_LE(<.abiversion 2>) +.text + + # void gcm_init_key (union gcm_block *table) + +IF_LE(<.align 5>) +PROLOGUE(_nettle_gcm_init_key8) +IF_LE(<.localentry _nettle_gcm_init_key8,0>) + + ld 7,.polynomial@got(TOCP) + lvx poly,0,7 +IF_LE(<ld 7,.swap_mask@got(TOCP) + lvx swap_mask,0,7>) + ld 7,.hidw_mask@got(TOCP) + lvx hidw_mask,0,7 + ld 7,.lodw_mask@got(TOCP) + lvx lodw_mask,0,7 + + li 10,0x800 + lxvd2x HX,10,TABLE # load H +IF_LE(<vperm 
H,H,H,swap_mask>) + + # --- calculate H = H shift left 1 modulo polynomial --- + + vupkhsw msb,H # most significant bit word-extend + vspltisb sl1,1 # splat 1 for shift left + vspltw msb,msb,0 # most significant bit extend + vsl H,H,sl1 # H shift left 1 + vand msb,msb,poly + vxor zero,zero,zero + vxor H_t,H,msb + + vsldoi H,H_t,H_t,8 # doubleword swap + vsldoi Hh,H,zero,8 + vsldoi Hl,zero,H,8 + + # --- calculate H^2 = H*H --- + + # reduction pre-processing + vsldoi poly_h,zero,poly,8 + vsldoi poly_l,poly_h,poly_h,8 + + # polynomial multiplication "classical" + vpmsumd H_h,H_t,Hh # H^1h*H^1h + vpmsumd H_l,H_t,Hl # H^1l*H^1l + vpmsumd H_m,H_t,H # H^1h*H^1l⊕H^1l*H^1h + + # reduction first phase # [1] + vpmsumd RP,H_l,poly_h # [1] + + # polynomial multiplication post-processing # [2] + vsldoi Mh,zero,H_m,8 # [2] + vsldoi Ml,H_m,zero,8 # [2] + vsldoi RP,RP,RP,8 # [1] + vxor H_h,H_h,Mh # [2] + vxor H_l,H_l,Ml # [2] + vxor H_l,H_l,RP # [1] + + # reduction second phase + vpmsumd RP,H_l,poly_l + vxor H_h,H_l,H_h + vxor H2_t,H_h,RP + + vsldoi H2,H2_t,H2_t,8 + vsldoi H2h,H2,zero,8 + vsldoi H2l,zero,H2,8 + + # --- calculate [H^2.Hi⊕H^2.Lo:H^1.Hi⊕H^1.Lo] --- + + vperm H_Hh,H2,H,lodw_mask + vperm H_Hl,H2,H,hidw_mask + vxor H_H,H_Hh,H_Hl + + # --- store H,[H^2.Hi⊕H^2.Lo:H^1.Hi⊕H^1.Lo] --- + + li 8,0x00 + li 9,0x100 + li 10,0x200 + stxvd2x HlX,8,TABLE + stxvd2x HX,9,TABLE + stxvd2x HhX,10,TABLE + + li 8,0x300 + li 9,0x400 + li 10,0x500 + stxvd2x H_HhX,8,TABLE + stxvd2x H_HX,9,TABLE + stxvd2x H_HlX,10,TABLE + + # --- calculate H^3,H^4 --- + + # polynomial multiplication "classical" + vpmsumd H_l,H_t,H2l # H^1l*H^2l + vpmsumd H_m,H_t,H2 # H^1h*H^2l⊕H^1l*H^2h + vpmsumd H_h,H_t,H2h # H^1h*H^2h + vpmsumd H2_l,H2_t,H2l # H^2l*H^2l + vpmsumd H2_m,H2_t,H2 # H^2h*H^2l⊕H^2l*H^2h + vpmsumd H2_h,H2_t,H2h # H^2h*H^2h + + # reduction first phase # [1] + vpmsumd RP,H_l,poly_h # [1] H^3 + vpmsumd RP2,H2_l,poly_h # [1] H^4 + + # polynomial multiplication post-processing # [2] + vsldoi Mh,zero,H_m,8 # [2] H^3 + vsldoi M2h,zero,H2_m,8 # [2] H^4 + vsldoi Ml,H_m,zero,8 # [2] H^3 + vsldoi M2l,H2_m,zero,8 # [2] H^4 + vsldoi RP,RP,RP,8 # [1] H^3 + vsldoi RP2,RP2,RP2,8 # [1] H^4 + vxor H_h,H_h,Mh # [2] H^3 + vxor H2_h,H2_h,M2h # [2] H^4 + vxor H_l,H_l,Ml # [2] H^3 + vxor H2_l,H2_l,M2l # [2] H^4 + vxor H_l,H_l,RP # [1] H^3 + vxor H2_l,H2_l,RP2 # [1] H^4 + + # reduction second phase + vpmsumd RP,H_l,poly_l # H^3 + vpmsumd RP2,H2_l,poly_l # H^4 + vxor H_h,H_l,H_h # H^3 + vxor H2_h,H2_l,H2_h # H^4 + vxor H_h,H_h,RP # H^3 + vxor H2_h,H2_h,RP2 # H^4 + + vsldoi H2,H2_h,H2_h,8 # H^4 + vsldoi H,H_h,H_h,8 # H^3 + vsldoi H2l,zero,H2,8 # H^4 + vsldoi H2h,H2,zero,8 # H^4 + + # --- calculate [H^4.Hi⊕H^4.Lo:H^3.Hi⊕H^3.Lo] --- + + vperm H_Hh,H2,H,lodw_mask + vperm H_Hl,H2,H,hidw_mask + vxor H_H,H_Hh,H_Hl + + # --- store [H^4.Hi⊕H^4.Lo:H^3.Hi⊕H^3.Lo] --- + + li 8,0x600 + li 9,0x700 + li 10,0x800 + stxvd2x H_HhX,8,TABLE + stxvd2x H_HX,9,TABLE + stxvd2x H_HlX,10,TABLE + + # --- calculate H^5,H^6 --- + + # polynomial multiplication "classical" + vpmsumd H_l,H_t,H2l # H^1l*H^4l + vpmsumd H_m,H_t,H2 # H^1h*H^4l⊕H^1l*H^4h + vpmsumd H_h,H_t,H2h # H^1h*H^4h + vpmsumd H2_l,H2_t,H2l # H^2l*H^4l + vpmsumd H2_m,H2_t,H2 # H^2h*H^4l⊕H^2l*H^4h + vpmsumd H2_h,H2_t,H2h # H^2h*H^4h + + # reduction first phase # [1] + vpmsumd RP,H_l,poly_h # [1] H^5 + vpmsumd RP2,H2_l,poly_h # [1] H^6 + + # polynomial multiplication post-processing # [2] + vsldoi Mh,zero,H_m,8 # [2] H^5 + vsldoi M2h,zero,H2_m,8 # [2] H^6 + vsldoi Ml,H_m,zero,8 # [2] H^5 + vsldoi M2l,H2_m,zero,8 # 
[2] H^6 + vsldoi RP,RP,RP,8 # [1] H^5 + vsldoi RP2,RP2,RP2,8 # [1] H^6 + vxor H_h,H_h,Mh # [2] H^5 + vxor H2_h,H2_h,M2h # [2] H^6 + vxor H_l,H_l,Ml # [2] H^5 + vxor H2_l,H2_l,M2l # [2] H^6 + vxor H_l,H_l,RP # [1] H^5 + vxor H2_l,H2_l,RP2 # [1] H^6 + + # reduction second phase + vpmsumd RP,H_l,poly_l # H^5 + vpmsumd RP2,H2_l,poly_l # H^6 + vxor H_h,H_l,H_h # H^5 + vxor H2_h,H2_l,H2_h # H^6 + vxor H_h,H_h,RP # H^5 + vxor H2_h,H2_h,RP2 # H^6 + + vsldoi H2,H2_h,H2_h,8 # H^6 + vsldoi H,H_h,H_h,8 # H^5 + vsldoi H2l,zero,H2,8 # H^6 + vsldoi H2h,H2,zero,8 # H^6 + + # --- calculate [H^6.Hi⊕H^6.Lo:H^5.Hi⊕H^5.Lo] --- + + vperm H_Hh,H2,H,lodw_mask + vperm H_Hl,H2,H,hidw_mask + vxor H_H,H_Hh,H_Hl + + # --- store [H^6.Hi⊕H^6.Lo:H^5.Hi⊕H^5.Lo] --- + + li 8,0x900 + li 9,0xA00 + li 10,0xB00 + stxvd2x H_HhX,8,TABLE + stxvd2x H_HX,9,TABLE + stxvd2x H_HlX,10,TABLE + + # --- calculate H^7,H^8 --- + + # polynomial multiplication "classical" + vpmsumd H_l,H_t,H2l # H^1l*H^6l + vpmsumd H_m,H_t,H2 # H^1h*H^6l⊕H^1l*H^6h + vpmsumd H_h,H_t,H2h # H^1h*H^6h + vpmsumd H2_l,H2_t,H2l # H^2l*H^6l + vpmsumd H2_m,H2_t,H2 # H^2h*H^6l⊕H^2l*H^6h + vpmsumd H2_h,H2_t,H2h # H^2h*H^6h + + # reduction first phase # [1] + vpmsumd RP,H_l,poly_h # [1] H^7 + vpmsumd RP2,H2_l,poly_h # [1] H^8 + + # polynomial multiplication post-processing # [2] + vsldoi Mh,zero,H_m,8 # [2] H^7 + vsldoi M2h,zero,H2_m,8 # [2] H^8 + vsldoi Ml,H_m,zero,8 # [2] H^7 + vsldoi M2l,H2_m,zero,8 # [2] H^8 + vsldoi RP,RP,RP,8 # [1] H^7 + vsldoi RP2,RP2,RP2,8 # [1] H^8 + vxor H_h,H_h,Mh # [2] H^7 + vxor H2_h,H2_h,M2h # [2] H^8 + vxor H_l,H_l,Ml # [2] H^7 + vxor H2_l,H2_l,M2l # [2] H^8 + vxor H_l,H_l,RP # [1] H^7 + vxor H2_l,H2_l,RP2 # [1] H^8 + + # reduction second phase + vpmsumd RP,H_l,poly_l # H^7 + vpmsumd RP2,H2_l,poly_l # H^8 + vxor H_h,H_l,H_h # H^7 + vxor H2_h,H2_l,H2_h # H^8 + vxor H_h,H_h,RP # H^7 + vxor H2_h,H2_h,RP2 # H^8 + + vsldoi H,H_h,H_h,8 # H^7 + vsldoi H2,H2_h,H2_h,8 # H^8 + + # --- calculate [H^8.Hi⊕H^8.Lo:H^7.Hi⊕H^7.Lo] --- + + vperm H_Hh,H2,H,lodw_mask + vperm H_Hl,H2,H,hidw_mask + vxor H_H,H_Hh,H_Hl + + # --- store [H^8.Hi⊕H^8.Lo:H^7.Hi⊕H^7.Lo] --- + + li 8,0xC00 + li 9,0xD00 + li 10,0xE00 + stxvd2x H_HhX,8,TABLE + stxvd2x H_HX,9,TABLE + stxvd2x H_HlX,10,TABLE + + blr +EPILOGUE(_nettle_gcm_init_key8) + + # void gcm_hash (const struct gcm_key *key, union gcm_block *x, + # size_t length, const uint8_t *data) + +IF_LE(<.align 5>) +PROLOGUE(_nettle_gcm_hash8) +IF_LE(<.localentry _nettle_gcm_hash8,0>) + + vxor zero,zero,zero + + ld 7,.polynomial@got(TOCP) + lvx poly,0,7 +IF_LE(<ld 7,.swap_mask@got(TOCP) + lvx swap_mask,0,7>) + ld 7,.hidw_mask@got(TOCP) + lvx hidw_mask,0,7 + ld 7,.lodw_mask@got(TOCP) + lvx lodw_mask,0,7 + + vsldoi poly_h,zero,poly,8 + vsldoi poly_l,poly_h,poly_h,8 + + lxvd2x CX,0,X # load X +IF_LE(<vperm C,C,C,swap_mask>) + + srdi 7,LENGTH,7 # 8x loop count + cmpldi 7,0 + beq L2x + + # backup registers + stdu SP,-224(SP) + std 28,216(SP) + std 29,208(SP) + std 30,200(SP) + std 31,192(SP) + li 8,176 + stvx 20,8,SP + subi 8,8,16 + stvx 21,8,SP + subi 8,8,16 + stvx 22,8,SP + subi 8,8,16 + stvx 23,8,SP + subi 8,8,16 + stvx 24,8,SP + subi 8,8,16 + stvx 25,8,SP + subi 8,8,16 + stvx 26,8,SP + subi 8,8,16 + stvx 27,8,SP + subi 8,8,16 + stvx 28,8,SP + subi 8,8,16 + stvx 29,8,SP + subi 8,8,16 + stvx 30,8,SP + subi 8,8,16 + stvx 31,8,SP + + # table loading + li 8,0x300 + li 9,0x400 + li 10,0x500 + lxvd2x H21hX,8,TABLE + lxvd2x H21X,9,TABLE + lxvd2x H21lX,10,TABLE + li 8,0x600 + li 9,0x700 + li 10,0x800 + lxvd2x H43hX,8,TABLE + lxvd2x 
+    lxvd2x H43lX,10,TABLE
+    li 8,0x900
+    li 9,0xA00
+    li 10,0xB00
+    lxvd2x H65hX,8,TABLE
+    lxvd2x H65X,9,TABLE
+    lxvd2x H65lX,10,TABLE
+    li 8,0xC00
+    li 9,0xD00
+    li 10,0xE00
+    lxvd2x H87hX,8,TABLE
+    lxvd2x H87X,9,TABLE
+    lxvd2x H87lX,10,TABLE
+
+    li 8,0x10
+    li 9,0x20
+    li 10,0x30
+    li 28,0x40
+    li 29,0x50
+    li 30,0x60
+    li 31,0x70
+
+    mtctr 7
+.align 5
+L8x_loop:
+    # input loading
+    lxvd2x C0X,0,DATA              # load C0
+    lxvd2x C1X,8,DATA              # load C1
+    lxvd2x C2X,9,DATA              # load C2
+    lxvd2x C3X,10,DATA             # load C3
+
+    # swap permuting
+IF_LE(<vperm C0,C0,C0,swap_mask
+    vperm C1,C1,C1,swap_mask
+    vperm C2,C2,C2,swap_mask
+    vperm C3,C3,C3,swap_mask>)
+
+    # previous digest combining
+    vxor C0,C0,C
+
+    # polynomial multiplication "karatsuba" pre-processing
+    vperm C23h,C2,C3,hidw_mask
+    vperm C23l,C2,C3,lodw_mask
+    vperm C01h,C0,C1,hidw_mask
+    vperm C01l,C0,C1,lodw_mask
+
+    # input loading
+    lxvd2x C4X,28,DATA             # load C4
+    lxvd2x C5X,29,DATA             # load C5
+    lxvd2x C6X,30,DATA             # load C6
+    lxvd2x C7X,31,DATA             # load C7
+
+    # swap permuting
+IF_LE(<vperm C4,C4,C4,swap_mask
+    vperm C5,C5,C5,swap_mask
+    vperm C6,C6,C6,swap_mask
+    vperm C7,C7,C7,swap_mask>)
+
+    # polynomial multiplication "karatsuba" pre-processing
+    vperm C45h,C4,C5,hidw_mask
+    vperm C45l,C4,C5,lodw_mask
+    vperm C67h,C6,C7,hidw_mask
+    vperm C67l,C6,C7,lodw_mask
+    vxor C23,C23h,C23l
+    vxor C01,C01h,C01l
+    vxor C45,C45h,C45l
+    vxor C67,C67h,C67l
+
+    # polynomial multiplication "karatsuba"
+    vpmsumd C23h,C23h,H65h         # H23 = H^6h*C2h⊕H^5h*C3h
+    vpmsumd C23l,C23l,H65l         # L23 = H^6l*C2l⊕H^5l*C3l
+    vpmsumd C01h,C01h,H87h         # H01 = H^8h*C0h⊕H^7h*C1h
+    vpmsumd C01l,C01l,H87l         # L01 = H^8l*C0l⊕H^7l*C1l
+    vpmsumd C67h,C67h,H21h         # H67 = H^2h*C6h⊕H^1h*C7h
+    vpmsumd C67l,C67l,H21l         # L67 = H^2l*C6l⊕H^1l*C7l
+    vpmsumd C45h,C45h,H43h         # H45 = H^4h*C4h⊕H^3h*C5h
+    vpmsumd C45l,C45l,H43l         # L45 = H^4l*C4l⊕H^3l*C5l
+    vpmsumd C23,C23,H65            # M23 = (H^6h⊕H^5h)*(C2h⊕C3h)⊕(H^6l⊕H^5l)*(C2l⊕C3l)
+    vpmsumd C01,C01,H87            # M01 = (H^8h⊕H^7h)*(C0h⊕C1h)⊕(H^8l⊕H^7l)*(C0l⊕C1l)
+    vpmsumd C45,C45,H43            # M45 = (H^4h⊕H^3h)*(C4h⊕C5h)⊕(H^4l⊕H^3l)*(C4l⊕C5l)
+    vpmsumd C67,C67,H21            # M67 = (H^2h⊕H^1h)*(C6h⊕C7h)⊕(H^2l⊕H^1l)*(C6l⊕C7l)
+
+    # polynomial multiplication "karatsuba" post-processing
+    vxor C23,C23,C23h
+    vxor C01,C01,C01h
+    vxor C45,C45,C45h
+    vxor C67,C67,C67h
+    vxor C23,C23,C23l
+    vxor C01,C01,C01l
+    vxor C45,C45,C45l
+    vxor C67,C67,C67l
+
+    # deferred recombination of partial products
+    vxor C01h,C01h,C23h            # H0 = H01⊕H23
+    vxor C45h,C45h,C67h            # H1 = H45⊕H67
+    vxor C01l,C01l,C23l            # L0 = L01⊕L23
+    vxor C45l,C45l,C67l            # L1 = L45⊕L67
+    vxor C01,C01,C23               # M0 = M01⊕M23
+    vxor C45,C45,C67               # M1 = M45⊕M67
+    vxor C01h,C01h,C45h            # H = H0⊕H1
+    vxor C01l,C01l,C45l            # L = L0⊕L1
+    vxor C01,C01,C45               # M = M0⊕M1
+
+    # reduction first phase        # [1]
+    vpmsumd RP,C01l,poly_h         # [1]
+
+    # polynomial multiplication post-processing # [2]
+    vsldoi Mh,zero,C01,8           # [2]
+    vsldoi Ml,C01,zero,8           # [2]
+    vsldoi RP,RP,RP,8              # [1]
+    vxor C01h,C01h,Mh              # [2]
+    vxor C01l,C01l,Ml              # [2]
+    vxor C01l,C01l,RP              # [1]
+
+    # reduction second phase
+    vpmsumd RP,C01l,poly_l
+    vxor C01h,C01l,C01h
+    vxor C,C01h,RP
+
+    addi DATA,DATA,0x80
+    bdnz L8x_loop
+
+    # restore registers
+    li 8,0
+    lvx 31,8,SP
+    addi 8,8,16
+    lvx 30,8,SP
+    addi 8,8,16
+    lvx 29,8,SP
+    addi 8,8,16
+    lvx 28,8,SP
+    addi 8,8,16
+    lvx 27,8,SP
+    addi 8,8,16
+    lvx 26,8,SP
+    addi 8,8,16
+    lvx 25,8,SP
+    addi 8,8,16
+    lvx 24,8,SP
+    addi 8,8,16
+    lvx 23,8,SP
+    addi 8,8,16
+    lvx 22,8,SP
+    addi 8,8,16
+    lvx 21,8,SP
+    addi 8,8,16
+    lvx 20,8,SP
+    ld 31,192(SP)
+    ld 30,200(SP)
+    ld 29,208(SP)
+    ld 28,216(SP)
+    addi SP,SP,224
+
+    clrldi LENGTH,LENGTH,57
+L2x:
+    srdi 7,LENGTH,5
+    cmpldi 7,0
+    beq L1x
+
+    # table loading
+    li 8,0x300
+    li 9,0x400
+    li 10,0x500
+    lxvd2x H21hX,8,TABLE
+    lxvd2x H21X,9,TABLE
+    lxvd2x H21lX,10,TABLE
+
+    li 10,0x10
+
+    mtctr 7
+.align 5
+L2x_loop:
+    # input loading
+    lxvd2x C0X,0,DATA              # load C0
+    lxvd2x C1X,10,DATA             # load C1
+
+    # swap permuting
+IF_LE(<vperm C0,C0,C0,swap_mask
+    vperm C1,C1,C1,swap_mask>)
+
+    # previous digest combining
+    vxor C0,C0,C
+
+    # polynomial multiplication "karatsuba" pre-processing
+    vperm C01h,C0,C1,hidw_mask
+    vperm C01l,C0,C1,lodw_mask
+    vxor C01,C01h,C01l
+
+    # polynomial multiplication "karatsuba"
+    vpmsumd C01h,C01h,H21h         # H01 = H^2h*C0h⊕H^1h*C1h
+    vpmsumd C01l,C01l,H21l         # L01 = H^2l*C0l⊕H^1l*C1l
+    vpmsumd C01,C01,H21            # M01 = (H^2h⊕H^1h)*(C0h⊕C1h)⊕(H^2l⊕H^1l)*(C0l⊕C1l)
+
+    # polynomial multiplication "karatsuba" post-processing
+    vxor C01,C01,C01h
+    vxor C01,C01,C01l
+
+    # reduction first phase        # [1]
+    vpmsumd RP,C01l,poly_h         # [1]
+
+    # polynomial multiplication post-processing # [2]
+    vsldoi Mh,zero,C01,8           # [2]
+    vsldoi Ml,C01,zero,8           # [2]
+    vsldoi RP,RP,RP,8              # [1]
+    vxor C01h,C01h,Mh              # [2]
+    vxor C01l,C01l,Ml              # [2]
+    vxor C01l,C01l,RP              # [1]
+
+    # reduction second phase
+    vpmsumd RP,C01l,poly_l
+    vxor C01h,C01l,C01h
+    vxor C,C01h,RP
+
+    addi DATA,DATA,0x20
+    bdnz L2x_loop
+
+    clrldi LENGTH,LENGTH,59
+L1x:
+    srdi 7,LENGTH,4
+    cmpldi 7,0
+    beq Lrem
+
+    # table loading
+    li 9,0x100
+    li 10,0x200
+    lxvd2x HlX,0,TABLE
+    lxvd2x HX,9,TABLE
+    lxvd2x HhX,10,TABLE
+
+    # input loading
+    lxvd2x C0X,0,DATA              # load C0
+
+    # swap permuting
+IF_LE(<vperm C0,C0,C0,swap_mask>)
+
+    # previous digest combining
+    vxor C0,C0,C
+
+    vpmsumd Cl,C0,Hl               # L = Hl*Cl
+    vpmsumd Cm,C0,H                # M = Hh*Cl⊕Hl*Ch
+    vpmsumd Ch,C0,Hh               # H = Hh*Ch
+
+    # reduction first phase        # [1]
+    vpmsumd RP,Cl,poly_h           # [1]
+
+    # polynomial multiplication post-processing # [2]
+    vsldoi Mh,zero,Cm,8            # [2]
+    vsldoi Ml,Cm,zero,8            # [2]
+    vsldoi RP,RP,RP,8              # [1]
+    vxor Ch,Ch,Mh                  # [2]
+    vxor Cl,Cl,Ml                  # [2]
+    vxor Cl,Cl,RP                  # [1]
+
+    # reduction second phase
+    vpmsumd RP,Cl,poly_l
+    vxor Ch,Cl,Ch
+    vxor C,Ch,RP
+
+    addi DATA,DATA,0x10
+    clrldi LENGTH,LENGTH,60
+Lrem:
+    cmpldi LENGTH,0
+    beq Ldone
+
+    # table loading
+    li 9,0x100
+    li 10,0x200
+    lxvd2x HlX,0,TABLE
+    lxvd2x HX,9,TABLE
+    lxvd2x HhX,10,TABLE
+
+    # input loading
+    stdu SP,-16(SP)
+    stvx zero,0,SP
+Lst_loop:
+    subic. LENGTH,LENGTH,1
+    lbzx 7,LENGTH,DATA
+    stbx 7,LENGTH,SP
+    bne Lst_loop
+    lxvd2x C0X,0,SP
+    addi SP,SP,16
+
+    # swap permuting
+IF_LE(<vperm C0,C0,C0,swap_mask>)
+
+    # previous digest combining
+    vxor C0,C0,C
+
+    vpmsumd Cl,C0,Hl               # L = Hl*Cl
+    vpmsumd Cm,C0,H                # M = Hh*Cl⊕Hl*Ch
+    vpmsumd Ch,C0,Hh               # H = Hh*Ch
+
+    # reduction first phase        # [1]
+    vpmsumd RP,Cl,poly_h           # [1]
+
+    # polynomial multiplication post-processing # [2]
+    vsldoi Mh,zero,Cm,8            # [2]
+    vsldoi Ml,Cm,zero,8            # [2]
+    vsldoi RP,RP,RP,8              # [1]
+    vxor Ch,Ch,Mh                  # [2]
+    vxor Cl,Cl,Ml                  # [2]
+    vxor Cl,Cl,RP                  # [1]
+
+    # reduction second phase
+    vpmsumd RP,Cl,poly_l
+    vxor Ch,Cl,Ch
+    vxor C,Ch,RP
+
+Ldone:
+IF_LE(<vperm C,C,C,swap_mask>)
+    stxvd2x CX,0,X                 # store C
+    blr
+EPILOGUE(_nettle_gcm_hash8)
+
+    # gcm_fill (uint8_t *ctr, size_t blocks, union gcm_block *buffer)
+
+IF_LE(<.align 5>)
+PROLOGUE(_nettle_gcm_fill)
+IF_LE(<.localentry _nettle_gcm_fill,0>)
+
+IF_LE(<ld 6,.swap_mask@got(TOCP)
+    lvx swap_mask,0,6>)
+
+    vxor zero,zero,zero
+    vspltisb I1,1
+    vspltisb I2,2
+    vspltisb I3,3
+    vspltisb I4,4
+    vspltisb I5,5
+    vspltisb I6,6
+    vspltisb I7,7
+    vspltisb I8,8
+    vsldoi I1,zero,I1,1
+    vsldoi I2,zero,I2,1
+    vsldoi I3,zero,I3,1
+    vsldoi I4,zero,I4,1
+    vsldoi I5,zero,I5,1
+    vsldoi I6,zero,I6,1
+    vsldoi I7,zero,I7,1
+    vsldoi I8,zero,I8,1
+
+    lxvd2x CTR0X,0,CTR
+    IF_LE(<vperm CTR0,CTR0,CTR0,swap_mask>)
+
+    srdi 6,BLOCKS,3                # 8x loop count
+    cmpldi 6,0
+    beq Lfill_4x
+
+    std 25,-56(SP);
+    std 26,-48(SP);
+    std 27,-40(SP);
+    std 28,-32(SP);
+    std 29,-24(SP);
+    std 30,-16(SP);
+    std 31,-8(SP);
+
+    li 25,0x10
+    li 26,0x20
+    li 27,0x30
+    li 28,0x40
+    li 29,0x50
+    li 30,0x60
+    li 31,0x70
+
+    mtctr 6
+ L8x_fill_loop:
+    vadduwm CTR1,CTR0,I1
+    vadduwm CTR2,CTR0,I2
+    vadduwm CTR3,CTR0,I3
+    vadduwm CTR4,CTR0,I4
+    vadduwm CTR5,CTR0,I5
+    vadduwm CTR6,CTR0,I6
+    vadduwm CTR7,CTR0,I7
+
+    IF_LE(<vperm CTR0S,CTR0,CTR0,swap_mask
+    vperm CTR1,CTR1,CTR1,swap_mask
+    vperm CTR2,CTR2,CTR2,swap_mask
+    vperm CTR3,CTR3,CTR3,swap_mask
+    vperm CTR4,CTR4,CTR4,swap_mask
+    vperm CTR5,CTR5,CTR5,swap_mask
+    vperm CTR6,CTR6,CTR6,swap_mask
+    vperm CTR7,CTR7,CTR7,swap_mask>)
+
+    IF_LE(<stxvd2x CTR0SX,0,BUFFER>)
+    IF_BE(<stxvd2x CTR0X,0,BUFFER>)
+    stxvd2x CTR1X,25,BUFFER
+    stxvd2x CTR2X,26,BUFFER
+    stxvd2x CTR3X,27,BUFFER
+    stxvd2x CTR4X,28,BUFFER
+    stxvd2x CTR5X,29,BUFFER
+    stxvd2x CTR6X,30,BUFFER
+    stxvd2x CTR7X,31,BUFFER
+
+    vadduwm CTR0,CTR0,I8
+    addi BUFFER,BUFFER,0x80
+    bdnz L8x_fill_loop
+
+    ld 25,-56(SP);
+    ld 26,-48(SP);
+    ld 27,-40(SP);
+    ld 28,-32(SP);
+    ld 29,-24(SP);
+    ld 30,-16(SP);
+    ld 31,-8(SP);
+
+    clrldi BLOCKS,BLOCKS,61
+
+ Lfill_4x:
+    srdi 6,BLOCKS,2
+    cmpldi 6,0
+    beq Lfill_2x
+
+    li 8,0x10
+    li 9,0x20
+    li 10,0x30
+
+    vadduwm CTR1,CTR0,I1
+    vadduwm CTR2,CTR0,I2
+    vadduwm CTR3,CTR0,I3
+
+    IF_LE(<vperm CTR0S,CTR0,CTR0,swap_mask
+    vperm CTR1,CTR1,CTR1,swap_mask
+    vperm CTR2,CTR2,CTR2,swap_mask
+    vperm CTR3,CTR3,CTR3,swap_mask>)
+
+    IF_LE(<stxvd2x CTR0SX,0,BUFFER>)
+    IF_BE(<stxvd2x CTR0X,0,BUFFER>)
+    stxvd2x CTR1X,8,BUFFER
+    stxvd2x CTR2X,9,BUFFER
+    stxvd2x CTR3X,10,BUFFER
+
+    vadduwm CTR0,CTR0,I4
+    addi BUFFER,BUFFER,0x40
+
+    clrldi BLOCKS,BLOCKS,62
+
+ Lfill_2x:
+    srdi 6,BLOCKS,1
+    cmpldi 6,0
+    beq Lfill_1x
+
+    li 10,0x10
+
+    vadduwm CTR1,CTR0,I1
+
+    IF_LE(<vperm CTR0S,CTR0,CTR0,swap_mask
+    vperm CTR1,CTR1,CTR1,swap_mask>)
+
+    IF_LE(<stxvd2x CTR0SX,0,BUFFER>)
+    IF_BE(<stxvd2x CTR0X,0,BUFFER>)
+    stxvd2x CTR1X,10,BUFFER
+
+    vadduwm CTR0,CTR0,I2
+    addi BUFFER,BUFFER,0x20
+
+    clrldi BLOCKS,BLOCKS,63
+
+ Lfill_1x:
+    cmpldi BLOCKS,0
+    beq Lfill_done
+
+    IF_LE(<vperm CTR0S,CTR0,CTR0,swap_mask>)
+
+    IF_LE(<stxvd2x CTR0SX,0,BUFFER>)
+    IF_BE(<stxvd2x CTR0X,0,BUFFER>)
+
+    vadduwm CTR0,CTR0,I1
+
+ Lfill_done:
+    IF_LE(<vperm CTR0,CTR0,CTR0,swap_mask>)
+    stxvd2x CTR0X,0,CTR
+
+    blr
+EPILOGUE(_nettle_gcm_fill)
+
+    .data
+IF_LE(<.align 4
+.polynomial:
+    .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+    .align 4
+.swap_mask:
+    .byte 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7
+    .align 4
+.hidw_mask:
+    .byte 23,22,21,20,19,18,17,16,7,6,5,4,3,2,1,0
+    .align 4
+.lodw_mask:
+    .byte 31,30,29,28,27,26,25,24,15,14,13,12,11,10,9,8>)
+IF_BE(<.align 4
+.polynomial:
+    .byte 0xc2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+    .align 4
+.hidw_mask:
+    .byte 0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23
+    .align 4
+.lodw_mask:
+    .byte 8,9,10,11,12,13,14,15,24,25,26,27,28,29,30,31>)
diff --git a/powerpc64/machine.m4 b/powerpc64/machine.m4
new file mode 100644
index 00000000..e69de29b
diff --git a/testsuite/gcm-test.c b/testsuite/gcm-test.c
index c8174019..df1fc94a 100644
--- a/testsuite/gcm-test.c
+++ b/testsuite/gcm-test.c
@@ -170,6 +170,29 @@ test_main(void)
               "16aedbf5a0de6a57a637b39b"),
          SHEX("619cc5aefffe0bfa462af43c1699d050"));
 
+  /* Test 128 bytes */
+  test_aead(&nettle_gcm_aes128, NULL,
+       SHEX("feffe9928665731c6d6a8f9467308308"),
+       SHEX(""),
+       SHEX("d9313225f88406e5a55909c5aff5269a"
+            "86a7a9531534f7da2e4c303d8a318a72"
+            "1c3c0c95956809532fcf0e2449a6b525"
+            "b16aedf5aa0de657ba637b391aafd255"
+            "5ae376bc5e9f6a1b08e34db7a6ee0736"
+            "9ba662ea12f6f197e6bc3ed69d2480f3"
+            "ea5691347f2ba69113eb37910ebc18c8"
+            "0f697234582016fa956ca8f63ae6b473"),
+       SHEX("42831ec2217774244b7221b784d0d49c"
+            "e3aa212f2c02a4e035c17e2329aca12e"
+            "21d514b25466931c7d8f6a5aac84aa05"
+            "1ba30b396a0aac973d58e091473f5985"
+            "874b1178906ddbeab04ab2fe6cce8c57"
+            "8d7e961bd13fd6a8c56b66ca5e576492"
+            "1a48cd8bda04e66343e73055118b69b9"
+            "ced486813846958a11e602c03cfc232b"),
+       SHEX("cafebabefacedbaddecaf888"),
+       SHEX("796836f1246c9d735c5e1be0a715ccc3"));
+
   /* Test case 7 */
   test_aead(&nettle_gcm_aes192, NULL,
        SHEX("00000000000000000000000000000000"
-- 
2.17.1
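For reference, the operation that _nettle_gcm_init_key8 and _nettle_gcm_hash8 accelerate
is the GHASH update X = (X xor C_i) * H in GF(2^128), defined over the bit-reflected
polynomial x^128 + x^7 + x^2 + x + 1 (the 0xc2/0x01 constant stored at .polynomial).
The sketch below is a minimal, unoptimized C version of a single block step; it is
illustrative only, not part of the patch, and the helper name ghash_block is made up.
The assembly computes the same value with vpmsumd carry-less multiplies and the
two-phase reduction marked [1]/[2] in the comments.

    #include <stdint.h>
    #include <string.h>

    /* One GHASH step, X = (X xor C) * H in GF(2^128), bit order as in GCM.
       Hypothetical reference helper; not the code added by this patch. */
    static void
    ghash_block (uint8_t X[16], const uint8_t H[16], const uint8_t C[16])
    {
      uint8_t Z[16] = { 0 };   /* product accumulator */
      uint8_t V[16];           /* running multiple of H */
      unsigned i, j, k;

      for (i = 0; i < 16; i++)
        X[i] ^= C[i];          /* fold in the next block */
      memcpy (V, H, 16);

      /* Scan the 128 bits of X, most significant bit of each byte first. */
      for (i = 0; i < 16; i++)
        for (j = 0; j < 8; j++)
          {
            unsigned carry = V[15] & 1;

            if (X[i] & (0x80 >> j))
              for (k = 0; k < 16; k++)
                Z[k] ^= V[k];

            /* V = V * x: shift right one bit and reduce with 0xE1, the
               reflected form of x^128 + x^7 + x^2 + x + 1. */
            for (k = 15; k > 0; k--)
              V[k] = (V[k] >> 1) | (V[k-1] << 7);
            V[0] >>= 1;
            if (carry)
              V[0] ^= 0xE1;
          }
      memcpy (X, Z, 16);
    }

The 8x loop additionally relies on the aggregated-reduction identity
(((X xor C0)*H xor C1)*H ... xor C7)*H = (X xor C0)*H^8 xor C1*H^7 xor ... xor C7*H,
which is why gcm_init_key8 stores H^1..H^8 (plus the folded Hi xor Lo halves) in the
key table and the L8x_loop multiplies against the H87/H65/H43/H21 entries before doing
a single reduction per eight blocks.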
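Similarly, the counter-fill routine only ever touches the last 32 bits of the counter
block: the I1..I8 constants place the values 1..8 in the low word, and vadduwm adds
them word-wise, i.e. an inc32 that wraps modulo 2^32 without carrying into the rest of
the IV. A rough portable equivalent is sketched below; it is illustrative only, the
name gcm_fill_ref is made up, and the real function takes union gcm_block *buffer.

    #include <stdint.h>
    #include <string.h>

    /* What _nettle_gcm_fill produces, in portable C: `blocks` consecutive
       counter blocks, incrementing only the last 32 bits big-endian. */
    static void
    gcm_fill_ref (uint8_t *ctr, size_t blocks, uint8_t *buffer)
    {
      uint32_t c = ((uint32_t) ctr[12] << 24) | ((uint32_t) ctr[13] << 16)
        | ((uint32_t) ctr[14] << 8) | (uint32_t) ctr[15];
      size_t i;

      for (i = 0; i < blocks; i++, buffer += 16)
        {
          memcpy (buffer, ctr, 12);   /* fixed IV part */
          buffer[12] = c >> 24;       /* 32-bit big-endian block counter */
          buffer[13] = c >> 16;
          buffer[14] = c >> 8;
          buffer[15] = c;
          c++;
        }
      /* Write the advanced counter back, as the assembly does at Lfill_done. */
      ctr[12] = c >> 24;
      ctr[13] = c >> 16;
      ctr[14] = c >> 8;
      ctr[15] = c;
    }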