Re: [PATCH 0/8] Implement Curve448 ECDH and Ed448

7 Dec 2019

nisse@lysator.liu.se (Niels Möller) writes:
...
I've tried out this mod function (for 64-bit):
static void
ecc_448_modp(const struct ecc_modulo *m, mp_limb_t *rp)
...
...
This gives a speedup of 85% over the general ecc_mod (on my machine),
and gives about 35% speedup for scalar multiplication (both mul_g and
mul_a). So with this change, performance of mul_g and mul_a is roughly
midway between secp384 and secp521.
Tried the below first implementation of an x86_64 mod function. Gives a
speedup of almost three times over the above C function. With this, the
mul_g operation is 20% slower than for secp384, and the mul_a operation
is slightly faster.
Regards,
/Niels

diff --git a/configure.ac b/configure.ac
index 3547cae4..2933facf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -476,7 +476,8 @@ asm_nettle_optional_list="gcm-hash8.asm cpuid.asm \
 asm_hogweed_optional_list=""
 if test "x$enable_public_key" = "xyes" ; then
   asm_hogweed_optional_list="ecc-192-modp.asm ecc-224-modp.asm \
-    ecc-25519-modp.asm ecc-256-redc.asm ecc-384-modp.asm ecc-521-modp.asm"
+    ecc-256-redc.asm ecc-384-modp.asm ecc-521-modp.asm \
+    ecc-25519-modp.asm ecc-curve448-modp.asm"
 fi
OPT_NETTLE_OBJS=""
@@ -580,6 +581,7 @@ AH_VERBATIM([HAVE_NATIVE],
 #undef HAVE_NATIVE_ecc_256_redc
 #undef HAVE_NATIVE_ecc_384_modp
 #undef HAVE_NATIVE_ecc_384_redc
+#undef HAVE_NATIVE_ecc_curve448_modp
 #undef HAVE_NATIVE_ecc_521_modp
 #undef HAVE_NATIVE_ecc_521_redc
 #undef HAVE_NATIVE_gcm_hash8
diff --git a/ecc-448.c b/ecc-448.c
index 7d68e1c8..2e840024 100644
--- a/ecc-448.c
+++ b/ecc-448.c
@@ -45,7 +45,11 @@
#include "ecc-448.h"
-#if GMP_NUMB_BITS == 64
+#if HAVE_NATIVE_ecc_curve448_modp
+#define ecc_448_modp nettle_ecc_curve448_modp
+void
+ecc_448_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+#elif GMP_NUMB_BITS == 64
 static void
 ecc_448_modp(const struct ecc_modulo *m, mp_limb_t *rp)
 {
diff --git a/x86_64/ecc-curve448-modp.asm b/x86_64/ecc-curve448-modp.asm
new file mode 100644
index 00000000..5ce81960
--- /dev/null
+++ b/x86_64/ecc-curve448-modp.asm
@@ -0,0 +1,141 @@
+C x86_64/ecc-curve448-modp.asm
+
+ifelse(<
+   Copyright (C) 2019 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+	.file "ecc-curve448-modp.asm"
+
+C Register usage. RP is the second C argument (rp) in the System V
+C calling convention. X0 to X7 hold the eight limbs of the running
+C sum, X0 least significant and X7 the overflow limb above the seven
+C result limbs. T0 to T2 are temporaries. X1, X4 and T2 map to
+C callee-saved registers (rbx, rbp, r12), so the function has to
+C save and restore them.
+define(<RP>, <%rsi>)
+define(<X0>, <%rax>)
+define(<X1>, <%rbx>)
+define(<X2>, <%rcx>)
+define(<X3>, <%rdx>)
+define(<X4>, <%rbp>)
+define(<X5>, <%rdi>)
+define(<X6>, <%r8>)
+define(<X7>, <%r9>)
+define(<T0>, <%r10>)
+define(<T1>, <%r11>)
+define(<T2>, <%r12>)
+
+	C void nettle_ecc_curve448_modp (const struct ecc_modulo *m, mp_limb_t *rp)
+	C
+	C Reduces the 14 limb (64 bits each) value x[0..13] stored at rp
+	C to 7 limbs, in place, using 2^448 = 2^224 + 1 (mod p), which
+	C holds for p = 2^448 - 2^224 - 1. Below, lo = x[0..6] and
+	C hi = x[7..13].
+	C
+	C All 14 input limbs are read before the first store. The
+	C argument m is not used. No comparison against p is made, so the
+	C result is not necessarily the least non-negative residue.
+	C
+	C NOTE(review): W64_ENTRY and W64_EXIT are defined outside this
+	C file; presumably they adapt the Windows calling convention so
+	C that rp arrives in RP. Confirm against their definition.
+PROLOGUE(nettle_ecc_curve448_modp)
+	W64_ENTRY(2, 0)
+
+	C X1, X4 and T2 live in callee-saved registers.
+	push	%rbx
+	push	%rbp
+	push	%r12
+
+	C First load the values to be shifted by 32. X0 to X7 get
+	C x[11], x[11], x[12], x[13], x[7], x[8], x[9], x[10], and
+	C T0 to T2 keep unshifted copies of x[11], x[12], x[13].
+	mov 88(RP), X1
+	mov X1, X0
+	mov 96(RP), X2
+	mov X1, T0
+	mov 104(RP), X3
+	mov X2, T1
+	mov 56(RP), X4
+	mov X3, T2
+	mov 64(RP), X5
+	mov 72(RP), X6
+	mov 80(RP), X7
+
+	C Multiply by 2^32. Each shrd moves the low 32 bits of the next,
+	C not yet shifted, register into the top half of the current one.
+	C Afterwards X0 to X7 hold, as one 8 limb number,
+	C   2^32 (x[11] + 2^64 x[12] + 2^128 x[13] + 2^192 x[7]
+	C         + 2^256 x[8] + 2^320 x[9] + 2^384 x[10])
+	shl $32, X0
+	shrd $32, X2, X1
+	shrd $32, X3, X2
+	shrd $32, X4, X3
+	shrd $32, X5, X4
+	shrd $32, X6, X5
+	shrd $32, X7, X6
+	shr $32, X7
+
+	C Multiply by 2. T0 to T2 become 2 x[11..13] mod 2^192. They are
+	C added at limb positions 4 to 6 below, so the bit shifted out
+	C has weight 2^448 and is added to X7.
+	add T0, T0
+	adc T1, T1
+	adc T2, T2
+	adc $0, X7
+
+	C Main additions. Add hi with its top three limbs doubled:
+	C x[7..10] go to X0..X3 and T0..T2 to X4..X6, carry into X7.
+	add 56(RP), X0
+	adc 64(RP), X1
+	adc 72(RP), X2
+	adc 80(RP), X3
+	adc T0, X4
+	adc T1, X5
+	adc T2, X6
+	adc $0, X7
+
+	C Add lo = x[0..6], carry into X7.
+	add (RP), X0
+	adc 8(RP), X1
+	adc 16(RP), X2
+	adc 24(RP), X3
+	adc 32(RP), X4
+	adc 40(RP), X5
+	adc 48(RP), X6
+	adc $0, X7
+
+	C X7 wraparound. X7 has weight 2^448 = 2^224 + 1 (mod p), so add
+	C X7 at limb 0 and X7 2^32 at limb 3 (2^224 = 2^32 2^192), the
+	C latter split into T0 for X3 and T1 for X4. T2 is cleared before
+	C the add, since xor would destroy the carry flag inside the
+	C chain, and then collects the carry out of X6.
+	mov X7, T0
+	mov X7, T1
+	shl $32, T0
+	shr $32, T1
+	xor T2, T2
+	add X7, X0
+	adc $0, X1
+	adc $0, X2
+	adc T0, X3
+	adc T1, X4
+	adc $0, X5
+	adc $0, X6
+	adc $0, T2
+
+	C Final carry wraparound. T2 is 0 or 1 and is folded in the same
+	C way, T2 at limb 0 and T2 2^32 at limb 3. T2 can be 1 only if X6
+	C wrapped around to zero above, because X6 received nothing but
+	C the carry there, so the last adc below cannot carry out.
+	mov T2, T0
+	shl $32, T0
+
+	C The stores are interleaved with the carry chain, which works
+	C because mov leaves the flags unchanged.
+	add T2, X0
+	mov X0, (RP)
+	adc $0, X1
+	mov X1, 8(RP)
+	adc $0, X2
+	mov X2, 16(RP)
+	adc T0, X3
+	mov X3, 24(RP)
+	adc $0, X4
+	mov X4, 32(RP)
+	adc $0, X5
+	mov X5, 40(RP)
+	adc $0, X6
+	mov X6, 48(RP)
+
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
+
+	W64_EXIT(2, 0)
+	ret
+EPILOGUE(nettle_ecc_curve448_modp)
-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.

    

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

2006

2005

2004

2003

2002

Re: [PATCH 0/8] Implement Curve448 ECDH and Ed448