--- Makefile.in | 2 +- aclocal.m4 | 52 ++++++++++++++++++++++++++++++++++++++++ configure.ac | 11 +++++++++ powerpc64/README | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++ powerpc64/machine.m4 | 24 +++++++++++++++++++ 5 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 powerpc64/README create mode 100644 powerpc64/machine.m4
diff --git a/Makefile.in b/Makefile.in index 77efb5c9..1508e8f4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -603,7 +603,7 @@ distdir: $(DISTFILES) done set -e; for d in sparc32 sparc64 x86 \ x86_64 x86_64/aesni x86_64/sha_ni x86_64/fat \ - arm arm/neon arm/v6 arm/fat ; do \ + arm arm/neon arm/v6 arm/fat powerpc64 ; do \ mkdir "$(distdir)/$$d" ; \ find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' ')' \ -exec cp '{}' "$(distdir)/$$d" ';' ; \ diff --git a/aclocal.m4 b/aclocal.m4 index 513b2df4..e38181ad 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -462,6 +462,58 @@ foo: fi ])
+dnl NETTLE_CHECK_POWER_CRYPTO_EXT +dnl --------------------- +dnl Check if POWER crypto extensions should be used. +dnl Obeys enable_power_crypto_ext, which should be set earlier. +AC_DEFUN([NETTLE_CHECK_POWER_CRYPTO_EXT], +[if test "$enable_power_crypto_ext" = auto ; then + if test "$cross_compiling" = yes ; then + dnl Check if compiler/assembler accepts it. + AC_CACHE_CHECK([if assembler accepts crypto instructions], + nettle_cv_asm_power_vcrypto, + [GMP_TRY_ASSEMBLE([ +.text +foo: + vcipher 0, 0, 1 +], + [nettle_cv_asm_power_vcrypto=yes], + [nettle_cv_asm_power_vcrypto=no])]) + enable_power_crypto_ext="$nettle_cv_asm_power_vcrypto" + else + AC_CACHE_CHECK([if crypto extensions supported], + nettle_cv_asm_power_vcrypto, + [AC_RUN_IFELSE([AC_LANG_PROGRAM([[ +#if defined(__FreeBSD__) && __FreeBSD__ < 12 +#include <sys/sysctl.h> +#else +#include <sys/auxv.h> +#endif +// Define from arch/powerpc/include/uapi/asm/cputable.h in Linux kernel +#ifndef PPC_FEATURE2_VEC_CRYPTO +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#endif + unsigned long hwcap2 = 0; + ]], [[ +#if defined(__FreeBSD__) +#if __FreeBSD__ < 12 + size_t len = sizeof(hwcap2); + sysctlbyname("hw.cpu_features2", &hwcap2, &len, NULL, 0); +#else + elf_aux_info(AT_HWCAP2, &hwcap2, sizeof(hwcap2)); +#endif +#else + hwcap2 = getauxval(AT_HWCAP2); +#endif + return (hwcap2 & PPC_FEATURE2_VEC_CRYPTO) == PPC_FEATURE2_VEC_CRYPTO ? 0 : 1; + ]])], + [nettle_cv_asm_power_vcrypto=yes], + [nettle_cv_asm_power_vcrypto=no])]) + enable_power_crypto_ext="$nettle_cv_asm_power_vcrypto" + fi +fi +]) + dnl NETTLE_CHECK_IFUNC dnl ------------------ dnl Check if __attribute__ ((ifunc(...))) works diff --git a/configure.ac b/configure.ac index 1c0b7393..cc0d67ec 100644 --- a/configure.ac +++ b/configure.ac @@ -89,6 +89,10 @@ AC_ARG_ENABLE(x86-sha-ni, AC_HELP_STRING([--enable-x86-sha-ni], [Enable x86_64 sha_ni instructions. (default=no)]),, [enable_x86_sha_ni=no])
+AC_ARG_ENABLE(power-crypto-ext, + AC_HELP_STRING([--enable-power-crypto-ext], [Enable POWER crypto extentions. (default=auto)]),, + [enable_power_crypto_ext=auto]) + AC_ARG_ENABLE(mini-gmp, AC_HELP_STRING([--enable-mini-gmp], [Enable mini-gmp, used instead of libgmp.]),, [enable_mini_gmp=no]) @@ -434,6 +438,13 @@ if test "x$enable_assembler" = xyes ; then fi fi ;; + *powerpc64*) + NETTLE_CHECK_POWER_CRYPTO_EXT + + if test "x$enable_power_crypto_ext" = xyes ; then + asm_path=powerpc64 + fi + ;; *) enable_assembler=no ;; diff --git a/powerpc64/README b/powerpc64/README new file mode 100644 index 00000000..1890ee9a --- /dev/null +++ b/powerpc64/README @@ -0,0 +1,67 @@ +General-Purpose Register Conventions + +Register Status Use + +GPR0 volatile In function prologs. +GPR1 dedicated Stack pointer. +GPR2 dedicated Table of Contents (TOC) pointer. +GPR3 volatile First word of a function's argument list; + first word of a scalar function return. +GPR4 volatile Second word of a function's argument list; + second word of a scalar function return. +GPR5 volatile Third word of a function's argument list. +GPR6 volatile Fourth word of a function's argument list. +GPR7 volatile Fifth word of a function's argument list. +GPR8 volatile Sixth word of a function's argument list. +GPR9 volatile Seventh word of a function's argument list. +GPR10 volatile Eighth word of a function's argument list. +GPR11 volatile In calls by pointer and as an environment pointer for languages + that require it (for example, PASCAL). +GPR12 volatile For special exception handling required by certain languages and in + glink code. +GPR13 reserved Reserved under 64-bit environment; not restored across system calls. +GPR14:GPR31 nonvolatile These registers must be preserved across a function call. 
+ +Vector Register Conventions + +Register Status + +VR0 Volatile +VR1 Volatile +VR2 Volatile +VR3 Volatile +VR4 Volatile +VR5 Volatile +VR6 Volatile +VR7 Volatile +VR8 Volatile +VR9 Volatile +VR10 Volatile +VR11 Volatile +VR12 Volatile +VR13 Volatile +VR14 Volatile +VR15 Volatile +VR16 Volatile +VR17 Volatile +VR18 Volatile +VR19 Volatile +VR20:31 Nonvolatile (extended ABI mode) their values are preserved across function calls + +Addressing memory + +There are many ways to reference data, the current implementations uses GOT-indirect addressing +(Accessing data through the global offset table): +1. Define data in .data section +2. Load the address of data into register from the global offset table e.g. ld 7, my_var@got(2) +3. Use the address to load the value of data into register e.g. ld 3, 0(7) + +VSX instructions (lxvd2x and stxvd2x) are used to load and store data to memory +instead of VR instructions (lvx and stvx) as it produces a fewer instructions +(lvx and stvx) can be used to load and store data into storage operands +but additional instructions are needed to access unaligned storage operands, please +refer to "6.4.1 Accessing Unaligned Storage Operands" in "POWER ISA Version 2.07 B" +to see an example of accessing unaligned storage operands (lxvd2x and stxvd2x) can +be used to load and store data into unaligned storage operands but permuting is needed +for loading and storing data in little-endian mode +VSX registers are defined with "X" suffix diff --git a/powerpc64/machine.m4 b/powerpc64/machine.m4 new file mode 100644 index 00000000..c8005cd8 --- /dev/null +++ b/powerpc64/machine.m4 @@ -0,0 +1,24 @@ +define(<PROLOGUE>, +<ifelse(WORDS_BIGENDIAN,no, +<.align 5 +.globl C_NAME($1) +DECLARE_FUNC(C_NAME($1)) +C_NAME($1): +addis 2,12,(.TOC.-C_NAME($1))@ha +addi 2,2,(.TOC.-C_NAME($1))@l +.localentry C_NAME($1), .-C_NAME($1)>, +<.globl C_NAME($1) +DECLARE_FUNC(C_NAME($1)) +.section ".opd","aw" +.align 3 +C_NAME($1): +.quad .C_NAME($1),.TOC.@tocbase,0 
+.previous +.align 5 +.C_NAME($1):>)>) + +define(<EPILOGUE>, +<ifelse(WORDS_BIGENDIAN,no, +<.size C_NAME($1), . - C_NAME($1)>, +<.size .C_NAME($1), . - .C_NAME($1) +.size C_NAME($1), . - .C_NAME($1)>)>)
Maamoun TK maamoun.tk@googlemail.com writes:
+dnl NETTLE_CHECK_POWER_CRYPTO_EXT +dnl --------------------- +dnl Check if POWER crypto extensions should be used. +dnl Obeys enable_power_crypto_ext, which should be set earlier. +AC_DEFUN([NETTLE_CHECK_POWER_CRYPTO_EXT], +[if test "$enable_power_crypto_ext" = auto ; then
- if test "$cross_compiling" = yes ; then
- dnl Check if compiler/assembler accepts it.
- AC_CACHE_CHECK([if assembler accepts crypto instructions],
nettle_cv_asm_power_vcrypto,
[GMP_TRY_ASSEMBLE([
+.text +foo:
- vcipher 0, 0, 1
+],
[nettle_cv_asm_power_vcrypto=yes],
[nettle_cv_asm_power_vcrypto=no])])
- enable_power_crypto_ext="$nettle_cv_asm_power_vcrypto"
- else
- AC_CACHE_CHECK([if crypto extensions supported],
nettle_cv_asm_power_vcrypto,
[AC_RUN_IFELSE([AC_LANG_PROGRAM([[
+#if defined(__FreeBSD__) && __FreeBSD__ < 12 +#include <sys/sysctl.h> +#else +#include <sys/auxv.h> +#endif
Do you expect that this "auto" logic does what the user wants? I'm thinking that maybe it's simpler to stick with just yes/no (no being the default), and then add support for --enable-fat later, to select code at run-time?
--- /dev/null +++ b/powerpc64/machine.m4 @@ -0,0 +1,24 @@ +define(<PROLOGUE>, +<ifelse(WORDS_BIGENDIAN,no, +<.align 5 +.globl C_NAME($1) +DECLARE_FUNC(C_NAME($1)) +C_NAME($1): +addis 2,12,(.TOC.-C_NAME($1))@ha +addi 2,2,(.TOC.-C_NAME($1))@l +.localentry C_NAME($1), .-C_NAME($1)>, +<.globl C_NAME($1) +DECLARE_FUNC(C_NAME($1)) +.section ".opd","aw" +.align 3 +C_NAME($1): +.quad .C_NAME($1),.TOC.@tocbase,0 +.previous +.align 5 +.C_NAME($1):>)>)
Overriding PROLOGUE here looks fine, but it would be nice to have a comment explaining what's needed, and/or a link to some appropriate ABI specification.
Regards, /Niels
On Thu, Jul 9, 2020 at 4:11 PM Niels Möller nisse@lysator.liu.se wrote:
Do you expect that this "auto" logic does what the user wants? I'm thinking that maybe it's simpler to stick with just yes/no (no being the default), and then add support for --enable-fat later, to select code at run-time?
You are right; I removed the "auto" logic and set the default value to "no". Fat support has been added to the patch.
Overriding PROLOGUE here looks fine, but it would be nice to have a comment explaining what's needed, and/or a link to some appropriate ABI specification.
Added to the README file.
Thanks, Mamone
nettle-bugs@lists.lysator.liu.se