New subject: Miscomputation with big-endian arm asm

6 Feb 2018

Hi Niels,
thanks for getting back to me so quickly.
On Tue, Feb 06, 2018 at 07:36:22PM +0100, Niels Möller wrote:
...
...
Is there maybe a problem with the list or my email that you can discern?
None previously known to me (it's a plain mailman installation). I think
the most commonly used way to subscribe is to use the web frontend, and
then go to the link in the confirmation email using a web browser.
Mhmm, with another From address and a few more tries I now got myself
subscribed. Before that I had a number of confirmation emails leave my
server and be accepted by mail.lysator.liu.se, but the responses never
got back to my server, even with that other From address.
Anywho, I'll leave all the quoted text for now so people can maybe still
pick up on the conversation. Sorry for the mess.
...
...
...
I just ran into a problem where gnutls's certificate verification fails
only on big-endian arm Linux boards but not on the otherwise identical
little-endian ones. After recompiling nettle with --disable-assembler
the problem goes away on big-endian arm as well. Considering that
big-endian arm isn't all that common, I suspect nettle's optimised arm
asm might have some endianness issues.
I have done no testing on big-endian arm. My recent big-endian tests
have been on the ultrasparc t5 in the gcc compile farm
(gcc202.fsffrance.org), and locally using debian's mips cross compiler
and qemu. So I'm fairly confident that the C code is endian-safe.
...
Lots of questions, since I'm unfamiliar with such systems:
...
What board and linux (dist?) are you running this on?
I have a number of Cubieboard2s that run Gentoo Linux with a vanilla,
mainline Linus kernel.
# uname -a
Linux b 4.15.0-gentoo #2 SMP Sun Feb 4 18:46:30 CET 2018 armv7b
ARMv7 Processor rev 4 (v7b) Allwinner sun7i (A20) Family GNU/Linux
The only difference between little- and big-endian boards is the
following Linux kernel config options:
-# CONFIG_CPU_BIG_ENDIAN is not set
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_CPU_ENDIAN_BE8=y
This makes the kernel switch the CPU to big-endian mode on boot.
Userland is big-endian as well.
Big-endian on ARM is somewhat curious in that instruction encoding stays
little-endian but loads and stores use big-endian byte order - if the
CPU is in that mode. It can be switched back and forth at will and it
basically only changes where it starts loading/storing bytes and in
which order it continues.
Because that would be too easy, it has two different big-endian
operating modes called BE32 and BE8. From what I understand, BE32
actually stores bytes in the same order as little-endian in memory but
redirects accesses to individual bytes of words to make them appear to
be stored big-endian while BE8 actually stores words in big-endian byte
order and accesses individual bytes directly. The gory details are here
if you're interested:
http://infocenter.arm.com/help/topic/com.arm.doc.ddi0290g/ch06s05s01.html
https://blog.richliu.com/2010/04/08/907/arm11-be8-and-be32
BE32 is deprecated, newer cores don't even support it.
I'm running BE8, so kernel and userland are BE8:
# file /usr/bin/nettle-hash 
/usr/bin/nettle-hash: ELF 32-bit MSB shared object, ARM, EABI5 BE8
version 1 (SYSV), dynamically linked, interpreter
/lib/ld-linux-armhf.so.3, for GNU/Linux 3.2.0, stripped
...
What's the host triplet?
armv7veb-hardfloat-linux-gnueabi
gcc:
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/armv7veb-hardfloat-linux-gnueabi/7.2.0/lto-wrapper
Target: armv7veb-hardfloat-linux-gnueabi
Configured with:
/var/tmp/portage/sys-devel/gcc-7.2.0-r1/work/gcc-7.2.0/configure
--host=armv7veb-hardfloat-linux-gnueabi
--build=armv7veb-hardfloat-linux-gnueabi --prefix=/usr
--bindir=/usr/armv7veb-hardfloat-linux-gnueabi/gcc-bin/7.2.0
--includedir=/usr/lib/gcc/armv7veb-hardfloat-linux-gnueabi/7.2.0/include
--datadir=/usr/share/gcc-data/armv7veb-hardfloat-linux-gnueabi/7.2.0
--mandir=/usr/share/gcc-data/armv7veb-hardfloat-linux-gnueabi/7.2.0/man
--infodir=/usr/share/gcc-data/armv7veb-hardfloat-linux-gnueabi/7.2.0/info
--with-gxx-include-dir=/usr/lib/gcc/armv7veb-hardfloat-linux-gnueabi/7.2.0/include/g++-v7
--with-python-dir=/share/gcc-data/armv7veb-hardfloat-linux-gnueabi/7.2.0/python
--enable-languages=c,c++ --enable-obsolete --enable-secureplt
--disable-werror --with-system-zlib --disable-nls
--enable-checking=release --with-bugurl=https://bugs.gentoo.org/
--with-pkgversion='Gentoo Hardened 7.2.0-r1 p1.1' --enable-esp
--enable-libstdcxx-time --disable-libstdcxx-pch --enable-shared
--enable-threads=posix --enable-__cxa_atexit --enable-clocale=gnu
--disable-multilib --disable-altivec --disable-fixed-point
--with-float=hard --with-float=hard --with-fpu=vfpv3-d16
--disable-libgcj --disable-libgomp --disable-libmudflap --disable-libssp
--disable-libcilkrts --disable-libmpx --enable-vtable-verify
--enable-libvtv --disable-libquadmath --enable-lto --without-isl
--disable-libsanitizer --enable-default-pie --enable-default-ssp
--with-arch=armv7ve
Thread model: posix
gcc version 7.2.0 (Gentoo Hardened 7.2.0-r1 p1.1)
...
Are you cross compiling, or compiling natively?
It's all native on the board. I have a cross-toolchain and qemu on
standby on x86_64 if necessary.
...
Does configure detect it as big-endian (check for WORDS_BIGENDIAN in config.h)?
It seems so:
nettle-3.4 # grep WORDS_BIG config.h
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the
most
#  define WORDS_BIGENDIAN 1
# ifndef WORDS_BIGENDIAN
#  define WORDS_BIGENDIAN 1
...
Which of nettle's own tests (make check) fail?
With --disable-assembler all checks pass. Here's the make check output
with arm asm:
PASS: aes
PASS: arcfour
PASS: arctwo
PASS: blowfish
PASS: cast128
PASS: base16
PASS: base64
PASS: camellia
Error, expected:
3e00ef2f895f40d6 7f5bb8e81f09a5a1
2c840ec3ce9a7f3b 181be188ef711a1e
984ce172b9216f41 9f445367456d5619
314a42a3da86b001 387bfdb80e0cfe42
Got:
4c4389ad2c1a14e5 9a58af4d26f726c8
2beb7f103f529dc2 a8a203e7c69fb546
141dc7988d5106d1 c03d895bb6576d0f
9718ecc1ae929e8d c8dc1f6ee7038bbb
../run-tests: line 57:  7819 Aborted                 "$1" $testflags
FAIL: chacha
PASS: des
PASS: des3
PASS: des-compat
PASS: md2
PASS: md4
PASS: md5
PASS: md5-compat
PASS: memeql
Assert failed: memxor-test.c:106: MEMEQ (size, dst, c)
../run-tests: line 57:  7865 Aborted                 "$1" $testflags
FAIL: memxor
PASS: gosthash94
PASS: ripemd160
Got:
f278b7b482ad71f2 83dcb4f9fe864547
5ed675894046fbeb 59de3b76052dfd99
Expected:
077709362c2e32df 0ddc3f0dc47bba63
90b6c73bb50f9c31 22ec844ad7c2b3e5
../run-tests: line 57:  7881 Aborted                 "$1" $testflags
FAIL: hkdf
Encrypt failed:
Input:
0000000000000000
Output:
0ab22555fbcab5ce
Expected:
fc207dbfc76c5e17
../run-tests: line 57:  7887 Aborted                 "$1" $testflags
FAIL: salsa20
Got:
9844f81e1408f6ec b932137d33bed7cf
dcf518a3
Expected:
da39a3ee5e6b4b0d 3255bfef95601890
afd80709
../run-tests: line 57:  7893 Aborted                 "$1" $testflags
FAIL: sha1
Got:
51a8a435f33f1941 a7646a966b9f99e5
095b59c1072c0acd 2a893d99
Expected:
23097d223405d822 8642a477bda255b3
2aadbce4bda0b3f7 e36c9da7
../run-tests: line 57:  7899 Aborted                 "$1" $testflags
FAIL: sha224
Got:
312892d3e4bda557 75e12e46320ed33a
329b15d73167b830 ec07ba0845c7b4cf
Expected:
ba7816bf8f01cfea 414140de5dae2223
b00361a396177a9c b410ff61f20015ad
../run-tests: line 57:  7905 Aborted                 "$1" $testflags
FAIL: sha256
PASS: sha384
PASS: sha512
PASS: sha512-224
PASS: sha512-256
PASS: sha3-permute
PASS: sha3-224
PASS: sha3-256
PASS: sha3-384
PASS: sha3-512
PASS: serpent
PASS: twofish
PASS: version
PASS: knuth-lfib
PASS: cbc
PASS: cfb
PASS: ctr
PASS: gcm
PASS: eax
CCM digest failed:
Adata:
0001020304050607
Input:
20212223
Output:
98055abb
Expected:
4dac255d
../run-tests: line 57:  8001 Aborted                 "$1" $testflags
FAIL: ccm
PASS: poly1305
Assert failed: testutils.c:619: MEMEQ(length, data, ciphertext->data)
../run-tests: line 57:  8012 Aborted                 "$1" $testflags
FAIL: chacha-poly1305
Assert failed: hmac-test.c:205: MEMEQ ((tstring_hex("b617318655057264
e28bc0b6fb378c8e f146be00"))->length, digest,
(tstring_hex("b617318655057264 e28bc0b6fb378c8e f146be00"))->data)
../run-tests: line 57:  8018 Aborted                 "$1" $testflags
FAIL: hmac
umac32 failed
msg:
length: 0
tag:
9f972a17
ref:
113145fb
../run-tests: line 57:  8024 Aborted                 "$1" $testflags
FAIL: umac
PASS: meta-hash
PASS: meta-cipher
PASS: meta-aead
PASS: meta-armor
PASS: buffer
Assert failed: yarrow-test.c:185: memcmp(digest, expected_input,
sizeof(digest)) == 0
../run-tests: line 57:  8055 Aborted                 "$1" $testflags
FAIL: yarrow
Assert failed: pbkdf2-test.c:38: MEMEQ
((tstring_hex("0c60c80f961f0e71f3a9b524af6012062fe037a6"))->length, dk,
(tstring_hex("0c60c80f961f0e71f3a9b524af6012062fe037a6"))->data)
../run-tests: line 57:  8061 Aborted                 "$1" $testflags
FAIL: pbkdf2
Assert failed: pss-mgf1-test.c:22: MEMEQ (expected->length, mask,
expected->data)
../run-tests: line 57:  8067 Aborted                 "$1" $testflags
FAIL: pss-mgf1
PASS: sexp
PASS: sexp-format
PASS: rsa2sexp
PASS: sexp2rsa
PASS: bignum
PASS: random-prime
PASS: pkcs1
Assert failed: pss-test.c:29: mpz_cmp(m, expected) == 0
../run-tests: line 57:  8108 Aborted                 "$1" $testflags
FAIL: pss
PASS: rsa-sign-tr
Assert failed: rsa-pss-sign-tr-test.c:72: mpz_cmp(signature, expected)
== 0
../run-tests: line 57:  8119 Aborted                 "$1" $testflags
FAIL: rsa-pss-sign-tr
Assert failed: testutils.c:1004: mpz_cmp (signature, expected) == 0
../run-tests: line 57:  8125 Aborted                 "$1" $testflags
FAIL: rsa
PASS: rsa-encrypt
Assert failed: testutils.c:1004: mpz_cmp (signature, expected) == 0
../run-tests: line 57:  8136 Aborted                 "$1" $testflags
FAIL: rsa-keygen
Assert failed: testutils.c:1189: mpz_cmp (signature.r, expected->r) == 0
&& mpz_cmp (signature.s, expected->s) == 0
../run-tests: line 57:  8142 Aborted                 "$1" $testflags
FAIL: dsa
PASS: dsa-keygen
PASS: curve25519-dh
PASS: ecc-mod
PASS: ecc-modinv
PASS: ecc-redc
PASS: ecc-sqrt
PASS: ecc-dup
PASS: ecc-add
PASS: ecc-mul-g
PASS: ecc-mul-a
PASS: ecdsa-sign
PASS: ecdsa-verify
PASS: ecdsa-keygen
PASS: ecdh
PASS: eddsa-compress
PASS: eddsa-sign
PASS: eddsa-verify
PASS: ed25519
PASS: cxx
PASS: sexp-conv
1c1
<
2de201fee759dffb05a5ff127f4b0b134bf10f466cf174ebff52d387e551225a61e30ec850c38681574a1a8cefa1aa6030481cebc92268863871796ed1afd017969a1d70bb1c936fa1a71a975ddcc07a8d492d6caf5942182b03fa69fea603d904e1cd7c2c9f78e060662d7cf5ec2a5d5af7988e3054513f9f356b749360ec13
---
...
5c96ffe7e925224ce6e98648bf2ed3193cab2fc82af9c7fa7fdc5b623bde1d77c5409129d16d1127ae4fad519c24059fe85f4a4360a900f3dee906e6de2ecd010fa56c02d3f7d0772d43439464a91b025722a6f0b6cb65aee1017b29aff4511f90315caae0be74c2ac496474896e7e3ad200cb7c609ddef5c674272964e4b780
FAIL: pkcs1-conv
test1.out test2.out differ: char 1, line 1
FAIL: nettle-pbkdf2
PASS: symbols
PASS: dlopen
=====================
21 of 94 tests failed
=====================
make[1]: *** [Makefile:136: check] Error 1
Looks bad.
...
...
...
I've not narrowed this down to a proper test case yet because I'm
wondering if this even warrants digging into. Might this be an easy
fix or do I have to expect this to get so involved that I might just as
well just disable asm on big-endian arm and leave it at that? I *am* all
set to dive into this to provide a better test case and perhaps even
a patch - just asking for the odds to solve this with only a beginner's
arm asm skills.
Note how all the SHA1/256 digests below differ for the same certificate.
From these symptoms, the main suspect is the data load in
arm/v6/sha1-compress.asm (see the LOAD macro) and
arm/v6/sha256-compress.asm (look at the code after the .Lcopy label).
...
As a quick test, you could try just deleting all use of the "rev"
instruction in those two files.
...
I'm not sure what's needed to properly support big-endian there, maybe
deleting rev isn't enough, one might also need to shift differently in
the unaligned case. If you want to use the same assembly source file for
both big- and little-endian, with only some m4 ifelse to do conditional
things in the asm files, you should let configure substitute something
in config.m4.in to test on.
Yes, the masking and shifting needs some adjustment, too. I got
sha1-test to succeed with below patch. What do you think: Could we go
some route like that for the other arm asm code as well? I'd be willing
to throw in aarch64 as well because I've got some Pine64s running BE
floating around also. :)
...
The aes code also loads unaligned data, but it reads it byte-by-byte,
without the tricks to use aligned word loads + rotate + sel.
...
Before attempting to support big-endian arm, I'd need some idea on how
to test it.
Any halfway current ARM cross toolchain should be able to also output
big-endian arm binaries (-mbig-endian). Then you could test those with
qemu-user-armeb, which is very light-weight in that it doesn't need a
kernel or emulated system and allows running binaries directly.
...
If it's hard for me to test, the safest change may be to
just disable all arm assembly on big-endian.
I'm not ready to go there yet. Poking around ARM ASM unexpectedly is
fun. :)
...
From f876368b333c72878808e74a0af5aa631d42d357 Mon Sep 17 00:00:00 2001
From: Michael Weiser michael.weiser@gmx.de
Date: Wed, 7 Feb 2018 00:11:24 +0100
Subject: [PATCH] Support big-endian arm in sha1 armv6 assembly code
---
 arm/v6/sha1-compress.asm | 10 ++++++++++
 asm.m4                   | 10 ++++++++++
 config.m4.in             |  1 +
 configure.ac             |  2 ++
 4 files changed, 23 insertions(+)

diff --git a/arm/v6/sha1-compress.asm b/arm/v6/sha1-compress.asm
index 59d6297e..116a80f0 100644
--- a/arm/v6/sha1-compress.asm
+++ b/arm/v6/sha1-compress.asm
@@ -52,7 +52,9 @@ define(<LOAD>, <
    sel	W, WPREV, T0
    ror	W, W, SHIFT
    mov	WPREV, T0
+NOT_IF_BE(<
    rev	W, W
+>)
    str	W, [SP,#eval(4*$1)]
...
)
define(<EXPN>, <
@@ -127,8 +129,16 @@ PROLOGUE(_nettle_sha1_compress)
    lsl	SHIFT, SHIFT, #3
    mov	T0, #0
    movne	T0, #-1
+IF_BE(<
+	lsr	W, T0, SHIFT
+>, <
    lsl	W, T0, SHIFT
+>)
    uadd8	T0, T0, W		C Sets APSR.GE bits
+IF_BE(<
+	neg     SHIFT, SHIFT		C Rotate right by 32-SHIFT bits
+	add     SHIFT, SHIFT, #32	C because there's no rotate left
+>, <>)
    
    ldr	K, .LK1
    ldm	STATE, {SA,SB,SC,SD,SE}
diff --git a/asm.m4 b/asm.m4
index 4018c235..34e39317 100644
--- a/asm.m4
+++ b/asm.m4
@@ -51,6 +51,16 @@ define(<ALIGN>,
 <.align ifelse(ALIGN_LOG,yes,<m4_log2($1)>,$1)
...
)
+define(<IF_BE>,
+<ifelse(WORDS_BIGENDIAN,yes,
+<$1>,
+<$2>)>)
+
+define(<NOT_IF_BE>,
+<ifelse(WORDS_BIGENDIAN,no,
+<$1>,
+<>)>)
+
 dnl Struct defining macros
dnl STRUCTURE(prefix) 
diff --git a/config.m4.in b/config.m4.in
index e39c880c..11f90a40 100644
--- a/config.m4.in
+++ b/config.m4.in
@@ -7,6 +7,7 @@ define(<TYPE_PROGBITS>, <@ASM_TYPE_PROGBITS@>)dnl
 define(<ALIGN_LOG>, <@ASM_ALIGN_LOG@>)dnl
 define(<W64_ABI>, <@W64_ABI@>)dnl
 define(<RODATA>, <@ASM_RODATA@>)dnl
+define(<WORDS_BIGENDIAN>, <@ASM_WORDS_BIGENDIAN@>)dnl
 divert(1)
 @ASM_MARK_NOEXEC_STACK@
 divert
diff --git a/configure.ac b/configure.ac
index 41bf0998..5db72be8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -691,6 +691,7 @@ ASM_TYPE_FUNCTION='@function'
 ASM_TYPE_PROGBITS='@progbits'
 ASM_MARK_NOEXEC_STACK=''
 ASM_ALIGN_LOG=''
+ASM_WORDS_BIGENDIAN="$ac_cv_c_bigendian"
if test x$enable_assembler = xyes ; then
   AC_CACHE_CHECK([if globals are prefixed by underscore],
@@ -811,6 +812,7 @@ AC_SUBST(ASM_TYPE_PROGBITS)
 AC_SUBST(ASM_MARK_NOEXEC_STACK)
 AC_SUBST(ASM_ALIGN_LOG)
 AC_SUBST(W64_ABI)
+AC_SUBST(ASM_WORDS_BIGENDIAN)
 AC_SUBST(EMULATOR)
AC_SUBST(LIBNETTLE_MAJOR)
-- 
2.16.1

-- 
Thanks,
Michael

    

Re: Miscomputation with big-endian arm asm