Use explicit register names to improve the readability of the assembly files, and pass -mregnames to the assembler so the files still build. I will make a stand-alone patch for GCM that brings in all the accumulated modifications so it can be merged directly.
---
 configure.ac                          |   4 +-
 powerpc64/machine.m4                  |   4 +-
 powerpc64/p8/aes-decrypt-internal.asm | 194 +++++++++++++++++-----------------
 powerpc64/p8/aes-encrypt-internal.asm | 192 ++++++++++++++++-----------------
 4 files changed, 198 insertions(+), 196 deletions(-)
diff --git a/configure.ac b/configure.ac
index 666b2f4a..6ab32f03 100644
--- a/configure.ac
+++ b/configure.ac
@@ -458,10 +458,12 @@ if test "x$enable_assembler" = xyes ; then
     if test "$ABI" = 64 ; then
       asm_path="powerpc64"
       if test "x$enable_fat" = xyes ; then
-        asm_path="powerpc64/fat $asm_path"
+        CFLAGS="$CFLAGS -Wa,-mregnames"
+        asm_path="powerpc64/fat $asm_path"
         OPT_NETTLE_SOURCES="fat-ppc.c $OPT_NETTLE_SOURCES"
         FAT_TEST_LIST="none crypto_ext"
       elif test "x$enable_power_crypto_ext" = xyes ; then
+        CFLAGS="$CFLAGS -Wa,-mregnames"
         asm_path="powerpc64/p8 $asm_path"
       fi
     fi
diff --git a/powerpc64/machine.m4 b/powerpc64/machine.m4
index 221fa523..cefabc9b 100644
--- a/powerpc64/machine.m4
+++ b/powerpc64/machine.m4
@@ -24,7 +24,7 @@ define(`EPILOGUE',
C Get vector-scalar register from vector register
C VSR(VR)
-define(`VSR',`32+$1')
+define(`VSR',``vs'eval(32+substr($1,1,len($1)))')

C Load the quadword in DATA_SRC storage into
C VEC_DST. GPR is general-purpose register
@@ -32,5 +32,5 @@ C used to obtain the effective address of
C DATA_SRC storage.
C DATA_LOAD_VEC(VEC_DST, DATA_SRC, GPR)
define(`DATA_LOAD_VEC',
-`ld $3,$2@got(2)
+`ld $3,$2@got(r2)
lvx $1,0,$3')
diff --git a/powerpc64/p8/aes-decrypt-internal.asm b/powerpc64/p8/aes-decrypt-internal.asm
index acdbc1bd..7c79ffcb 100644
--- a/powerpc64/p8/aes-decrypt-internal.asm
+++ b/powerpc64/p8/aes-decrypt-internal.asm
@@ -31,32 +31,32 @@ ifelse(`
C Register usage:
-define(`SP', `1')
-define(`TOCP', `2')
-
-define(`ROUNDS', `3')
-define(`KEYS', `4')
-define(`LENGTH', `6')
-define(`DST', `7')
-define(`SRC', `8')
-
-define(`swap_mask', `0')
-
-define(`K', `1')
-define(`S0', `2')
-define(`S1', `3')
-define(`S2', `4')
-define(`S3', `5')
-define(`S4', `6')
-define(`S5', `7')
-define(`S6', `8')
-define(`S7', `9')
+define(`SP', `r1')
+define(`TOCP', `r2')
+
+define(`ROUNDS', `r3')
+define(`KEYS', `r4')
+define(`LENGTH', `r6')
+define(`DST', `r7')
+define(`SRC', `r8')
+
+define(`swap_mask', `v0')
+
+define(`K', `v1')
+define(`S0', `v2')
+define(`S1', `v3')
+define(`S2', `v4')
+define(`S3', `v5')
+define(`S4', `v6')
+define(`S5', `v7')
+define(`S6', `v8')
+define(`S7', `v9')

C ZERO vector register is used in place of RoundKey
C for vncipher instruction because the order of InvMixColumns
C and Xor processes are flipped in that instruction.
C The Xor process with RoundKey is executed afterward.
-define(`ZERO', `10')
+define(`ZERO', `v10')
.file "aes-decrypt-internal.asm"
@@ -71,30 +71,30 @@ define(`FUNC_ALIGN', `5')
PROLOGUE(_nettle_aes_decrypt)
vxor ZERO,ZERO,ZERO

- DATA_LOAD_VEC(swap_mask,.swap_mask,5)
+ DATA_LOAD_VEC(swap_mask,.swap_mask,r5)

subi ROUNDS,ROUNDS,1
srdi LENGTH,LENGTH,4

- srdi 5,LENGTH,3 #8x loop count
- cmpldi 5,0
+ srdi r5,LENGTH,3 #8x loop count
+ cmpldi r5,0
beq L4x

- std 25,-56(SP);
- std 26,-48(SP);
- std 27,-40(SP);
- std 28,-32(SP);
- std 29,-24(SP);
- std 30,-16(SP);
- std 31,-8(SP);
-
- li 25,0x10
- li 26,0x20
- li 27,0x30
- li 28,0x40
- li 29,0x50
- li 30,0x60
- li 31,0x70
+ std r25,-56(SP);
+ std r26,-48(SP);
+ std r27,-40(SP);
+ std r28,-32(SP);
+ std r29,-24(SP);
+ std r30,-16(SP);
+ std r31,-8(SP);
+
+ li r25,0x10
+ li r26,0x20
+ li r27,0x30
+ li r28,0x40
+ li r29,0x50
+ li r30,0x60
+ li r31,0x70

.align 5
Lx8_loop:
@@ -102,13 +102,13 @@ Lx8_loop:
vperm K,K,K,swap_mask

lxvd2x VSR(S0),0,SRC
- lxvd2x VSR(S1),25,SRC
- lxvd2x VSR(S2),26,SRC
- lxvd2x VSR(S3),27,SRC
- lxvd2x VSR(S4),28,SRC
- lxvd2x VSR(S5),29,SRC
- lxvd2x VSR(S6),30,SRC
- lxvd2x VSR(S7),31,SRC
+ lxvd2x VSR(S1),r25,SRC
+ lxvd2x VSR(S2),r26,SRC
+ lxvd2x VSR(S3),r27,SRC
+ lxvd2x VSR(S4),r28,SRC
+ lxvd2x VSR(S5),r29,SRC
+ lxvd2x VSR(S6),r30,SRC
+ lxvd2x VSR(S7),r31,SRC

IF_LE(`vperm S0,S0,S0,swap_mask
vperm S1,S1,S1,swap_mask
@@ -129,10 +129,10 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vxor S7,S7,K

mtctr ROUNDS
- li 10,0x10
+ li r10,0x10
.align 5
L8x_round_loop:
- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vncipher S0,S0,ZERO
vncipher S1,S1,ZERO
@@ -150,10 +150,10 @@ L8x_round_loop:
vxor S5,S5,K
vxor S6,S6,K
vxor S7,S7,K
- addi 10,10,0x10
+ addi r10,r10,0x10
bdnz L8x_round_loop

- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vncipherlast S0,S0,K
vncipherlast S1,S1,K
@@ -174,44 +174,44 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vperm S7,S7,S7,swap_mask')

stxvd2x VSR(S0),0,DST
- stxvd2x VSR(S1),25,DST
- stxvd2x VSR(S2),26,DST
- stxvd2x VSR(S3),27,DST
- stxvd2x VSR(S4),28,DST
- stxvd2x VSR(S5),29,DST
- stxvd2x VSR(S6),30,DST
- stxvd2x VSR(S7),31,DST
+ stxvd2x VSR(S1),r25,DST
+ stxvd2x VSR(S2),r26,DST
+ stxvd2x VSR(S3),r27,DST
+ stxvd2x VSR(S4),r28,DST
+ stxvd2x VSR(S5),r29,DST
+ stxvd2x VSR(S6),r30,DST
+ stxvd2x VSR(S7),r31,DST

addi SRC,SRC,0x80
addi DST,DST,0x80
- subic. 5,5,1
+ subic. r5,r5,1
bne Lx8_loop

- ld 25,-56(SP);
- ld 26,-48(SP);
- ld 27,-40(SP);
- ld 28,-32(SP);
- ld 29,-24(SP);
- ld 30,-16(SP);
- ld 31,-8(SP);
+ ld r25,-56(SP);
+ ld r26,-48(SP);
+ ld r27,-40(SP);
+ ld r28,-32(SP);
+ ld r29,-24(SP);
+ ld r30,-16(SP);
+ ld r31,-8(SP);
clrldi LENGTH,LENGTH,61
L4x:
- srdi 5,LENGTH,2
- cmpldi 5,0
+ srdi r5,LENGTH,2
+ cmpldi r5,0
beq L2x

lxvd2x VSR(K),0,KEYS
vperm K,K,K,swap_mask

lxvd2x VSR(S0),0,SRC
- li 9,0x10
- lxvd2x VSR(S1),9,SRC
- addi 9,9,0x10
- lxvd2x VSR(S2),9,SRC
- addi 9,9,0x10
- lxvd2x VSR(S3),9,SRC
+ li r9,0x10
+ lxvd2x VSR(S1),r9,SRC
+ addi r9,r9,0x10
+ lxvd2x VSR(S2),r9,SRC
+ addi r9,r9,0x10
+ lxvd2x VSR(S3),r9,SRC

IF_LE(`vperm S0,S0,S0,swap_mask
vperm S1,S1,S1,swap_mask
@@ -224,10 +224,10 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vxor S3,S3,K

mtctr ROUNDS
- li 10,0x10
+ li r10,0x10
.align 5
L4x_round_loop:
- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vncipher S0,S0,ZERO
vncipher S1,S1,ZERO
@@ -237,10 +237,10 @@ L4x_round_loop:
vxor S1,S1,K
vxor S2,S2,K
vxor S3,S3,K
- addi 10,10,0x10
+ addi r10,r10,0x10
bdnz L4x_round_loop

- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vncipherlast S0,S0,K
vncipherlast S1,S1,K
@@ -253,12 +253,12 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vperm S3,S3,S3,swap_mask')

stxvd2x VSR(S0),0,DST
- li 9,0x10
- stxvd2x VSR(S1),9,DST
- addi 9,9,0x10
- stxvd2x VSR(S2),9,DST
- addi 9,9,0x10
- stxvd2x VSR(S3),9,DST
+ li r9,0x10
+ stxvd2x VSR(S1),r9,DST
+ addi r9,r9,0x10
+ stxvd2x VSR(S2),r9,DST
+ addi r9,r9,0x10
+ stxvd2x VSR(S3),r9,DST

addi SRC,SRC,0x40
addi DST,DST,0x40
@@ -266,16 +266,16 @@ IF_LE(`vperm S0,S0,S0,swap_mask
clrldi LENGTH,LENGTH,62

L2x:
- srdi 5,LENGTH,1
- cmpldi 5,0
+ srdi r5,LENGTH,1
+ cmpldi r5,0
beq L1x

lxvd2x VSR(K),0,KEYS
vperm K,K,K,swap_mask

lxvd2x VSR(S0),0,SRC
- li 9,0x10
- lxvd2x VSR(S1),9,SRC
+ li r9,0x10
+ lxvd2x VSR(S1),r9,SRC

IF_LE(`vperm S0,S0,S0,swap_mask
vperm S1,S1,S1,swap_mask')
@@ -284,19 +284,19 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vxor S1,S1,K

mtctr ROUNDS
- li 10,0x10
+ li r10,0x10
.align 5
L2x_round_loop:
- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vncipher S0,S0,ZERO
vncipher S1,S1,ZERO
vxor S0,S0,K
vxor S1,S1,K
- addi 10,10,0x10
+ addi r10,r10,0x10
bdnz L2x_round_loop

- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vncipherlast S0,S0,K
vncipherlast S1,S1,K
@@ -305,8 +305,8 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vperm S1,S1,S1,swap_mask')

stxvd2x VSR(S0),0,DST
- li 9,0x10
- stxvd2x VSR(S1),9,DST
+ li r9,0x10
+ stxvd2x VSR(S1),r9,DST

addi SRC,SRC,0x20
addi DST,DST,0x20
@@ -327,17 +327,17 @@ IF_LE(`vperm S0,S0,S0,swap_mask')
vxor S0,S0,K

mtctr ROUNDS
- li 10,0x10
+ li r10,0x10
.align 5
L1x_round_loop:
- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vncipher S0,S0,ZERO
vxor S0,S0,K
- addi 10,10,0x10
+ addi r10,r10,0x10
bdnz L1x_round_loop

- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vncipherlast S0,S0,K

diff --git a/powerpc64/p8/aes-encrypt-internal.asm b/powerpc64/p8/aes-encrypt-internal.asm
index 482dff25..3dd6e7b5 100644
--- a/powerpc64/p8/aes-encrypt-internal.asm
+++ b/powerpc64/p8/aes-encrypt-internal.asm
@@ -31,26 +31,26 @@ ifelse(`
C Register usage:
-define(`SP', `1')
-define(`TOCP', `2')
-
-define(`ROUNDS', `3')
-define(`KEYS', `4')
-define(`LENGTH', `6')
-define(`DST', `7')
-define(`SRC', `8')
-
-define(`swap_mask', `0')
-
-define(`K', `1')
-define(`S0', `2')
-define(`S1', `3')
-define(`S2', `4')
-define(`S3', `5')
-define(`S4', `6')
-define(`S5', `7')
-define(`S6', `8')
-define(`S7', `9')
+define(`SP', `r1')
+define(`TOCP', `r2')
+
+define(`ROUNDS', `r3')
+define(`KEYS', `r4')
+define(`LENGTH', `r6')
+define(`DST', `r7')
+define(`SRC', `r8')
+
+define(`swap_mask', `v0')
+
+define(`K', `v1')
+define(`S0', `v2')
+define(`S1', `v3')
+define(`S2', `v4')
+define(`S3', `v5')
+define(`S4', `v6')
+define(`S5', `v7')
+define(`S6', `v8')
+define(`S7', `v9')
.file "aes-encrypt-internal.asm"
@@ -63,30 +63,30 @@ define(`S7', `9')
define(`FUNC_ALIGN', `5')
PROLOGUE(_nettle_aes_encrypt)
- DATA_LOAD_VEC(swap_mask,.swap_mask,5)
+ DATA_LOAD_VEC(swap_mask,.swap_mask,r5)

subi ROUNDS,ROUNDS,1
srdi LENGTH,LENGTH,4

- srdi 5,LENGTH,3 #8x loop count
- cmpldi 5,0
+ srdi r5,LENGTH,3 #8x loop count
+ cmpldi r5,0
beq L4x

- std 25,-56(SP);
- std 26,-48(SP);
- std 27,-40(SP);
- std 28,-32(SP);
- std 29,-24(SP);
- std 30,-16(SP);
- std 31,-8(SP);
-
- li 25,0x10
- li 26,0x20
- li 27,0x30
- li 28,0x40
- li 29,0x50
- li 30,0x60
- li 31,0x70
+ std r25,-56(SP);
+ std r26,-48(SP);
+ std r27,-40(SP);
+ std r28,-32(SP);
+ std r29,-24(SP);
+ std r30,-16(SP);
+ std r31,-8(SP);
+
+ li r25,0x10
+ li r26,0x20
+ li r27,0x30
+ li r28,0x40
+ li r29,0x50
+ li r30,0x60
+ li r31,0x70

.align 5
Lx8_loop:
@@ -94,13 +94,13 @@ Lx8_loop:
vperm K,K,K,swap_mask

lxvd2x VSR(S0),0,SRC
- lxvd2x VSR(S1),25,SRC
- lxvd2x VSR(S2),26,SRC
- lxvd2x VSR(S3),27,SRC
- lxvd2x VSR(S4),28,SRC
- lxvd2x VSR(S5),29,SRC
- lxvd2x VSR(S6),30,SRC
- lxvd2x VSR(S7),31,SRC
+ lxvd2x VSR(S1),r25,SRC
+ lxvd2x VSR(S2),r26,SRC
+ lxvd2x VSR(S3),r27,SRC
+ lxvd2x VSR(S4),r28,SRC
+ lxvd2x VSR(S5),r29,SRC
+ lxvd2x VSR(S6),r30,SRC
+ lxvd2x VSR(S7),r31,SRC

IF_LE(`vperm S0,S0,S0,swap_mask
vperm S1,S1,S1,swap_mask
@@ -121,10 +121,10 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vxor S7,S7,K

mtctr ROUNDS
- li 10,0x10
+ li r10,0x10
.align 5
L8x_round_loop:
- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vcipher S0,S0,K
vcipher S1,S1,K
@@ -134,10 +134,10 @@ L8x_round_loop:
vcipher S5,S5,K
vcipher S6,S6,K
vcipher S7,S7,K
- addi 10,10,0x10
+ addi r10,r10,0x10
bdnz L8x_round_loop

- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vcipherlast S0,S0,K
vcipherlast S1,S1,K
@@ -158,44 +158,44 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vperm S7,S7,S7,swap_mask')

stxvd2x VSR(S0),0,DST
- stxvd2x VSR(S1),25,DST
- stxvd2x VSR(S2),26,DST
- stxvd2x VSR(S3),27,DST
- stxvd2x VSR(S4),28,DST
- stxvd2x VSR(S5),29,DST
- stxvd2x VSR(S6),30,DST
- stxvd2x VSR(S7),31,DST
+ stxvd2x VSR(S1),r25,DST
+ stxvd2x VSR(S2),r26,DST
+ stxvd2x VSR(S3),r27,DST
+ stxvd2x VSR(S4),r28,DST
+ stxvd2x VSR(S5),r29,DST
+ stxvd2x VSR(S6),r30,DST
+ stxvd2x VSR(S7),r31,DST

addi SRC,SRC,0x80
addi DST,DST,0x80
- subic. 5,5,1
+ subic. r5,r5,1
bne Lx8_loop

- ld 25,-56(SP);
- ld 26,-48(SP);
- ld 27,-40(SP);
- ld 28,-32(SP);
- ld 29,-24(SP);
- ld 30,-16(SP);
- ld 31,-8(SP);
+ ld r25,-56(SP);
+ ld r26,-48(SP);
+ ld r27,-40(SP);
+ ld r28,-32(SP);
+ ld r29,-24(SP);
+ ld r30,-16(SP);
+ ld r31,-8(SP);
clrldi LENGTH,LENGTH,61
L4x:
- srdi 5,LENGTH,2
- cmpldi 5,0
+ srdi r5,LENGTH,2
+ cmpldi r5,0
beq L2x

lxvd2x VSR(K),0,KEYS
vperm K,K,K,swap_mask

lxvd2x VSR(S0),0,SRC
- li 9,0x10
- lxvd2x VSR(S1),9,SRC
- addi 9,9,0x10
- lxvd2x VSR(S2),9,SRC
- addi 9,9,0x10
- lxvd2x VSR(S3),9,SRC
+ li r9,0x10
+ lxvd2x VSR(S1),r9,SRC
+ addi r9,r9,0x10
+ lxvd2x VSR(S2),r9,SRC
+ addi r9,r9,0x10
+ lxvd2x VSR(S3),r9,SRC

IF_LE(`vperm S0,S0,S0,swap_mask
vperm S1,S1,S1,swap_mask
@@ -208,19 +208,19 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vxor S3,S3,K

mtctr ROUNDS
- li 10,0x10
+ li r10,0x10
.align 5
L4x_round_loop:
- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vcipher S0,S0,K
vcipher S1,S1,K
vcipher S2,S2,K
vcipher S3,S3,K
- addi 10,10,0x10
+ addi r10,r10,0x10
bdnz L4x_round_loop

- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vcipherlast S0,S0,K
vcipherlast S1,S1,K
@@ -233,12 +233,12 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vperm S3,S3,S3,swap_mask')

stxvd2x VSR(S0),0,DST
- li 9,0x10
- stxvd2x VSR(S1),9,DST
- addi 9,9,0x10
- stxvd2x VSR(S2),9,DST
- addi 9,9,0x10
- stxvd2x VSR(S3),9,DST
+ li r9,0x10
+ stxvd2x VSR(S1),r9,DST
+ addi r9,r9,0x10
+ stxvd2x VSR(S2),r9,DST
+ addi r9,r9,0x10
+ stxvd2x VSR(S3),r9,DST

addi SRC,SRC,0x40
addi DST,DST,0x40
@@ -246,16 +246,16 @@ IF_LE(`vperm S0,S0,S0,swap_mask
clrldi LENGTH,LENGTH,62

L2x:
- srdi 5,LENGTH,1
- cmpldi 5,0
+ srdi r5,LENGTH,1
+ cmpldi r5,0
beq L1x

lxvd2x VSR(K),0,KEYS
vperm K,K,K,swap_mask

lxvd2x VSR(S0),0,SRC
- li 9,0x10
- lxvd2x VSR(S1),9,SRC
+ li r9,0x10
+ lxvd2x VSR(S1),r9,SRC

IF_LE(`vperm S0,S0,S0,swap_mask
vperm S1,S1,S1,swap_mask')
@@ -264,17 +264,17 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vxor S1,S1,K

mtctr ROUNDS
- li 10,0x10
+ li r10,0x10
.align 5
L2x_round_loop:
- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vcipher S0,S0,K
vcipher S1,S1,K
- addi 10,10,0x10
+ addi r10,r10,0x10
bdnz L2x_round_loop

- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vcipherlast S0,S0,K
vcipherlast S1,S1,K
@@ -283,8 +283,8 @@ IF_LE(`vperm S0,S0,S0,swap_mask
vperm S1,S1,S1,swap_mask')

stxvd2x VSR(S0),0,DST
- li 9,0x10
- stxvd2x VSR(S1),9,DST
+ li r9,0x10
+ stxvd2x VSR(S1),r9,DST

addi SRC,SRC,0x20
addi DST,DST,0x20
@@ -305,16 +305,16 @@ IF_LE(`vperm S0,S0,S0,swap_mask')
vxor S0,S0,K

mtctr ROUNDS
- li 10,0x10
+ li r10,0x10
.align 5
L1x_round_loop:
- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vcipher S0,S0,K
- addi 10,10,0x10
+ addi r10,r10,0x10
bdnz L1x_round_loop

- lxvd2x VSR(K),10,KEYS
+ lxvd2x VSR(K),r10,KEYS
vperm K,K,K,swap_mask
vcipherlast S0,S0,K

Maamoun TK <maamoun.tk@googlemail.com> writes:
> Use explicit register names to improve the readability of the assembly files, and pass -mregnames to the assembler so the files still build. I will make a stand-alone patch for GCM that brings in all the accumulated modifications so it can be merged directly.
>
>  configure.ac                          |   4 +-
>  powerpc64/machine.m4                  |   4 +-
>  powerpc64/p8/aes-decrypt-internal.asm | 194 +++++++++++++++++-----------------
>  powerpc64/p8/aes-encrypt-internal.asm | 192 ++++++++++++++++-----------------
>  4 files changed, 198 insertions(+), 196 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 666b2f4a..6ab32f03 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -458,10 +458,12 @@ if test "x$enable_assembler" = xyes ; then
>     if test "$ABI" = 64 ; then
>       asm_path="powerpc64"
>       if test "x$enable_fat" = xyes ; then
> -        asm_path="powerpc64/fat $asm_path"
> +        CFLAGS="$CFLAGS -Wa,-mregnames"
> +        asm_path="powerpc64/fat $asm_path"
I'm not sure it's a good idea to unconditionally use these gcc-specific flags. Are they supported by all relevant compilers? I'm considering instead adding the attached patch. It's a pretty large file with various m4 utilities plus an autoconf test to determine if the assembler accepts register names (copied from gmp), and then conditionally doing
forloop(i,0,31,`deflit(`r'i,i)')
to define register names as macros expanding to the corresponding integers.
This could be extended with a configure check to try adding -Wa,-mregnames.
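To make that concrete, here is a rough sketch of the kind of configure check this could be. All names in it (nettle_cv_asm_regnames, ASM_PPC_WANT_R_REGISTERS, and the probe itself) are made up for illustration; the attached patch, and the gmp test it borrows from, will differ in detail. The idea is to ask the toolchain to assemble an instruction written with a named register, and export the answer so the m4 layer can decide whether to define r0..r31 itself:

dnl  Hypothetical sketch, only meaningful when targeting PowerPC: probe
dnl  whether the assembler (as driven by $CC) accepts register names
dnl  such as "r0" instead of bare numbers.
AC_CACHE_CHECK([whether the assembler accepts PowerPC register names],
  [nettle_cv_asm_regnames],
  [AC_COMPILE_IFELSE(
     [AC_LANG_PROGRAM([], [[__asm__("li r0, 0");]])],
     [nettle_cv_asm_regnames=yes],
     [nettle_cv_asm_regnames=no])])

dnl  Export the result to the m4 preprocessing of the .asm files, e.g.
dnl  through a substitution into config.m4 (variable name is illustrative).
if test "$nettle_cv_asm_regnames" = yes ; then
  ASM_PPC_WANT_R_REGISTERS=yes
else
  ASM_PPC_WANT_R_REGISTERS=no
fi
AC_SUBST([ASM_PPC_WANT_R_REGISTERS])

The m4 side would then wrap the forloop/deflit definitions above in a test of that result, so r0..r31 are only turned into plain numbers when the assembler insists on them; a similar probe could decide whether adding -Wa,-mregnames is accepted at all.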
> diff --git a/powerpc64/machine.m4 b/powerpc64/machine.m4
> index 221fa523..cefabc9b 100644
> --- a/powerpc64/machine.m4
> +++ b/powerpc64/machine.m4
> @@ -24,7 +24,7 @@ define(`EPILOGUE',
> C Get vector-scalar register from vector register
> C VSR(VR)
> -define(`VSR',`32+$1')
> +define(`VSR',``vs'eval(32+substr($1,1,len($1)))')
May be less brittle with an explicit ifelse chain, like the similar macros in arm/machine.m4. Should work better with the above approach, where r1 may expand to 1, depending on a configure check.
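As a concrete illustration (not the actual patch, and only covering the vector registers these two files use), an ifelse-style VSR might look something like the sketch below; if the vN names end up expanding to bare numbers via the configure-dependent definitions, the chain would also need entries for those spellings:

C Hypothetical sketch of VSR as an explicit ifelse chain; a real version
C would enumerate all of v0-v31 (and possibly the bare numbers 0-31).
define(`VSR', `ifelse(
	$1, `v0', `vs32',
	$1, `v1', `vs33',
	$1, `v2', `vs34',
	$1, `v3', `vs35',
	$1, `v4', `vs36',
	$1, `v5', `vs37',
	$1, `v6', `vs38',
	$1, `v7', `vs39',
	$1, `v8', `vs40',
	$1, `v9', `vs41',
	$1, `v10', `vs42',
	`errprint(`VSR: unrecognized register $1')m4exit(1)')')

Compared with the eval/substr version, a chain like this fails loudly on an unexpected operand instead of silently emitting a bogus one.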
> I'm not sure it's a good idea to unconditionally use these gcc-specific flags. Are they supported by all relevant compilers?
It seems that Clang doesn't support that flag.
> I'm considering instead adding the attached patch.
This is a better solution. I'll take this patch into account in the upcoming revision.
>> diff --git a/powerpc64/machine.m4 b/powerpc64/machine.m4
>> index 221fa523..cefabc9b 100644
>> --- a/powerpc64/machine.m4
>> +++ b/powerpc64/machine.m4
>> @@ -24,7 +24,7 @@ define(`EPILOGUE',
>> C Get vector-scalar register from vector register
>> C VSR(VR)
>> -define(`VSR',`32+$1')
>> +define(`VSR',``vs'eval(32+substr($1,1,len($1)))')
>
> May be less brittle with an explicit ifelse chain, like the similar macros in arm/machine.m4. Should work better with the above approach, where r1 may expand to 1, depending on a configure check.
I got it; I'll consider this too.
Thank you,
Mamone