Amitay Isaacs amitay@ozlabs.org writes:
--- /dev/null +++ b/powerpc64/ecc-curve25519-modp.asm @@ -0,0 +1,101 @@ +C powerpc64/ecc-25519-modp.asm +define(`RP', `r4') +define(`XP', `r5')
+define(`U0', `r6') C Overlaps unused modulo input +define(`U1', `r7') +define(`U2', `r8') +define(`U3', `r9') +define(`T0', `r10') +define(`T1', `r11') +define(`M', `r12')
+define(`UN', r3)
The "Overlaps unused modulo input" comment seems misplaced: it's UN / r3, not U0 / r6, that overlaps the unused input, right?
- C void ecc_curve25519_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
- .text
+define(`FUNC_ALIGN', `5') +PROLOGUE(_nettle_ecc_curve25519_modp)
- C First fold the limbs affecting bit 255: build XP[3] + 38*XP[7] in T0:U3
- ld UN, 56(XP) C UN = XP[7]
- li M, 38 C Multiplier for XP[4..7]; presumably 2^256 = 38 mod p with p = 2^255 - 19
- mulhdu T1, M, UN C T1 = high 64 bits of 38*XP[7]
- mulld UN, M, UN C UN = low 64 bits of 38*XP[7]
- ld U3, 24(XP) C U3 = XP[3]
- li T0, 0
- addc U3, UN, U3 C U3 = XP[3] + low product, carry out in CA
- adde T0, T1, T0 C T0 = high product + CA, so T0:U3 = XP[3] + 38*XP[7]
- ld UN, 40(XP) C UN = XP[5]
- mulhdu U2, M, UN C U2 = high 64 bits of 38*XP[5]
- mulld UN, M, UN C UN = low 64 bits of 38*XP[5]
- addc U3, U3, U3 C Shift T0:U3 left one bit; old top bit of U3 moves into CA
- adde T0, T0, T0 C T0 = 2*T0 + CA, the part of T0:U3 at bit 255 and above
- srdi U3, U3, 1 C Undo shift, clear high bit
- C Fold the high limb again (multiplied by 19), together with the 38*XP[5] product
- li T1, 19
- mulld T0, T1, T0 C T0 = 19 * T0; only the low half of the product is kept
- ld U0, 0(XP) C U0 = XP[0]
- ld U1, 8(XP) C U1 = XP[1]
- ld T1, 16(XP) C T1 = XP[2]
- addc U0, T0, U0 C First carry chain: U0 = XP[0] + T0
- adde U1, UN, U1 C U1 = XP[1] + low(38*XP[5]) + CA
- ld T0, 32(XP) C T0 = XP[4]
- adde U2, U2, T1 C U2 = high(38*XP[5]) + XP[2] + CA
- addze U3, U3 C U3 += CA, end of first chain
- mulhdu T1, M, T0 C T1 = high 64 bits of 38*XP[4]
- mulld T0, M, T0 C T0 = low 64 bits of 38*XP[4]
- addc U0, T0, U0 C Second carry chain: add 38*XP[4] at limbs 0 and 1
- adde U1, T1, U1
- std U0, 0(RP) C RP[0] is final
- std U1, 8(RP) C RP[1] is final
- ld T0, 48(XP) C T0 = XP[6]
- mulhdu T1, M, T0 C T1 = high 64 bits of 38*XP[6]
- mulld UN, M, T0 C UN = low 64 bits of 38*XP[6]
- adde U2, UN, U2 C Continues with CA from the adde into U1; the std, ld and mul between leave CA alone
- adde U3, T1, U3 C Any carry out of this add is not used
- std U2, 16(RP)
- std U3, 24(RP)
- blr
+EPILOGUE(_nettle_ecc_curve25519_modp)
Looks good. I must admit that the x86_64 version this is based on is not so easy to follow.
Regards, /Niels