C arm/v6/aes-encrypt-internal.asm

ifelse(<
   rijndael-arm.S - ARM assembly implementation of AES cipher

   Copyright (C) 2013 Jussi Kivilinna

   This file is part of Libgcrypt.

   Libgcrypt is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   Libgcrypt is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this program; if not, see http://www.gnu.org/licenses/.
>)

	.text
	.arch armv6
	.syntax unified
	.arm

C register macros
define(<PARAM_ROUNDS>, <r0>)
define(<PARAM_LENGTH>, <r3>)

define(<FRAME_ROUNDS>, <[sp, #0]>)
define(<FRAME_LENGTH>, <[sp, #4]>)
define(<FRAME_DST>, <[sp, #(48+0)]>)
define(<FRAME_SRC>, <[sp, #(48+4)]>)

define(<CTX>, <%r1>)
define(<RTAB>, <%r2>)
define(<RMASK>, <%ip>)

define(<RA>, <%r4>)
define(<RB>, <%r5>)
define(<RC>, <%r6>)
define(<RD>, <%r7>)

define(<RNA>, <%r8>)
define(<RNB>, <%r9>)
define(<RNC>, <%r10>)
define(<RND>, <%r11>)

define(<RT0>, <%r0>)
define(<RT1>, <%r3>)
define(<RT2>, <%lr>)

C helper macros

C Load a 32-bit little-endian word from a possibly unaligned address,
C one byte at a time.
.macro ldr_unaligned_le rout rsrc offs rtmp
	ldrb \rout, [\rsrc, #((\offs) + 0)]
	ldrb \rtmp, [\rsrc, #((\offs) + 1)]
	orr \rout, \rout, \rtmp, lsl #8
	ldrb \rtmp, [\rsrc, #((\offs) + 2)]
	orr \rout, \rout, \rtmp, lsl #16
	ldrb \rtmp, [\rsrc, #((\offs) + 3)]
	orr \rout, \rout, \rtmp, lsl #24
.endm

C Store a 32-bit word little-endian to a possibly unaligned address,
C one byte at a time.
.macro str_unaligned_le rin rdst offs rtmp0 rtmp1
	mov \rtmp0, \rin, lsr #8
	strb \rin, [\rdst, #((\offs) + 0)]
	mov \rtmp1, \rin, lsr #16
	strb \rtmp0, [\rdst, #((\offs) + 1)]
	mov \rtmp0, \rin, lsr #24
	strb \rtmp1, [\rdst, #((\offs) + 2)]
	strb \rtmp0, [\rdst, #((\offs) + 3)]
.endm

C ***********************************************************************
C ARM assembly implementation of the AES cipher
C ***********************************************************************

C Preload the first word of the round key for \round into \ra.
.macro preload_first_key round ra
	ldr \ra, [CTX, #(((\round) * 16) + 0 * 4)]
.endm

.macro dummy round ra
.endm

C XOR the round key at CTX into the state \ra..\rd.
.macro addroundkey ra rb rc rd rna rnb rnc rnd preload_key
	ldm CTX, {\rna, \rnb, \rnc, \rnd}
	eor \ra, \rna
	eor \rb, \rnb
	eor \rc, \rnc
	\preload_key 1, \rna
	eor \rd, \rnd
.endm

C One full encryption round: word-sized table lookups covering
C SubBytes, ShiftRows and MixColumns, combined with the round key for
C round \next_r.  The first key word is expected preloaded in \rna;
C the remaining three are fetched here.
.macro do_encround next_r ra rb rc rd rna rnb rnc rnd preload_key
	ldr \rnb, [CTX, #(((\next_r) * 16) + 1 * 4)]
	and RT0, RMASK, \ra, lsl#2
	ldr \rnc, [CTX, #(((\next_r) * 16) + 2 * 4)]
	and RT1, RMASK, \ra, lsr#(8 - 2)
	ldr \rnd, [CTX, #(((\next_r) * 16) + 3 * 4)]
	and RT2, RMASK, \ra, lsr#(16 - 2)
	ldr RT0, [RTAB, RT0]
	and \ra, RMASK, \ra, lsr#(24 - 2)

	ldr RT1, [RTAB, RT1]
	eor \rna, \rna, RT0
	ldr RT2, [RTAB, RT2]
	and RT0, RMASK, \rd, lsl#2
	ldr \ra, [RTAB, \ra]

	eor \rnd, \rnd, RT1, ror #24
	and RT1, RMASK, \rd, lsr#(8 - 2)
	eor \rnc, \rnc, RT2, ror #16
	and RT2, RMASK, \rd, lsr#(16 - 2)
	eor \rnb, \rnb, \ra, ror #8
	ldr RT0, [RTAB, RT0]
	and \rd, RMASK, \rd, lsr#(24 - 2)

	ldr RT1, [RTAB, RT1]
	eor \rnd, \rnd, RT0
	ldr RT2, [RTAB, RT2]
	and RT0, RMASK, \rc, lsl#2
	ldr \rd, [RTAB, \rd]

	eor \rnc, \rnc, RT1, ror #24
	and RT1, RMASK, \rc, lsr#(8 - 2)
	eor \rnb, \rnb, RT2, ror #16
	and RT2, RMASK, \rc, lsr#(16 - 2)
	eor \rna, \rna, \rd, ror #8
	ldr RT0, [RTAB, RT0]
	and \rc, RMASK, \rc, lsr#(24 - 2)

	ldr RT1, [RTAB, RT1]
	eor \rnc, \rnc, RT0
	ldr RT2, [RTAB, RT2]
	and RT0, RMASK, \rb, lsl#2
	ldr \rc, [RTAB, \rc]

	eor \rnb, \rnb, RT1, ror #24
	and RT1, RMASK, \rb, lsr#(8 - 2)
	eor \rna, \rna, RT2, ror #16
	and RT2, RMASK, \rb, lsr#(16 - 2)
	eor \rnd, \rnd, \rc, ror #8
	ldr RT0, [RTAB, RT0]
	and \rb, RMASK, \rb, lsr#(24 - 2)

	ldr RT1, [RTAB, RT1]
	eor \rnb, \rnb, RT0
	ldr RT2, [RTAB, RT2]
	eor \rna, \rna, RT1, ror #24
	ldr \rb, [RTAB, \rb]

	eor \rnd, \rnd, RT2, ror #16
	\preload_key (\next_r) + 1, \ra
	eor \rnc, \rnc, \rb, ror #8
.endm
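
C The final encryption round differs from do_encround: AES omits
C MixColumns in the last round, so only the plain S-box byte of each
C table entry is needed.  The code relies on that byte sitting at
C offset 1 within each 32-bit entry, which is why lastencround below
C advances RTAB by one and do_lastencround uses ldrb lookups,
C reassembling the output words with orr and ror.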
.macro do_lastencround ra rb rc rd rna rnb rnc rnd
	and RT0, RMASK, \ra, lsl#2
	and RT1, RMASK, \ra, lsr#(8 - 2)
	and RT2, RMASK, \ra, lsr#(16 - 2)
	ldrb \rna, [RTAB, RT0]
	and \ra, RMASK, \ra, lsr#(24 - 2)
	ldrb \rnd, [RTAB, RT1]
	and RT0, RMASK, \rd, lsl#2
	ldrb \rnc, [RTAB, RT2]
	mov \rnd, \rnd, ror #24
	ldrb \rnb, [RTAB, \ra]
	and RT1, RMASK, \rd, lsr#(8 - 2)
	mov \rnc, \rnc, ror #16
	and RT2, RMASK, \rd, lsr#(16 - 2)
	mov \rnb, \rnb, ror #8

	ldrb RT0, [RTAB, RT0]
	and \rd, RMASK, \rd, lsr#(24 - 2)
	ldrb RT1, [RTAB, RT1]

	orr \rnd, \rnd, RT0
	ldrb RT2, [RTAB, RT2]
	and RT0, RMASK, \rc, lsl#2
	ldrb \rd, [RTAB, \rd]

	orr \rnc, \rnc, RT1, ror #24
	and RT1, RMASK, \rc, lsr#(8 - 2)
	orr \rnb, \rnb, RT2, ror #16
	and RT2, RMASK, \rc, lsr#(16 - 2)
	orr \rna, \rna, \rd, ror #8
	ldrb RT0, [RTAB, RT0]
	and \rc, RMASK, \rc, lsr#(24 - 2)
	ldrb RT1, [RTAB, RT1]

	orr \rnc, \rnc, RT0
	ldrb RT2, [RTAB, RT2]
	and RT0, RMASK, \rb, lsl#2
	ldrb \rc, [RTAB, \rc]

	orr \rnb, \rnb, RT1, ror #24
	and RT1, RMASK, \rb, lsr#(8 - 2)
	orr \rna, \rna, RT2, ror #16
	ldrb RT0, [RTAB, RT0]
	and RT2, RMASK, \rb, lsr#(16 - 2)
	ldrb RT1, [RTAB, RT1]
	orr \rnd, \rnd, \rc, ror #8
	ldrb RT2, [RTAB, RT2]
	and \rb, RMASK, \rb, lsr#(24 - 2)
	ldrb \rb, [RTAB, \rb]

	orr \rnb, \rnb, RT0
	orr \rna, \rna, RT1, ror #24
	orr \rnd, \rnd, RT2, ror #16
	orr \rnc, \rnc, \rb, ror #8
.endm

C Initial round: whitening with the round-0 key, then the first full round.
.macro firstencround round ra rb rc rd rna rnb rnc rnd
	addroundkey \ra,\rb,\rc,\rd,\rna,\rnb,\rnc,\rnd,preload_first_key
	do_encround (\round) + 1,\ra,\rb,\rc,\rd,\rna,\rnb,\rnc,\rnd,preload_first_key
.endm

.macro encround round ra rb rc rd rna rnb rnc rnd preload_key
	do_encround (\round) + 1,\ra,\rb,\rc,\rd,\rna,\rnb,\rnc,\rnd,\preload_key
.endm

C Final round: S-box-only lookups (RTAB advanced by one, see above),
C followed by the last round key.  CTX and RTAB are restored afterwards.
.macro lastencround round ra rb rc rd rna rnb rnc rnd
	add CTX, #(((\round) + 1) * 16)
	add RTAB, #1
	do_lastencround \ra,\rb,\rc,\rd,\rna,\rnb,\rnc,\rnd
	addroundkey \rna,\rnb,\rnc,\rnd,\ra,\rb,\rc,\rd,dummy
	sub CTX, #(((\round) + 1) * 16)
	sub RTAB, #1
.endm

C _aes_encrypt(unsigned rounds, const uint32_t *keys,
C	       const struct aes_table *T,
C	       size_t length, uint8_t *dst,
C	       uint8_t *src)
C
C r0		rounds
C r1		ctx
C r2		table
C r3		length
C [sp, #0]	dst
C [sp, #4]	src

PROLOGUE(_nettle_aes_encrypt)
	.cfi_startproc
	teq PARAM_LENGTH, #0
	bxeq lr

	push {r0,r3,%r4-%r11, %ip, %lr}
	.cfi_adjust_cfa_offset 48
	.cfi_rel_offset r0, 0	C PARAM_ROUNDS
	.cfi_rel_offset r3, 4	C PARAM_LENGTH
	.cfi_rel_offset r4, 8
	.cfi_rel_offset r5, 12
	.cfi_rel_offset r6, 16
	.cfi_rel_offset r7, 20
	.cfi_rel_offset r8, 24
	.cfi_rel_offset r9, 28
	.cfi_rel_offset r10, 32
	.cfi_rel_offset r11, 36
	.cfi_rel_offset ip, 40
	.cfi_rel_offset lr, 44

	add RTAB, RTAB, #AES_TABLE0

C read input block
.Lblock_loop:
	ldr RT0, FRAME_SRC

ifelse(V6,V6,<
	ldr RA, [RT0]
	ldr RB, [RT0, #4]
	ldr RC, [RT0, #8]
	ldr RD, [RT0, #12]
IF_BE(<
	rev RA, RA
	rev RB, RB
	rev RC, RC
	rev RD, RD
>)
>,<
IF_LE(<
	C test if src is unaligned
	tst RT0, #3
	beq 1f
>)
	C unaligned load
	ldr_unaligned_le RA, RT0, 0, RNA
	ldr_unaligned_le RB, RT0, 4, RNB
	ldr_unaligned_le RC, RT0, 8, RNA
	ldr_unaligned_le RD, RT0, 12, RNB
IF_LE(<
	b 2f
	.ltorg
1:
	C aligned load
	ldm RT0, {RA, RB, RC, RD}
2:
>)
>)
	add RT0, RT0, #16
	mov RMASK, #0xff
	str RT0, FRAME_SRC
	mov RMASK, RMASK, lsl#2	C byte mask
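
C Each round reads the state from one register quadruple and writes the
C other, so RA..RD and RNA..RND alternate between successive rounds.
C The first word of every round key is preloaded into the upcoming
C output register by the previous round (preload_first_key), and RMASK
C (0xff shifted left by two) turns a state byte straight into a word
C offset into the lookup table.  The first eight rounds below are
C shared by all key sizes; FRAME_ROUNDS then selects the 10-, 12- or
C 14-round tail.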
	firstencround 0, RA, RB, RC, RD, RNA, RNB, RNC, RND
	encround 1, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	encround 2, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key
	encround 3, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	encround 4, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key
	encround 5, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	encround 6, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key
	encround 7, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key

	ldr RT0, FRAME_ROUNDS
	cmp RT0, #12
	bge .Lenc_not_128

	C 128-bit key, 10 rounds
	encround 8, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy
	lastencround 9, RNA, RNB, RNC, RND, RA, RB, RC, RD

.Lenc_done:
	ldr RT0, FRAME_DST
	ldr RT1, FRAME_LENGTH

	C store output block
ifelse(V6,V6,<
IF_BE(<
	rev RA, RA
	rev RB, RB
	rev RC, RC
	rev RD, RD
>)
	str RA, [RT0]
	str RB, [RT0, #4]
	str RC, [RT0, #8]
	str RD, [RT0, #12]
>,<
IF_LE(<
	C test if dst is unaligned
	tst RT0, #3
	beq 1f
>)
	C unaligned store
	str_unaligned_le RA, RT0, 0, RNA, RNB
	str_unaligned_le RB, RT0, 4, RNA, RNB
	str_unaligned_le RC, RT0, 8, RNA, RNB
	str_unaligned_le RD, RT0, 12, RNA, RNB
IF_LE(<
	b 2f
	.ltorg
1:
	C aligned store
	C write output block
	stm RT0, {RA, RB, RC, RD}
2:
>)
>)
	add RT0, RT0, #16
	subs RT1, RT1, #16
	str RT0, FRAME_DST
	str RT1, FRAME_LENGTH
	bhi .Lblock_loop

	.cfi_remember_state
	pop {%r0,%r3, %r4-%r11,%ip,%pc}
	.cfi_restore_state
	.ltorg

.Lenc_not_128:
	beq .Lenc_192

	C 256-bit key, 14 rounds
	encround 8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key
	encround 9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	encround 10, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key
	encround 11, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	encround 12, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy
	lastencround 13, RNA, RNB, RNC, RND, RA, RB, RC, RD

	b .Lenc_done
	.ltorg

.Lenc_192:
	C 192-bit key, 12 rounds
	encround 8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key
	encround 9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	encround 10, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy
	lastencround 11, RNA, RNB, RNC, RND, RA, RB, RC, RD

	b .Lenc_done
	.cfi_endproc
EPILOGUE(_nettle_aes_encrypt)