// -------------------------------------------------------------------------
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
// All rights reserved.
//
// LICENSE TERMS
//
// The free distribution and use of this software in both source and binary
// form is allowed (with or without changes) provided that:
//
//   1. distributions of this source code include the above copyright
//      notice, this list of conditions and the following disclaimer;
//
//   2. distributions in binary form include the above copyright
//      notice, this list of conditions and the following disclaimer
//      in the documentation and/or other associated materials;
//
//   3. the copyright holder's name is not used to endorse products
//      built using this software without specific written permission.
//
//
// ALTERNATIVELY, provided that this notice is retained in full, this product
// may be distributed under the terms of the GNU General Public License (GPL),
// in which case the provisions of the GPL apply INSTEAD OF those given above.
//
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>

// DISCLAIMER
//
// This software is provided 'as is' with no explicit or implied warranties
// in respect of its properties including, but not limited to, correctness
// and fitness for purpose.
// -------------------------------------------------------------------------
// Issue Date: 29/07/2002

.file "aes-i586-asm.S"
.text

#include <asm/asm-offsets.h>

#define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)

/*
 * Offsets to parameters with one register pushed onto stack.
 *
 * Both entry points push ebp first and then read tfm(%esp), so these
 * values are only valid directly after that single push.  Every further
 * push moves the arguments up by 4 bytes, which is why later accesses
 * are written as in_blk+4(%esp) and out_blk+12(%esp).
 */
#define tfm 8
#define out_blk 12
#define in_blk 16

/*
 * Offsets in crypto_tfm structure.
 *
 * NOTE(review): crypto_tfm_ctx_offset is not defined in this file; it is
 * presumably supplied by <asm/asm-offsets.h>.  The constants 0, 256 and
 * 260 have to match the layout of the AES context kept by the C side -
 * confirm against that structure before changing either.
 */
#define ekey (crypto_tfm_ctx_offset + 0)
#define nrnd (crypto_tfm_ctx_offset + 256)
#define dkey (crypto_tfm_ctx_offset + 260)

// register mapping for encrypt and decrypt subroutines

#define r0  eax
#define r1  ebx
#define r2  ecx
#define r3  edx
#define r4  esi
#define r5  edi

/*
 * Names of the low and high byte of each register that has them.
 * Only eax, ebx, ecx and edx (r0..r3) are listed, so only those four
 * can be passed as the idx argument of the column macros.
 */
#define eaxl  al
#define eaxh  ah
#define ebxl  bl
#define ebxh  bh
#define ecxl  cl
#define ecxh  ch
#define edxl  dl
#define edxh  dh

/*
 * h(reg) / l(reg) give the high / low byte register of reg.
 * The extra level of indirection makes the preprocessor expand the
 * argument first (r0 -> eax) and only then paste the suffix
 * (eax ## h -> eaxh -> ah).
 */
#define _h(reg) reg##h
#define h(reg) _h(reg)

#define _l(reg) reg##l
#define l(reg) _l(reg)

// This macro takes a 32-bit word representing a column and uses
// each of its four bytes to index into four tables of 256 32-bit
// words to obtain values that are then xored into the appropriate
// output registers r0, r1, r4 or r5.
//
// Parameters:
//   table   table base address
//   a1      out_state[0], receives table[byte 0 of idx]
//   a2      out_state[1], receives (table + tlen)[byte 1 of idx]
//   a3      out_state[2], receives (table + 2*tlen)[byte 2 of idx]
//   a4      out_state[3], receives (table + 3*tlen)[byte 3 of idx]
//   idx     input register for the round (destroyed); it must have
//           low/high byte names, i.e. be one of r0..r3
//   tmp     scratch register for the round
//   sched   key schedule (do_fcol and do_icol only)

#define do_col(table, a1,a2,a3,a4, idx, tmp)                            \
        movzx   %l(idx),%tmp;                   /* byte 0 of idx */     \
        xor     table(,%tmp,4),%a1;                                     \
        movzx   %h(idx),%tmp;                   /* byte 1 of idx */     \
        shr     $16,%idx;                       /* expose bytes 2, 3 */ \
        xor     table+tlen(,%tmp,4),%a2;                                \
        movzx   %l(idx),%tmp;                   /* byte 2 of idx */     \
        movzx   %h(idx),%idx;                   /* byte 3 of idx */     \
        xor     table+2*tlen(,%tmp,4),%a3;                              \
        xor     table+3*tlen(,%idx,4),%a4;

// initialise output registers from the key schedule
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
//
// sched is pasted textually behind a constant (0, 4, 8 or 12), so it has
// to be a memory operand whose displacement may follow a number, such as
// (%ebp), +16(%ebp) or -64(%ebp).
// Forward variant: a2 is loaded from offset 12 and a4 from offset 4.
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched)                    \
        mov     0 sched,%a1;                                            \
        movzx   %l(idx),%tmp;                   /* byte 0 of idx */     \
        mov     12 sched,%a2;                                           \
        xor     table(,%tmp,4),%a1;                                     \
        mov     4 sched,%a4;                                            \
        movzx   %h(idx),%tmp;                   /* byte 1 of idx */     \
        shr     $16,%idx;                       /* expose bytes 2, 3 */ \
        xor     table+tlen(,%tmp,4),%a2;                                \
        movzx   %l(idx),%tmp;                   /* byte 2 of idx */     \
        movzx   %h(idx),%idx;                   /* byte 3 of idx */     \
        xor     table+3*tlen(,%idx,4),%a4;                              \
        mov     %a3,%idx;                       /* keep old a3 (NB1) */ \
        mov     8 sched,%a3;                                            \
        xor     table+2*tlen(,%tmp,4),%a3;

// initialise output registers from the key schedule
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
//
// Same as do_fcol except for the key words given to a2 and a4:
// inverse variant, a2 is loaded from offset 4 and a4 from offset 12.
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched)                    \
        mov     0 sched,%a1;                                            \
        movzx   %l(idx),%tmp;                   /* byte 0 of idx */     \
        mov     4 sched,%a2;                                            \
        xor     table(,%tmp,4),%a1;                                     \
        mov     12 sched,%a4;                                           \
        movzx   %h(idx),%tmp;                   /* byte 1 of idx */     \
        shr     $16,%idx;                       /* expose bytes 2, 3 */ \
        xor     table+tlen(,%tmp,4),%a2;                                \
        movzx   %l(idx),%tmp;                   /* byte 2 of idx */     \
        movzx   %h(idx),%idx;                   /* byte 3 of idx */     \
        xor     table+3*tlen(,%idx,4),%a4;                              \
        mov     %a3,%idx;                       /* keep old a3 (NB1) */ \
        mov     8 sched,%a3;                                            \
        xor     table+2*tlen(,%tmp,4),%a3;


// original Gladman had conditional saves to MMX regs.
// save(n, reg) stores reg into 32-bit stack slot n, restore(reg, n) loads
// it back.  The slots sit at 4*n(%esp), so the caller has to reserve them
// below the stack pointer before the first save.
#define save(a1, a2)            \
        mov     %a2,4*a1(%esp)

#define restore(a1, a2)         \
        mov     4*a2(%esp),%a1

// These macros perform a forward encryption cycle.
// They are entered with the first previous round column values in
// r0,r1,r4,r5 and exit with the final values in the same registers,
// using stack for temporary storage.
//
// A single macro moves column 0 to the other of r0 / r2 (see the entry
// and exit lists below), so the values are back in r0,r1,r4,r5 only after
// a fwd_rnd1 / fwd_rnd2 pair.  Both macros use stack slots 0 and 1 through
// save / restore and use r3 as scratch.
//
//   arg     round key operand, handed to do_fcol as its sched argument
//   table   base of the four lookup tables used for this round

// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define fwd_rnd1(arg, table)                                            \
        save   (0,r1);          /* r1 and r5 are overwritten before */  \
        save   (1,r5);          /* they have been used as input     */  \
                                                                        \
        /* compute new column values */                                 \
        do_fcol(table, r2,r5,r4,r1, r0,r3, arg);        /* idx=r0 */    \
        do_col (table, r4,r1,r2,r5, r0,r3);             /* idx=r4 */    \
        restore(r0,0);                                                  \
        do_col (table, r1,r2,r5,r4, r0,r3);             /* idx=r1 */    \
        restore(r0,1);                                                  \
        do_col (table, r5,r4,r1,r2, r0,r3);             /* idx=r5 */

// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define fwd_rnd2(arg, table)                                            \
        save   (0,r1);          /* r1 and r5 are overwritten before */  \
        save   (1,r5);          /* they have been used as input     */  \
                                                                        \
        /* compute new column values */                                 \
        do_fcol(table, r0,r5,r4,r1, r2,r3, arg);        /* idx=r2 */    \
        do_col (table, r4,r1,r0,r5, r2,r3);             /* idx=r4 */    \
        restore(r2,0);                                                  \
        do_col (table, r1,r0,r5,r4, r2,r3);             /* idx=r1 */    \
        restore(r2,1);                                                  \
        do_col (table, r5,r4,r1,r0, r2,r3);             /* idx=r5 */

// These macros perform an inverse encryption cycle.
// They are entered with the first previous round column values in
// r0,r1,r4,r5 and exit with the final values in the same registers,
// using stack for temporary storage
//
// A single macro moves column 0 to the other of r0 / r2 (see the entry
// and exit lists below), so the values are back in r0,r1,r4,r5 only after
// an inv_rnd1 / inv_rnd2 pair.  Both macros use stack slots 0 and 1 through
// save / restore and use r3 as scratch.
//
//   arg     round key operand, handed to do_icol as its sched argument
//   table   base of the four lookup tables used for this round

// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define inv_rnd1(arg, table)                                            \
        save    (0,r1);         /* r1 and r5 are overwritten before */  \
        save    (1,r5);         /* they have been used as input     */  \
                                                                        \
        /* compute new column values */                                 \
        do_icol(table, r2,r1,r4,r5, r0,r3, arg);        /* idx=r0 */    \
        do_col (table, r4,r5,r2,r1, r0,r3);             /* idx=r4 */    \
        restore(r0,0);                                                  \
        do_col (table, r1,r4,r5,r2, r0,r3);             /* idx=r1 */    \
        restore(r0,1);                                                  \
        do_col (table, r5,r2,r1,r4, r0,r3);             /* idx=r5 */

// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define inv_rnd2(arg, table)                                            \
        save    (0,r1);         /* r1 and r5 are overwritten before */  \
        save    (1,r5);         /* they have been used as input     */  \
                                                                        \
        /* compute new column values */                                 \
        do_icol(table, r0,r1,r4,r5, r2,r3, arg);        /* idx=r2 */    \
        do_col (table, r4,r5,r0,r1, r2,r3);             /* idx=r4 */    \
        restore(r2,0);                                                  \
        do_col (table, r1,r4,r5,r0, r2,r3);             /* idx=r1 */    \
        restore(r2,1);                                                  \
        do_col (table, r5,r0,r1,r4, r2,r3);             /* idx=r5 */

// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
//
// All three arguments are read from the stack.
//   tfm      context pointer; the round count is read at nrnd and the
//            encryption key schedule starts at ekey
//   out_blk  receives the 16 byte result
//   in_blk   16 byte input block
// ebp, ebx, esi and edi are pushed on entry and popped before returning;
// ecx and edx are overwritten.  eax holds 1 on return.
// NOTE(review): the prototype above is void, so the value in eax is
// presumably ignored by callers - confirm before removing the mov.

.global  aes_enc_blk

.extern  ft_tab
.extern  fl_tab

.align 4

aes_enc_blk:
        push    %ebp
        mov     tfm(%esp),%ebp

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:      push    %ebx
        mov     in_blk+4(%esp),%r2      // +4: ebx has been pushed as well
        push    %esi
        mov     nrnd(%ebp),%r3          // number of rounds
        push    %edi
#if ekey != 0
        lea     ekey(%ebp),%ebp         // key pointer
#endif

// input four columns and xor in first round key

        mov     (%r2),%r0
        mov     4(%r2),%r1
        mov     8(%r2),%r4
        mov     12(%r2),%r5
        xor     (%ebp),%r0
        xor     4(%ebp),%r1
        xor     8(%ebp),%r4
        xor     12(%ebp),%r5

        sub     $8,%esp                 // space for register saves on stack
        add     $16,%ebp                // increment to next round key

// The round count is compared once.  lea is used for the pointer updates
// below because it leaves the flags of this cmp intact for the je.
// Any count below 12 runs 10 rounds and any count above 12 runs 14.
        cmp     $12,%r3
        jb      4f                      // 10 rounds for 128-bit key
        lea     32(%ebp),%ebp
        je      3f                      // 12 rounds for 192-bit key
        lea     32(%ebp),%ebp

2:      fwd_rnd1( -64(%ebp) ,ft_tab)    // 14 rounds for 256-bit key
        fwd_rnd2( -48(%ebp) ,ft_tab)
3:      fwd_rnd1( -32(%ebp) ,ft_tab)    // 12 rounds for 192-bit key
        fwd_rnd2( -16(%ebp) ,ft_tab)
4:      fwd_rnd1(    (%ebp) ,ft_tab)    // 10 rounds for 128-bit key
        fwd_rnd2( +16(%ebp) ,ft_tab)
        fwd_rnd1( +32(%ebp) ,ft_tab)
        fwd_rnd2( +48(%ebp) ,ft_tab)
        fwd_rnd1( +64(%ebp) ,ft_tab)
        fwd_rnd2( +80(%ebp) ,ft_tab)
        fwd_rnd1( +96(%ebp) ,ft_tab)
        fwd_rnd2(+112(%ebp) ,ft_tab)
        fwd_rnd1(+128(%ebp) ,ft_tab)
        fwd_rnd2(+144(%ebp) ,fl_tab)    // last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns relies on the register mappings:
// r5, r4 and r1 are edi, esi and ebx, so each one is stored
// before the pop that restores the saved value of that register.

        add     $8,%esp                 // drop the two save slots
        mov     out_blk+12(%esp),%ebp   // +12: ebx, esi, edi still pushed
        mov     %r5,12(%ebp)
        pop     %edi
        mov     %r4,8(%ebp)
        pop     %esi
        mov     %r1,4(%ebp)
        pop     %ebx
        mov     %r0,(%ebp)
        pop     %ebp
        mov     $1,%eax
        ret

// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
//
// All three arguments are read from the stack.
//   tfm      context pointer; the round count is read at nrnd and the
//            decryption key schedule starts at dkey
//   out_blk  receives the 16 byte result
//   in_blk   16 byte input block
// ebp, ebx, esi and edi are pushed on entry and popped before returning;
// ecx and edx are overwritten.  eax holds 1 on return.
// NOTE(review): the prototype above is void, so the value in eax is
// presumably ignored by callers - confirm before removing the mov.

.global  aes_dec_blk

.extern  it_tab
.extern  il_tab

.align 4

aes_dec_blk:
        push    %ebp
        mov     tfm(%esp),%ebp

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:      push    %ebx
        mov     in_blk+4(%esp),%r2      // +4: ebx has been pushed as well
        push    %esi
        mov     nrnd(%ebp),%r3          // number of rounds
        push    %edi
#if dkey != 0
        lea     dkey(%ebp),%ebp         // key pointer
#endif
        mov     %r3,%r0
        shl     $4,%r0                  // 16 bytes per round key
        add     %r0,%ebp                // ebp = key pointer + 16 * rounds

// input four columns and xor in first round key

        mov     (%r2),%r0
        mov     4(%r2),%r1
        mov     8(%r2),%r4
        mov     12(%r2),%r5
        xor     (%ebp),%r0
        xor     4(%ebp),%r1
        xor     8(%ebp),%r4
        xor     12(%ebp),%r5

        sub     $8,%esp                 // space for register saves on stack
        sub     $16,%ebp                // step back to the next round key

// The schedule is walked from its last entry towards its first.
// lea is used for the pointer updates below because it leaves the
// flags of this cmp intact for the je.
// Any count below 12 runs 10 rounds and any count above 12 runs 14.
        cmp     $12,%r3
        jb      4f                      // 10 rounds for 128-bit key
        lea     -32(%ebp),%ebp
        je      3f                      // 12 rounds for 192-bit key
        lea     -32(%ebp),%ebp

2:      inv_rnd1( +64(%ebp), it_tab)    // 14 rounds for 256-bit key
        inv_rnd2( +48(%ebp), it_tab)
3:      inv_rnd1( +32(%ebp), it_tab)    // 12 rounds for 192-bit key
        inv_rnd2( +16(%ebp), it_tab)
4:      inv_rnd1(    (%ebp), it_tab)    // 10 rounds for 128-bit key
        inv_rnd2( -16(%ebp), it_tab)
        inv_rnd1( -32(%ebp), it_tab)
        inv_rnd2( -48(%ebp), it_tab)
        inv_rnd1( -64(%ebp), it_tab)
        inv_rnd2( -80(%ebp), it_tab)
        inv_rnd1( -96(%ebp), it_tab)
        inv_rnd2(-112(%ebp), it_tab)
        inv_rnd1(-128(%ebp), it_tab)
        inv_rnd2(-144(%ebp), il_tab)    // last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns relies on the register mappings:
// r5, r4 and r1 are edi, esi and ebx, so each one is stored
// before the pop that restores the saved value of that register.

        add     $8,%esp                 // drop the two save slots
        mov     out_blk+12(%esp),%ebp   // +12: ebx, esi, edi still pushed
        mov     %r5,12(%ebp)
        pop     %edi
        mov     %r4,8(%ebp)
        pop     %esi
        mov     %r1,4(%ebp)
        pop     %ebx
        mov     %r0,(%ebp)
        pop     %ebp
        mov     $1,%eax
        ret