/* aesv8-armx.S revision 305153 */
/* $FreeBSD: stable/11/secure/lib/libcrypto/arm/aesv8-armx.S 305153 2016-08-31 20:33:59Z jkim $ */
/* Do not modify. This file is auto-generated from aesv8-armx.pl. */
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=7
.text
.arch	armv7-a
.fpu	neon
.code	32
.align	5
@ Constants for the key expansion below, one 16-byte row each:
@   row 0: initial round constant, loaded into q1 and doubled per step
@   row 1: vtbl byte-selection mask, loaded into q2
@   row 2: round constant 0x1b, reloaded into q1 for the last two
@          AES-128 expansion steps
rcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

@-----------------------------------------------------------------------
@ aes_v8_set_encrypt_key
@ In:    r0 = user key bytes
@        r1 = key length in bits
@        r2 = destination key schedule
@ Out:   r0 = 0 on success
@             -1 if r0 or r2 is zero
@             -2 if r1 is not 128, 192 or 256
@ Notes: On success the round count (10, 12 or 14) is stored at offset
@        240 from the original r2.  At .Ldone r2 points at that slot and
@        r12 holds the round count; aes_v8_set_decrypt_key enters through
@        .Lenc_key and depends on both.
@ Clobb: r1, r3, r12, q0-q3, q8-q10, flags
@-----------------------------------------------------------------------
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f
	bne	.Lenc_key_abort

	adr	r3,rcon
	cmp	r1,#192			@ flags are consumed by the blt/beq below

	veor	q0,q0,q0
	vld1.8	{q3},[r0]!
	mov	r1,#8			@ reuse r1
	vld1.32	{q1,q2},[r3]!

	blt	.Loop128
	beq	.L192
	b	.L256

.align	4
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ third rcon row (0x1b)

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]
	add	r2,r2,#0x50		@ 160 bytes written so far; r2 = schedule + 240

	mov	r12,#10
	b	.Ldone

.align	4
.L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10		@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ 208 bytes written so far; r2 = schedule + 240
	b	.Ldone

.align	4
.L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone			@ seventh pass leaves r2 = schedule + 240

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count goes right after the schedule
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

@-----------------------------------------------------------------------
@ aes_v8_set_decrypt_key
@ In:    r0, r1, r2 as for aes_v8_set_encrypt_key
@ Out:   r0 = 0 on success, otherwise the code returned by .Lenc_key
@ Notes: Builds the encryption schedule via .Lenc_key, then reverses the
@        order of the round keys in place.  Every round key except the
@        first and the last is passed through aesimc on the way.
@ Clobb: everything .Lenc_key clobbers; r4 is saved and restored
@-----------------------------------------------------------------------
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ swap first and last round keys untouched
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	@ r0 == r2 here: transform the middle round key in place
	vld1.32	{q0},[r2]
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
@-----------------------------------------------------------------------
@ aes_v8_encrypt
@ In:    r0 = 16-byte input block
@        r1 = 16-byte output block
@        r2 = key schedule; round count is read from [r2,#240]
@ Out:   nothing in registers; result is stored at [r1]
@ Clobb: r2, r3, q0-q2, flags
@-----------------------------------------------------------------------
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:
	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	vld1.32	{q0},[r2]
	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_encrypt,.-aes_v8_encrypt
@-----------------------------------------------------------------------
@ aes_v8_decrypt
@ In:    r0 = 16-byte input block
@        r1 = 16-byte output block
@        r2 = key schedule; round count is read from [r2,#240]
@ Out:   nothing in registers; result is stored at [r1]
@ Clobb: r2, r3, q0-q2, flags
@-----------------------------------------------------------------------
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	ldr	r3,[r2,#240]
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_dec:
	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	vld1.32	{q0},[r2]
	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_decrypt,.-aes_v8_decrypt
@-----------------------------------------------------------------------
@ aes_v8_cbc_encrypt
@ In:    r0      = input
@        r1      = output
@        r2      = length in bytes
@        r3      = key schedule; round count is read from [r3,#240]
@        [sp]    = pointer to the 16-byte IV (loaded into r4)
@        [sp,#4] = direction flag (loaded into r5): zero selects the
@                  decrypt path, anything else the encrypt path
@ Out:   nothing in registers; the IV at [r4] is updated at .Lcbc_done
@ Notes: A length below 16 jumps straight to .Lcbc_abort, so neither the
@        output nor the IV is written.  The length is rounded down to a
@        multiple of 16, so a trailing partial block is ignored.
@ Saves: r4-r8, lr, d8-d15
@-----------------------------------------------------------------------
.globl	aes_v8_cbc_encrypt
.type	aes_v8_cbc_encrypt,%function
.align	5
aes_v8_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4-r8,lr}
	vstmdb	sp!,{d8-d15}		@ ABI specification says so
	ldmia	ip,{r4-r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lcbc_abort
	moveq	r8,#0			@ single block: do not step r0 past it

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8-q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10-q11},[r7]!
	vld1.32	{q12-q13},[r7]!
	vld1.32	{q14-q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ still the flags from cmp r5,#0 above

	cmp	r5,#2
	veor	q0,q0,q6
	veor	q5,q8,q7
	beq	.Lcbc_enc128

	vld1.32	{q2-q3},[r7]
	add	r7,r3,#16
	add	r6,r3,#16*4
	add	r12,r3,#16*5
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r14,r3,#16*6
	add	r3,r3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc:
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r6]
	cmp	r5,#4
	.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r12]
	beq	.Lcbc_enc192

	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q8},[r14]
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r3]
	nop

.Lcbc_enc192:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	vld1.32	{q2-q3},[r7]
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc128:
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
	.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
	.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done
.align	5
.Lcbc_dec:
	vld1.8	{q10},[r0]!
	subs	r2,r2,#32		@ bias
	add	r6,r5,#2
	vorr	q3,q0,q0
	vorr	q1,q0,q0
	vorr	q11,q10,q10
	blo	.Lcbc_dec_tail

	vorr	q1,q10,q10
	vld1.8	{q10},[r0]!
	vorr	q2,q0,q0
	vorr	q3,q1,q1
	vorr	q11,q10,q10

.Loop3x_cbc_dec:
	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_cbc_dec

	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q4,q6,q7
	subs	r2,r2,#0x30
	veor	q5,q2,q7
	movlo	r6,r2			@ r6 is zero at this point
	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	add	r0,r0,r6		@ r0 is adjusted in such way that
					@ at exit from the loop q1-q10
					@ are loaded with last "words"
	vorr	q6,q11,q11
	mov	r7,r3
	.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
	.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
	.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
	.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
	.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q4,q0
	veor	q5,q5,q1
	veor	q10,q10,q9
	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_cbc_dec

	cmn	r2,#0x30
	beq	.Lcbc_done
	nop

.Lcbc_dec_tail:
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lcbc_dec_tail

	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	cmn	r2,#0x20		@ flags are consumed by beq .Lcbc_dec_one
	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q5,q6,q7
	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
	.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
	veor	q9,q3,q7
	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
	.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
	beq	.Lcbc_dec_one
	veor	q5,q5,q1
	veor	q9,q9,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lcbc_done

.Lcbc_dec_one:
	veor	q5,q5,q10
	vorr	q6,q11,q11
	vst1.8	{q5},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the chaining value back to the IV
.Lcbc_abort:
	vldmia	sp!,{d8-d15}
	ldmia	sp!,{r4-r8,pc}
.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
@-----------------------------------------------------------------------
@ aes_v8_ctr32_encrypt_blocks
@ In:    r0   = input
@        r1   = output
@        r2   = number of 16-byte blocks
@        r3   = key schedule; round count is read from [r3,#240]
@        [sp] = pointer to the 16-byte counter block (loaded into r4)
@ Out:   nothing in registers
@ Notes: Only the last 32-bit word of the counter block, [r4,#12], is
@        incremented, one step per block.  The counter block itself is
@        only read here; nothing is stored back through r4.
@        NOTE(review): with r2 == 0 the bls below still enters
@        .Lctr32_tail, which stores two blocks; callers presumably never
@        pass zero -- confirm.
@ Saves: r4-r10, lr, d8-d15
@-----------------------------------------------------------------------
.globl	aes_v8_ctr32_encrypt_blocks
.type	aes_v8_ctr32_encrypt_blocks,%function
.align	5
aes_v8_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4-r10,lr}
	vstmdb	sp!,{d8-d15}		@ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]
	vld1.32	{q0},[r4]

	vld1.32	{q8-q9},[r3]		@ load key schedule...
	sub	r5,r5,#4
	mov	r12,#16
	cmp	r2,#2			@ flags are consumed by movlo and bls below
	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
	sub	r5,r5,#2
	vld1.32	{q12-q13},[r7]!
	vld1.32	{q14-q15},[r7]!
	vld1.32	{q7},[r7]
	add	r7,r3,#32
	mov	r6,r5
	movlo	r12,#0
#ifndef __ARMEB__
	rev	r8, r8
#endif
	vorr	q1,q0,q0
	add	r10, r8, #1
	vorr	q10,q0,q0
	add	r8, r8, #2
	vorr	q6,q0,q0
	rev	r10, r10
	vmov.32	d3[1],r10
	bls	.Lctr32_tail
	rev	r12, r8
	sub	r2,r2,#3		@ bias
	vmov.32	d21[1],r12
	b	.Loop3x_ctr32

.align	4
.Loop3x_ctr32:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
	.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
	.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ctr32

	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vld1.8	{q2},[r0]!
	vorr	q0,q6,q6
	.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
	.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	vorr	q1,q6,q6
	.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.8	{q11},[r0]!
	mov	r7,r3
	.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
	.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
	vorr	q10,q6,q6
	add	r9,r8,#1
	.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q2,q2,q7
	add	r10,r8,#2
	.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
	.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	veor	q3,q3,q7
	add	r8,r8,#3
	.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q11,q11,q7
	rev	r9,r9
	.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
	.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d1[1], r9
	rev	r10,r10
	.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vmov.32	d3[1], r10
	rev	r12,r8
	.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
	.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
	vmov.32	d21[1], r12
	subs	r2,r2,#3
	.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
	.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
	.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15

	veor	q2,q2,q4
	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
	vst1.8	{q2},[r1]!
	veor	q3,q3,q5
	mov	r6,r5
	vst1.8	{q3},[r1]!
	veor	q11,q11,q9
	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
	vst1.8	{q11},[r1]!
	bhs	.Loop3x_ctr32

	adds	r2,r2,#3		@ undo the bias: r2 = blocks still to do
	beq	.Lctr32_done
	cmp	r2,#1
	mov	r12,#16
	moveq	r12,#0			@ one block left: do not step r0 past it

.Lctr32_tail:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.32	{q9},[r7]!
	bgt	.Lctr32_tail

	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q2},[r0],r12
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q3,q3,q7
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	cmp	r2,#1
	veor	q2,q2,q0
	veor	q3,q3,q1
	vst1.8	{q2},[r1]!
	beq	.Lctr32_done
	vst1.8	{q3},[r1]

.Lctr32_done:
	vldmia	sp!,{d8-d15}
	ldmia	sp!,{r4-r10,pc}
.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif