#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. Rights for redistribution and usage in source and binary
# forms are granted according to the OpenSSL license.
# ====================================================================
#
# Version 1.2.
#
# aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on
# Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version
# [you'll notice a lot of resemblance], such as compressed S-boxes
# in little-endian byte order, prefetch of these tables in CBC mode,
# as well as avoiding L1 cache aliasing between stack frame and key
# schedule and already mentioned tables, compressed Td4...
#
# Performance in number of cycles per processed byte for 128-bit key:
#
#               ECB             CBC encrypt
# AMD64         13.7            13.0(*)
# EM64T         20.2            18.6(*)
#
# (*)   CBC benchmarks are better than ECB thanks to custom ABI used
#       by the private block encryption function.
26183234Ssimon 27183234Ssimon$verticalspin=1; # unlike 32-bit version $verticalspin performs 28183234Ssimon # ~15% better on both AMD and Intel cores 29183234Ssimon$output=shift; 30183234Ssimonopen STDOUT,"| $^X ../perlasm/x86_64-xlate.pl $output"; 31183234Ssimon 32183234Ssimon$code=".text\n"; 33183234Ssimon 34183234Ssimon$s0="%eax"; 35183234Ssimon$s1="%ebx"; 36183234Ssimon$s2="%ecx"; 37183234Ssimon$s3="%edx"; 38183234Ssimon$acc0="%esi"; 39183234Ssimon$acc1="%edi"; 40183234Ssimon$acc2="%ebp"; 41183234Ssimon$inp="%r8"; 42183234Ssimon$out="%r9"; 43183234Ssimon$t0="%r10d"; 44183234Ssimon$t1="%r11d"; 45183234Ssimon$t2="%r12d"; 46183234Ssimon$rnds="%r13d"; 47183234Ssimon$sbox="%r14"; 48183234Ssimon$key="%r15"; 49183234Ssimon 50183234Ssimonsub hi() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1h/; $r; } 51183234Ssimonsub lo() { my $r=shift; $r =~ s/%[er]([a-d])x/%\1l/; 52183234Ssimon $r =~ s/%[er]([sd]i)/%\1l/; 53183234Ssimon $r =~ s/%(r[0-9]+)[d]?/%\1b/; $r; } 54183234Ssimonsub _data_word() 55183234Ssimon{ my $i; 56183234Ssimon while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } 57183234Ssimon} 58183234Ssimonsub data_word() 59183234Ssimon{ my $i; 60183234Ssimon my $last=pop(@_); 61183234Ssimon $code.=".long\t"; 62183234Ssimon while(defined($i=shift)) { $code.=sprintf"0x%08x,",$i; } 63183234Ssimon $code.=sprintf"0x%08x\n",$last; 64183234Ssimon} 65183234Ssimon 66183234Ssimonsub data_byte() 67183234Ssimon{ my $i; 68183234Ssimon my $last=pop(@_); 69183234Ssimon $code.=".byte\t"; 70183234Ssimon while(defined($i=shift)) { $code.=sprintf"0x%02x,",$i&0xff; } 71183234Ssimon $code.=sprintf"0x%02x\n",$last&0xff; 72183234Ssimon} 73183234Ssimon 74183234Ssimonsub encvert() 75183234Ssimon{ my $t3="%r8d"; # zaps $inp! 76183234Ssimon 77183234Ssimon$code.=<<___; 78183234Ssimon # favor 3-way issue Opteron pipeline... 
79183234Ssimon movzb `&lo("$s0")`,$acc0 80183234Ssimon movzb `&lo("$s1")`,$acc1 81183234Ssimon movzb `&lo("$s2")`,$acc2 82183234Ssimon mov 0($sbox,$acc0,8),$t0 83183234Ssimon mov 0($sbox,$acc1,8),$t1 84183234Ssimon mov 0($sbox,$acc2,8),$t2 85183234Ssimon 86183234Ssimon movzb `&hi("$s1")`,$acc0 87183234Ssimon movzb `&hi("$s2")`,$acc1 88183234Ssimon movzb `&lo("$s3")`,$acc2 89183234Ssimon xor 3($sbox,$acc0,8),$t0 90183234Ssimon xor 3($sbox,$acc1,8),$t1 91183234Ssimon mov 0($sbox,$acc2,8),$t3 92183234Ssimon 93183234Ssimon movzb `&hi("$s3")`,$acc0 94183234Ssimon shr \$16,$s2 95183234Ssimon movzb `&hi("$s0")`,$acc2 96183234Ssimon xor 3($sbox,$acc0,8),$t2 97183234Ssimon shr \$16,$s3 98183234Ssimon xor 3($sbox,$acc2,8),$t3 99183234Ssimon 100183234Ssimon shr \$16,$s1 101183234Ssimon lea 16($key),$key 102183234Ssimon shr \$16,$s0 103183234Ssimon 104183234Ssimon movzb `&lo("$s2")`,$acc0 105183234Ssimon movzb `&lo("$s3")`,$acc1 106183234Ssimon movzb `&lo("$s0")`,$acc2 107183234Ssimon xor 2($sbox,$acc0,8),$t0 108183234Ssimon xor 2($sbox,$acc1,8),$t1 109183234Ssimon xor 2($sbox,$acc2,8),$t2 110183234Ssimon 111183234Ssimon movzb `&hi("$s3")`,$acc0 112183234Ssimon movzb `&hi("$s0")`,$acc1 113183234Ssimon movzb `&lo("$s1")`,$acc2 114183234Ssimon xor 1($sbox,$acc0,8),$t0 115183234Ssimon xor 1($sbox,$acc1,8),$t1 116183234Ssimon xor 2($sbox,$acc2,8),$t3 117183234Ssimon 118183234Ssimon mov 12($key),$s3 119183234Ssimon movzb `&hi("$s1")`,$acc1 120183234Ssimon movzb `&hi("$s2")`,$acc2 121183234Ssimon mov 0($key),$s0 122183234Ssimon xor 1($sbox,$acc1,8),$t2 123183234Ssimon xor 1($sbox,$acc2,8),$t3 124183234Ssimon 125183234Ssimon mov 4($key),$s1 126183234Ssimon mov 8($key),$s2 127183234Ssimon xor $t0,$s0 128183234Ssimon xor $t1,$s1 129183234Ssimon xor $t2,$s2 130183234Ssimon xor $t3,$s3 131183234Ssimon___ 132183234Ssimon} 133183234Ssimon 134183234Ssimonsub enclastvert() 135183234Ssimon{ my $t3="%r8d"; # zaps $inp! 
136183234Ssimon 137183234Ssimon$code.=<<___; 138183234Ssimon movzb `&lo("$s0")`,$acc0 139183234Ssimon movzb `&lo("$s1")`,$acc1 140183234Ssimon movzb `&lo("$s2")`,$acc2 141183234Ssimon mov 2($sbox,$acc0,8),$t0 142183234Ssimon mov 2($sbox,$acc1,8),$t1 143183234Ssimon mov 2($sbox,$acc2,8),$t2 144183234Ssimon 145183234Ssimon and \$0x000000ff,$t0 146183234Ssimon and \$0x000000ff,$t1 147183234Ssimon and \$0x000000ff,$t2 148183234Ssimon 149183234Ssimon movzb `&lo("$s3")`,$acc0 150183234Ssimon movzb `&hi("$s1")`,$acc1 151183234Ssimon movzb `&hi("$s2")`,$acc2 152183234Ssimon mov 2($sbox,$acc0,8),$t3 153183234Ssimon mov 0($sbox,$acc1,8),$acc1 #$t0 154183234Ssimon mov 0($sbox,$acc2,8),$acc2 #$t1 155183234Ssimon 156183234Ssimon and \$0x000000ff,$t3 157183234Ssimon and \$0x0000ff00,$acc1 158183234Ssimon and \$0x0000ff00,$acc2 159183234Ssimon 160183234Ssimon xor $acc1,$t0 161183234Ssimon xor $acc2,$t1 162183234Ssimon shr \$16,$s2 163183234Ssimon 164183234Ssimon movzb `&hi("$s3")`,$acc0 165183234Ssimon movzb `&hi("$s0")`,$acc1 166183234Ssimon shr \$16,$s3 167183234Ssimon mov 0($sbox,$acc0,8),$acc0 #$t2 168183234Ssimon mov 0($sbox,$acc1,8),$acc1 #$t3 169183234Ssimon 170183234Ssimon and \$0x0000ff00,$acc0 171183234Ssimon and \$0x0000ff00,$acc1 172183234Ssimon shr \$16,$s1 173183234Ssimon xor $acc0,$t2 174183234Ssimon xor $acc1,$t3 175183234Ssimon shr \$16,$s0 176183234Ssimon 177183234Ssimon movzb `&lo("$s2")`,$acc0 178183234Ssimon movzb `&lo("$s3")`,$acc1 179183234Ssimon movzb `&lo("$s0")`,$acc2 180183234Ssimon mov 0($sbox,$acc0,8),$acc0 #$t0 181183234Ssimon mov 0($sbox,$acc1,8),$acc1 #$t1 182183234Ssimon mov 0($sbox,$acc2,8),$acc2 #$t2 183183234Ssimon 184183234Ssimon and \$0x00ff0000,$acc0 185183234Ssimon and \$0x00ff0000,$acc1 186183234Ssimon and \$0x00ff0000,$acc2 187183234Ssimon 188183234Ssimon xor $acc0,$t0 189183234Ssimon xor $acc1,$t1 190183234Ssimon xor $acc2,$t2 191183234Ssimon 192183234Ssimon movzb `&lo("$s1")`,$acc0 193183234Ssimon movzb `&hi("$s3")`,$acc1 
194183234Ssimon movzb `&hi("$s0")`,$acc2 195183234Ssimon mov 0($sbox,$acc0,8),$acc0 #$t3 196183234Ssimon mov 2($sbox,$acc1,8),$acc1 #$t0 197183234Ssimon mov 2($sbox,$acc2,8),$acc2 #$t1 198183234Ssimon 199183234Ssimon and \$0x00ff0000,$acc0 200183234Ssimon and \$0xff000000,$acc1 201183234Ssimon and \$0xff000000,$acc2 202183234Ssimon 203183234Ssimon xor $acc0,$t3 204183234Ssimon xor $acc1,$t0 205183234Ssimon xor $acc2,$t1 206183234Ssimon 207183234Ssimon movzb `&hi("$s1")`,$acc0 208183234Ssimon movzb `&hi("$s2")`,$acc1 209183234Ssimon mov 16+12($key),$s3 210183234Ssimon mov 2($sbox,$acc0,8),$acc0 #$t2 211183234Ssimon mov 2($sbox,$acc1,8),$acc1 #$t3 212183234Ssimon mov 16+0($key),$s0 213183234Ssimon 214183234Ssimon and \$0xff000000,$acc0 215183234Ssimon and \$0xff000000,$acc1 216183234Ssimon 217183234Ssimon xor $acc0,$t2 218183234Ssimon xor $acc1,$t3 219183234Ssimon 220183234Ssimon mov 16+4($key),$s1 221183234Ssimon mov 16+8($key),$s2 222183234Ssimon xor $t0,$s0 223183234Ssimon xor $t1,$s1 224183234Ssimon xor $t2,$s2 225183234Ssimon xor $t3,$s3 226183234Ssimon___ 227183234Ssimon} 228183234Ssimon 229183234Ssimonsub encstep() 230183234Ssimon{ my ($i,@s) = @_; 231183234Ssimon my $tmp0=$acc0; 232183234Ssimon my $tmp1=$acc1; 233183234Ssimon my $tmp2=$acc2; 234183234Ssimon my $out=($t0,$t1,$t2,$s[0])[$i]; 235183234Ssimon 236183234Ssimon if ($i==3) { 237183234Ssimon $tmp0=$s[1]; 238183234Ssimon $tmp1=$s[2]; 239183234Ssimon $tmp2=$s[3]; 240183234Ssimon } 241183234Ssimon $code.=" movzb ".&lo($s[0]).",$out\n"; 242183234Ssimon $code.=" mov $s[2],$tmp1\n" if ($i!=3); 243183234Ssimon $code.=" lea 16($key),$key\n" if ($i==0); 244183234Ssimon 245183234Ssimon $code.=" movzb ".&hi($s[1]).",$tmp0\n"; 246183234Ssimon $code.=" mov 0($sbox,$out,8),$out\n"; 247183234Ssimon 248183234Ssimon $code.=" shr \$16,$tmp1\n"; 249183234Ssimon $code.=" mov $s[3],$tmp2\n" if ($i!=3); 250183234Ssimon $code.=" xor 3($sbox,$tmp0,8),$out\n"; 251183234Ssimon 252183234Ssimon $code.=" movzb 
".&lo($tmp1).",$tmp1\n"; 253183234Ssimon $code.=" shr \$24,$tmp2\n"; 254183234Ssimon $code.=" xor 4*$i($key),$out\n"; 255183234Ssimon 256183234Ssimon $code.=" xor 2($sbox,$tmp1,8),$out\n"; 257183234Ssimon $code.=" xor 1($sbox,$tmp2,8),$out\n"; 258183234Ssimon 259183234Ssimon $code.=" mov $t0,$s[1]\n" if ($i==3); 260183234Ssimon $code.=" mov $t1,$s[2]\n" if ($i==3); 261183234Ssimon $code.=" mov $t2,$s[3]\n" if ($i==3); 262183234Ssimon $code.="\n"; 263183234Ssimon} 264183234Ssimon 265183234Ssimonsub enclast() 266183234Ssimon{ my ($i,@s)=@_; 267183234Ssimon my $tmp0=$acc0; 268183234Ssimon my $tmp1=$acc1; 269183234Ssimon my $tmp2=$acc2; 270183234Ssimon my $out=($t0,$t1,$t2,$s[0])[$i]; 271183234Ssimon 272183234Ssimon if ($i==3) { 273183234Ssimon $tmp0=$s[1]; 274183234Ssimon $tmp1=$s[2]; 275183234Ssimon $tmp2=$s[3]; 276183234Ssimon } 277183234Ssimon $code.=" movzb ".&lo($s[0]).",$out\n"; 278183234Ssimon $code.=" mov $s[2],$tmp1\n" if ($i!=3); 279183234Ssimon 280183234Ssimon $code.=" mov 2($sbox,$out,8),$out\n"; 281183234Ssimon $code.=" shr \$16,$tmp1\n"; 282183234Ssimon $code.=" mov $s[3],$tmp2\n" if ($i!=3); 283183234Ssimon 284183234Ssimon $code.=" and \$0x000000ff,$out\n"; 285183234Ssimon $code.=" movzb ".&hi($s[1]).",$tmp0\n"; 286183234Ssimon $code.=" movzb ".&lo($tmp1).",$tmp1\n"; 287183234Ssimon $code.=" shr \$24,$tmp2\n"; 288183234Ssimon 289183234Ssimon $code.=" mov 0($sbox,$tmp0,8),$tmp0\n"; 290183234Ssimon $code.=" mov 0($sbox,$tmp1,8),$tmp1\n"; 291183234Ssimon $code.=" mov 2($sbox,$tmp2,8),$tmp2\n"; 292183234Ssimon 293183234Ssimon $code.=" and \$0x0000ff00,$tmp0\n"; 294183234Ssimon $code.=" and \$0x00ff0000,$tmp1\n"; 295183234Ssimon $code.=" and \$0xff000000,$tmp2\n"; 296183234Ssimon 297183234Ssimon $code.=" xor $tmp0,$out\n"; 298183234Ssimon $code.=" mov $t0,$s[1]\n" if ($i==3); 299183234Ssimon $code.=" xor $tmp1,$out\n"; 300183234Ssimon $code.=" mov $t1,$s[2]\n" if ($i==3); 301183234Ssimon $code.=" xor $tmp2,$out\n"; 302183234Ssimon $code.=" mov $t2,$s[3]\n" 
if ($i==3); 303183234Ssimon $code.="\n"; 304183234Ssimon} 305183234Ssimon 306183234Ssimon$code.=<<___; 307183234Ssimon.type _x86_64_AES_encrypt,\@abi-omnipotent 308183234Ssimon.align 16 309183234Ssimon_x86_64_AES_encrypt: 310183234Ssimon xor 0($key),$s0 # xor with key 311183234Ssimon xor 4($key),$s1 312183234Ssimon xor 8($key),$s2 313183234Ssimon xor 12($key),$s3 314183234Ssimon 315183234Ssimon mov 240($key),$rnds # load key->rounds 316183234Ssimon sub \$1,$rnds 317183234Ssimon jmp .Lenc_loop 318183234Ssimon.align 16 319183234Ssimon.Lenc_loop: 320183234Ssimon___ 321183234Ssimon if ($verticalspin) { &encvert(); } 322183234Ssimon else { &encstep(0,$s0,$s1,$s2,$s3); 323183234Ssimon &encstep(1,$s1,$s2,$s3,$s0); 324183234Ssimon &encstep(2,$s2,$s3,$s0,$s1); 325183234Ssimon &encstep(3,$s3,$s0,$s1,$s2); 326183234Ssimon } 327183234Ssimon$code.=<<___; 328183234Ssimon sub \$1,$rnds 329183234Ssimon jnz .Lenc_loop 330183234Ssimon___ 331183234Ssimon if ($verticalspin) { &enclastvert(); } 332183234Ssimon else { &enclast(0,$s0,$s1,$s2,$s3); 333183234Ssimon &enclast(1,$s1,$s2,$s3,$s0); 334183234Ssimon &enclast(2,$s2,$s3,$s0,$s1); 335183234Ssimon &enclast(3,$s3,$s0,$s1,$s2); 336183234Ssimon $code.=<<___; 337183234Ssimon xor 16+0($key),$s0 # xor with key 338183234Ssimon xor 16+4($key),$s1 339183234Ssimon xor 16+8($key),$s2 340183234Ssimon xor 16+12($key),$s3 341183234Ssimon___ 342183234Ssimon } 343183234Ssimon$code.=<<___; 344183234Ssimon .byte 0xf3,0xc3 # rep ret 345183234Ssimon.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt 346183234Ssimon___ 347183234Ssimon 348183234Ssimon# void AES_encrypt (const void *inp,void *out,const AES_KEY *key); 349183234Ssimon$code.=<<___; 350183234Ssimon.globl AES_encrypt 351183234Ssimon.type AES_encrypt,\@function,3 352183234Ssimon.align 16 353183234SsimonAES_encrypt: 354183234Ssimon push %rbx 355183234Ssimon push %rbp 356183234Ssimon push %r12 357183234Ssimon push %r13 358183234Ssimon push %r14 359183234Ssimon push %r15 360183234Ssimon 361183234Ssimon 
mov %rdx,$key 362183234Ssimon mov %rdi,$inp 363183234Ssimon mov %rsi,$out 364183234Ssimon 365183234Ssimon .picmeup $sbox 366183234Ssimon lea AES_Te-.($sbox),$sbox 367183234Ssimon 368183234Ssimon mov 0($inp),$s0 369183234Ssimon mov 4($inp),$s1 370183234Ssimon mov 8($inp),$s2 371183234Ssimon mov 12($inp),$s3 372183234Ssimon 373183234Ssimon call _x86_64_AES_encrypt 374183234Ssimon 375183234Ssimon mov $s0,0($out) 376183234Ssimon mov $s1,4($out) 377183234Ssimon mov $s2,8($out) 378183234Ssimon mov $s3,12($out) 379183234Ssimon 380183234Ssimon pop %r15 381183234Ssimon pop %r14 382183234Ssimon pop %r13 383183234Ssimon pop %r12 384183234Ssimon pop %rbp 385183234Ssimon pop %rbx 386183234Ssimon ret 387183234Ssimon.size AES_encrypt,.-AES_encrypt 388183234Ssimon___ 389183234Ssimon 390183234Ssimon#------------------------------------------------------------------# 391183234Ssimon 392183234Ssimonsub decvert() 393183234Ssimon{ my $t3="%r8d"; # zaps $inp! 394183234Ssimon 395183234Ssimon$code.=<<___; 396183234Ssimon # favor 3-way issue Opteron pipeline... 
397183234Ssimon movzb `&lo("$s0")`,$acc0 398183234Ssimon movzb `&lo("$s1")`,$acc1 399183234Ssimon movzb `&lo("$s2")`,$acc2 400183234Ssimon mov 0($sbox,$acc0,8),$t0 401183234Ssimon mov 0($sbox,$acc1,8),$t1 402183234Ssimon mov 0($sbox,$acc2,8),$t2 403183234Ssimon 404183234Ssimon movzb `&hi("$s3")`,$acc0 405183234Ssimon movzb `&hi("$s0")`,$acc1 406183234Ssimon movzb `&lo("$s3")`,$acc2 407183234Ssimon xor 3($sbox,$acc0,8),$t0 408183234Ssimon xor 3($sbox,$acc1,8),$t1 409183234Ssimon mov 0($sbox,$acc2,8),$t3 410183234Ssimon 411183234Ssimon movzb `&hi("$s1")`,$acc0 412183234Ssimon shr \$16,$s0 413183234Ssimon movzb `&hi("$s2")`,$acc2 414183234Ssimon xor 3($sbox,$acc0,8),$t2 415183234Ssimon shr \$16,$s3 416183234Ssimon xor 3($sbox,$acc2,8),$t3 417183234Ssimon 418183234Ssimon shr \$16,$s1 419183234Ssimon lea 16($key),$key 420183234Ssimon shr \$16,$s2 421183234Ssimon 422183234Ssimon movzb `&lo("$s2")`,$acc0 423183234Ssimon movzb `&lo("$s3")`,$acc1 424183234Ssimon movzb `&lo("$s0")`,$acc2 425183234Ssimon xor 2($sbox,$acc0,8),$t0 426183234Ssimon xor 2($sbox,$acc1,8),$t1 427183234Ssimon xor 2($sbox,$acc2,8),$t2 428183234Ssimon 429183234Ssimon movzb `&hi("$s1")`,$acc0 430183234Ssimon movzb `&hi("$s2")`,$acc1 431183234Ssimon movzb `&lo("$s1")`,$acc2 432183234Ssimon xor 1($sbox,$acc0,8),$t0 433183234Ssimon xor 1($sbox,$acc1,8),$t1 434183234Ssimon xor 2($sbox,$acc2,8),$t3 435183234Ssimon 436183234Ssimon movzb `&hi("$s3")`,$acc0 437183234Ssimon mov 12($key),$s3 438183234Ssimon movzb `&hi("$s0")`,$acc2 439183234Ssimon xor 1($sbox,$acc0,8),$t2 440183234Ssimon mov 0($key),$s0 441183234Ssimon xor 1($sbox,$acc2,8),$t3 442183234Ssimon 443183234Ssimon xor $t0,$s0 444183234Ssimon mov 4($key),$s1 445183234Ssimon mov 8($key),$s2 446183234Ssimon xor $t2,$s2 447183234Ssimon xor $t1,$s1 448183234Ssimon xor $t3,$s3 449183234Ssimon___ 450183234Ssimon} 451183234Ssimon 452183234Ssimonsub declastvert() 453183234Ssimon{ my $t3="%r8d"; # zaps $inp! 
454183234Ssimon 455183234Ssimon$code.=<<___; 456183234Ssimon movzb `&lo("$s0")`,$acc0 457183234Ssimon movzb `&lo("$s1")`,$acc1 458183234Ssimon movzb `&lo("$s2")`,$acc2 459183234Ssimon movzb 2048($sbox,$acc0,1),$t0 460183234Ssimon movzb 2048($sbox,$acc1,1),$t1 461183234Ssimon movzb 2048($sbox,$acc2,1),$t2 462183234Ssimon 463183234Ssimon movzb `&lo("$s3")`,$acc0 464183234Ssimon movzb `&hi("$s3")`,$acc1 465183234Ssimon movzb `&hi("$s0")`,$acc2 466183234Ssimon movzb 2048($sbox,$acc0,1),$t3 467183234Ssimon movzb 2048($sbox,$acc1,1),$acc1 #$t0 468183234Ssimon movzb 2048($sbox,$acc2,1),$acc2 #$t1 469183234Ssimon 470183234Ssimon shl \$8,$acc1 471183234Ssimon shl \$8,$acc2 472183234Ssimon 473183234Ssimon xor $acc1,$t0 474183234Ssimon xor $acc2,$t1 475183234Ssimon shr \$16,$s3 476183234Ssimon 477183234Ssimon movzb `&hi("$s1")`,$acc0 478183234Ssimon movzb `&hi("$s2")`,$acc1 479183234Ssimon shr \$16,$s0 480183234Ssimon movzb 2048($sbox,$acc0,1),$acc0 #$t2 481183234Ssimon movzb 2048($sbox,$acc1,1),$acc1 #$t3 482183234Ssimon 483183234Ssimon shl \$8,$acc0 484183234Ssimon shl \$8,$acc1 485183234Ssimon shr \$16,$s1 486183234Ssimon xor $acc0,$t2 487183234Ssimon xor $acc1,$t3 488183234Ssimon shr \$16,$s2 489183234Ssimon 490183234Ssimon movzb `&lo("$s2")`,$acc0 491183234Ssimon movzb `&lo("$s3")`,$acc1 492183234Ssimon movzb `&lo("$s0")`,$acc2 493183234Ssimon movzb 2048($sbox,$acc0,1),$acc0 #$t0 494183234Ssimon movzb 2048($sbox,$acc1,1),$acc1 #$t1 495183234Ssimon movzb 2048($sbox,$acc2,1),$acc2 #$t2 496183234Ssimon 497183234Ssimon shl \$16,$acc0 498183234Ssimon shl \$16,$acc1 499183234Ssimon shl \$16,$acc2 500183234Ssimon 501183234Ssimon xor $acc0,$t0 502183234Ssimon xor $acc1,$t1 503183234Ssimon xor $acc2,$t2 504183234Ssimon 505183234Ssimon movzb `&lo("$s1")`,$acc0 506183234Ssimon movzb `&hi("$s1")`,$acc1 507183234Ssimon movzb `&hi("$s2")`,$acc2 508183234Ssimon movzb 2048($sbox,$acc0,1),$acc0 #$t3 509183234Ssimon movzb 2048($sbox,$acc1,1),$acc1 #$t0 510183234Ssimon movzb 
2048($sbox,$acc2,1),$acc2 #$t1 511183234Ssimon 512183234Ssimon shl \$16,$acc0 513183234Ssimon shl \$24,$acc1 514183234Ssimon shl \$24,$acc2 515183234Ssimon 516183234Ssimon xor $acc0,$t3 517183234Ssimon xor $acc1,$t0 518183234Ssimon xor $acc2,$t1 519183234Ssimon 520183234Ssimon movzb `&hi("$s3")`,$acc0 521183234Ssimon movzb `&hi("$s0")`,$acc1 522183234Ssimon mov 16+12($key),$s3 523183234Ssimon movzb 2048($sbox,$acc0,1),$acc0 #$t2 524183234Ssimon movzb 2048($sbox,$acc1,1),$acc1 #$t3 525183234Ssimon mov 16+0($key),$s0 526183234Ssimon 527183234Ssimon shl \$24,$acc0 528183234Ssimon shl \$24,$acc1 529183234Ssimon 530183234Ssimon xor $acc0,$t2 531183234Ssimon xor $acc1,$t3 532183234Ssimon 533183234Ssimon mov 16+4($key),$s1 534183234Ssimon mov 16+8($key),$s2 535183234Ssimon xor $t0,$s0 536183234Ssimon xor $t1,$s1 537183234Ssimon xor $t2,$s2 538183234Ssimon xor $t3,$s3 539183234Ssimon___ 540183234Ssimon} 541183234Ssimon 542183234Ssimonsub decstep() 543183234Ssimon{ my ($i,@s) = @_; 544183234Ssimon my $tmp0=$acc0; 545183234Ssimon my $tmp1=$acc1; 546183234Ssimon my $tmp2=$acc2; 547183234Ssimon my $out=($t0,$t1,$t2,$s[0])[$i]; 548183234Ssimon 549183234Ssimon $code.=" mov $s[0],$out\n" if ($i!=3); 550183234Ssimon $tmp1=$s[2] if ($i==3); 551183234Ssimon $code.=" mov $s[2],$tmp1\n" if ($i!=3); 552183234Ssimon $code.=" and \$0xFF,$out\n"; 553183234Ssimon 554183234Ssimon $code.=" mov 0($sbox,$out,8),$out\n"; 555183234Ssimon $code.=" shr \$16,$tmp1\n"; 556183234Ssimon $tmp2=$s[3] if ($i==3); 557183234Ssimon $code.=" mov $s[3],$tmp2\n" if ($i!=3); 558183234Ssimon 559183234Ssimon $tmp0=$s[1] if ($i==3); 560183234Ssimon $code.=" movzb ".&hi($s[1]).",$tmp0\n"; 561183234Ssimon $code.=" and \$0xFF,$tmp1\n"; 562183234Ssimon $code.=" shr \$24,$tmp2\n"; 563183234Ssimon 564183234Ssimon $code.=" xor 3($sbox,$tmp0,8),$out\n"; 565183234Ssimon $code.=" xor 2($sbox,$tmp1,8),$out\n"; 566183234Ssimon $code.=" xor 1($sbox,$tmp2,8),$out\n"; 567183234Ssimon 568183234Ssimon $code.=" mov $t2,$s[1]\n" if 
($i==3); 569183234Ssimon $code.=" mov $t1,$s[2]\n" if ($i==3); 570183234Ssimon $code.=" mov $t0,$s[3]\n" if ($i==3); 571183234Ssimon $code.="\n"; 572183234Ssimon} 573183234Ssimon 574183234Ssimonsub declast() 575183234Ssimon{ my ($i,@s)=@_; 576183234Ssimon my $tmp0=$acc0; 577183234Ssimon my $tmp1=$acc1; 578183234Ssimon my $tmp2=$acc2; 579183234Ssimon my $out=($t0,$t1,$t2,$s[0])[$i]; 580183234Ssimon 581183234Ssimon $code.=" mov $s[0],$out\n" if ($i!=3); 582183234Ssimon $tmp1=$s[2] if ($i==3); 583183234Ssimon $code.=" mov $s[2],$tmp1\n" if ($i!=3); 584183234Ssimon $code.=" and \$0xFF,$out\n"; 585183234Ssimon 586183234Ssimon $code.=" movzb 2048($sbox,$out,1),$out\n"; 587183234Ssimon $code.=" shr \$16,$tmp1\n"; 588183234Ssimon $tmp2=$s[3] if ($i==3); 589183234Ssimon $code.=" mov $s[3],$tmp2\n" if ($i!=3); 590183234Ssimon 591183234Ssimon $tmp0=$s[1] if ($i==3); 592183234Ssimon $code.=" movzb ".&hi($s[1]).",$tmp0\n"; 593183234Ssimon $code.=" and \$0xFF,$tmp1\n"; 594183234Ssimon $code.=" shr \$24,$tmp2\n"; 595183234Ssimon 596183234Ssimon $code.=" movzb 2048($sbox,$tmp0,1),$tmp0\n"; 597183234Ssimon $code.=" movzb 2048($sbox,$tmp1,1),$tmp1\n"; 598183234Ssimon $code.=" movzb 2048($sbox,$tmp2,1),$tmp2\n"; 599183234Ssimon 600183234Ssimon $code.=" shl \$8,$tmp0\n"; 601183234Ssimon $code.=" shl \$16,$tmp1\n"; 602183234Ssimon $code.=" shl \$24,$tmp2\n"; 603183234Ssimon 604183234Ssimon $code.=" xor $tmp0,$out\n"; 605183234Ssimon $code.=" mov $t2,$s[1]\n" if ($i==3); 606183234Ssimon $code.=" xor $tmp1,$out\n"; 607183234Ssimon $code.=" mov $t1,$s[2]\n" if ($i==3); 608183234Ssimon $code.=" xor $tmp2,$out\n"; 609183234Ssimon $code.=" mov $t0,$s[3]\n" if ($i==3); 610183234Ssimon $code.="\n"; 611183234Ssimon} 612183234Ssimon 613183234Ssimon$code.=<<___; 614183234Ssimon.type _x86_64_AES_decrypt,\@abi-omnipotent 615183234Ssimon.align 16 616183234Ssimon_x86_64_AES_decrypt: 617183234Ssimon xor 0($key),$s0 # xor with key 618183234Ssimon xor 4($key),$s1 619183234Ssimon xor 8($key),$s2 
620183234Ssimon xor 12($key),$s3 621183234Ssimon 622183234Ssimon mov 240($key),$rnds # load key->rounds 623183234Ssimon sub \$1,$rnds 624183234Ssimon jmp .Ldec_loop 625183234Ssimon.align 16 626183234Ssimon.Ldec_loop: 627183234Ssimon___ 628183234Ssimon if ($verticalspin) { &decvert(); } 629183234Ssimon else { &decstep(0,$s0,$s3,$s2,$s1); 630183234Ssimon &decstep(1,$s1,$s0,$s3,$s2); 631183234Ssimon &decstep(2,$s2,$s1,$s0,$s3); 632183234Ssimon &decstep(3,$s3,$s2,$s1,$s0); 633183234Ssimon $code.=<<___; 634183234Ssimon lea 16($key),$key 635183234Ssimon xor 0($key),$s0 # xor with key 636183234Ssimon xor 4($key),$s1 637183234Ssimon xor 8($key),$s2 638183234Ssimon xor 12($key),$s3 639183234Ssimon___ 640183234Ssimon } 641183234Ssimon$code.=<<___; 642183234Ssimon sub \$1,$rnds 643183234Ssimon jnz .Ldec_loop 644183234Ssimon___ 645183234Ssimon if ($verticalspin) { &declastvert(); } 646183234Ssimon else { &declast(0,$s0,$s3,$s2,$s1); 647183234Ssimon &declast(1,$s1,$s0,$s3,$s2); 648183234Ssimon &declast(2,$s2,$s1,$s0,$s3); 649183234Ssimon &declast(3,$s3,$s2,$s1,$s0); 650183234Ssimon $code.=<<___; 651183234Ssimon xor 16+0($key),$s0 # xor with key 652183234Ssimon xor 16+4($key),$s1 653183234Ssimon xor 16+8($key),$s2 654183234Ssimon xor 16+12($key),$s3 655183234Ssimon___ 656183234Ssimon } 657183234Ssimon$code.=<<___; 658183234Ssimon .byte 0xf3,0xc3 # rep ret 659183234Ssimon.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt 660183234Ssimon___ 661183234Ssimon 662183234Ssimon# void AES_decrypt (const void *inp,void *out,const AES_KEY *key); 663183234Ssimon$code.=<<___; 664183234Ssimon.globl AES_decrypt 665183234Ssimon.type AES_decrypt,\@function,3 666183234Ssimon.align 16 667183234SsimonAES_decrypt: 668183234Ssimon push %rbx 669183234Ssimon push %rbp 670183234Ssimon push %r12 671183234Ssimon push %r13 672183234Ssimon push %r14 673183234Ssimon push %r15 674183234Ssimon 675183234Ssimon mov %rdx,$key 676183234Ssimon mov %rdi,$inp 677183234Ssimon mov %rsi,$out 678183234Ssimon 679183234Ssimon 
.picmeup $sbox 680183234Ssimon lea AES_Td-.($sbox),$sbox 681183234Ssimon 682183234Ssimon # prefetch Td4 683183234Ssimon lea 2048+128($sbox),$sbox; 684183234Ssimon mov 0-128($sbox),$s0 685183234Ssimon mov 32-128($sbox),$s1 686183234Ssimon mov 64-128($sbox),$s2 687183234Ssimon mov 96-128($sbox),$s3 688183234Ssimon mov 128-128($sbox),$s0 689183234Ssimon mov 160-128($sbox),$s1 690183234Ssimon mov 192-128($sbox),$s2 691183234Ssimon mov 224-128($sbox),$s3 692183234Ssimon lea -2048-128($sbox),$sbox; 693183234Ssimon 694183234Ssimon mov 0($inp),$s0 695183234Ssimon mov 4($inp),$s1 696183234Ssimon mov 8($inp),$s2 697183234Ssimon mov 12($inp),$s3 698183234Ssimon 699183234Ssimon call _x86_64_AES_decrypt 700183234Ssimon 701183234Ssimon mov $s0,0($out) 702183234Ssimon mov $s1,4($out) 703183234Ssimon mov $s2,8($out) 704183234Ssimon mov $s3,12($out) 705183234Ssimon 706183234Ssimon pop %r15 707183234Ssimon pop %r14 708183234Ssimon pop %r13 709183234Ssimon pop %r12 710183234Ssimon pop %rbp 711183234Ssimon pop %rbx 712183234Ssimon ret 713183234Ssimon.size AES_decrypt,.-AES_decrypt 714183234Ssimon___ 715183234Ssimon#------------------------------------------------------------------# 716183234Ssimon 717183234Ssimonsub enckey() 718183234Ssimon{ 719183234Ssimon$code.=<<___; 720183234Ssimon movz %dl,%esi # rk[i]>>0 721183234Ssimon mov 2(%rbp,%rsi,8),%ebx 722183234Ssimon movz %dh,%esi # rk[i]>>8 723183234Ssimon and \$0xFF000000,%ebx 724183234Ssimon xor %ebx,%eax 725183234Ssimon 726183234Ssimon mov 2(%rbp,%rsi,8),%ebx 727183234Ssimon shr \$16,%edx 728183234Ssimon and \$0x000000FF,%ebx 729183234Ssimon movz %dl,%esi # rk[i]>>16 730183234Ssimon xor %ebx,%eax 731183234Ssimon 732183234Ssimon mov 0(%rbp,%rsi,8),%ebx 733183234Ssimon movz %dh,%esi # rk[i]>>24 734183234Ssimon and \$0x0000FF00,%ebx 735183234Ssimon xor %ebx,%eax 736183234Ssimon 737183234Ssimon mov 0(%rbp,%rsi,8),%ebx 738183234Ssimon and \$0x00FF0000,%ebx 739183234Ssimon xor %ebx,%eax 740183234Ssimon 741183234Ssimon xor 
2048(%rbp,%rcx,4),%eax # rcon 742183234Ssimon___ 743183234Ssimon} 744183234Ssimon 745183234Ssimon# int AES_set_encrypt_key(const unsigned char *userKey, const int bits, 746183234Ssimon# AES_KEY *key) 747183234Ssimon$code.=<<___; 748183234Ssimon.globl AES_set_encrypt_key 749183234Ssimon.type AES_set_encrypt_key,\@function,3 750183234Ssimon.align 16 751183234SsimonAES_set_encrypt_key: 752183234Ssimon push %rbx 753183234Ssimon push %rbp 754215697Ssimon sub \$8,%rsp 755183234Ssimon 756215697Ssimon call _x86_64_AES_set_encrypt_key 757215697Ssimon 758215697Ssimon mov 8(%rsp),%rbp 759215697Ssimon mov 16(%rsp),%rbx 760215697Ssimon add \$24,%rsp 761215697Ssimon ret 762215697Ssimon.size AES_set_encrypt_key,.-AES_set_encrypt_key 763215697Ssimon 764215697Ssimon.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent 765215697Ssimon.align 16 766215697Ssimon_x86_64_AES_set_encrypt_key: 767183234Ssimon mov %esi,%ecx # %ecx=bits 768183234Ssimon mov %rdi,%rsi # %rsi=userKey 769183234Ssimon mov %rdx,%rdi # %rdi=key 770183234Ssimon 771183234Ssimon test \$-1,%rsi 772183234Ssimon jz .Lbadpointer 773183234Ssimon test \$-1,%rdi 774183234Ssimon jz .Lbadpointer 775183234Ssimon 776183234Ssimon .picmeup %rbp 777183234Ssimon lea AES_Te-.(%rbp),%rbp 778183234Ssimon 779183234Ssimon cmp \$128,%ecx 780183234Ssimon je .L10rounds 781183234Ssimon cmp \$192,%ecx 782183234Ssimon je .L12rounds 783183234Ssimon cmp \$256,%ecx 784183234Ssimon je .L14rounds 785183234Ssimon mov \$-2,%rax # invalid number of bits 786183234Ssimon jmp .Lexit 787183234Ssimon 788183234Ssimon.L10rounds: 789183234Ssimon mov 0(%rsi),%eax # copy first 4 dwords 790183234Ssimon mov 4(%rsi),%ebx 791183234Ssimon mov 8(%rsi),%ecx 792183234Ssimon mov 12(%rsi),%edx 793183234Ssimon mov %eax,0(%rdi) 794183234Ssimon mov %ebx,4(%rdi) 795183234Ssimon mov %ecx,8(%rdi) 796183234Ssimon mov %edx,12(%rdi) 797183234Ssimon 798183234Ssimon xor %ecx,%ecx 799183234Ssimon jmp .L10shortcut 800183234Ssimon.align 4 801183234Ssimon.L10loop: 802183234Ssimon mov 
0(%rdi),%eax # rk[0] 803183234Ssimon mov 12(%rdi),%edx # rk[3] 804183234Ssimon.L10shortcut: 805183234Ssimon___ 806183234Ssimon &enckey (); 807183234Ssimon$code.=<<___; 808183234Ssimon mov %eax,16(%rdi) # rk[4] 809183234Ssimon xor 4(%rdi),%eax 810183234Ssimon mov %eax,20(%rdi) # rk[5] 811183234Ssimon xor 8(%rdi),%eax 812183234Ssimon mov %eax,24(%rdi) # rk[6] 813183234Ssimon xor 12(%rdi),%eax 814183234Ssimon mov %eax,28(%rdi) # rk[7] 815183234Ssimon add \$1,%ecx 816183234Ssimon lea 16(%rdi),%rdi 817183234Ssimon cmp \$10,%ecx 818183234Ssimon jl .L10loop 819183234Ssimon 820183234Ssimon movl \$10,80(%rdi) # setup number of rounds 821183234Ssimon xor %rax,%rax 822183234Ssimon jmp .Lexit 823183234Ssimon 824183234Ssimon.L12rounds: 825183234Ssimon mov 0(%rsi),%eax # copy first 6 dwords 826183234Ssimon mov 4(%rsi),%ebx 827183234Ssimon mov 8(%rsi),%ecx 828183234Ssimon mov 12(%rsi),%edx 829183234Ssimon mov %eax,0(%rdi) 830183234Ssimon mov %ebx,4(%rdi) 831183234Ssimon mov %ecx,8(%rdi) 832183234Ssimon mov %edx,12(%rdi) 833183234Ssimon mov 16(%rsi),%ecx 834183234Ssimon mov 20(%rsi),%edx 835183234Ssimon mov %ecx,16(%rdi) 836183234Ssimon mov %edx,20(%rdi) 837183234Ssimon 838183234Ssimon xor %ecx,%ecx 839183234Ssimon jmp .L12shortcut 840183234Ssimon.align 4 841183234Ssimon.L12loop: 842183234Ssimon mov 0(%rdi),%eax # rk[0] 843183234Ssimon mov 20(%rdi),%edx # rk[5] 844183234Ssimon.L12shortcut: 845183234Ssimon___ 846183234Ssimon &enckey (); 847183234Ssimon$code.=<<___; 848183234Ssimon mov %eax,24(%rdi) # rk[6] 849183234Ssimon xor 4(%rdi),%eax 850183234Ssimon mov %eax,28(%rdi) # rk[7] 851183234Ssimon xor 8(%rdi),%eax 852183234Ssimon mov %eax,32(%rdi) # rk[8] 853183234Ssimon xor 12(%rdi),%eax 854183234Ssimon mov %eax,36(%rdi) # rk[9] 855183234Ssimon 856183234Ssimon cmp \$7,%ecx 857183234Ssimon je .L12break 858183234Ssimon add \$1,%ecx 859183234Ssimon 860183234Ssimon xor 16(%rdi),%eax 861183234Ssimon mov %eax,40(%rdi) # rk[10] 862183234Ssimon xor 20(%rdi),%eax 863183234Ssimon mov 
%eax,44(%rdi) # rk[11] 864183234Ssimon 865183234Ssimon lea 24(%rdi),%rdi 866183234Ssimon jmp .L12loop 867183234Ssimon.L12break: 868183234Ssimon movl \$12,72(%rdi) # setup number of rounds 869183234Ssimon xor %rax,%rax 870183234Ssimon jmp .Lexit 871183234Ssimon 872183234Ssimon.L14rounds: 873183234Ssimon mov 0(%rsi),%eax # copy first 8 dwords 874183234Ssimon mov 4(%rsi),%ebx 875183234Ssimon mov 8(%rsi),%ecx 876183234Ssimon mov 12(%rsi),%edx 877183234Ssimon mov %eax,0(%rdi) 878183234Ssimon mov %ebx,4(%rdi) 879183234Ssimon mov %ecx,8(%rdi) 880183234Ssimon mov %edx,12(%rdi) 881183234Ssimon mov 16(%rsi),%eax 882183234Ssimon mov 20(%rsi),%ebx 883183234Ssimon mov 24(%rsi),%ecx 884183234Ssimon mov 28(%rsi),%edx 885183234Ssimon mov %eax,16(%rdi) 886183234Ssimon mov %ebx,20(%rdi) 887183234Ssimon mov %ecx,24(%rdi) 888183234Ssimon mov %edx,28(%rdi) 889183234Ssimon 890183234Ssimon xor %ecx,%ecx 891183234Ssimon jmp .L14shortcut 892183234Ssimon.align 4 893183234Ssimon.L14loop: 894183234Ssimon mov 28(%rdi),%edx # rk[4] 895183234Ssimon.L14shortcut: 896183234Ssimon mov 0(%rdi),%eax # rk[0] 897183234Ssimon___ 898183234Ssimon &enckey (); 899183234Ssimon$code.=<<___; 900183234Ssimon mov %eax,32(%rdi) # rk[8] 901183234Ssimon xor 4(%rdi),%eax 902183234Ssimon mov %eax,36(%rdi) # rk[9] 903183234Ssimon xor 8(%rdi),%eax 904183234Ssimon mov %eax,40(%rdi) # rk[10] 905183234Ssimon xor 12(%rdi),%eax 906183234Ssimon mov %eax,44(%rdi) # rk[11] 907183234Ssimon 908183234Ssimon cmp \$6,%ecx 909183234Ssimon je .L14break 910183234Ssimon add \$1,%ecx 911183234Ssimon 912183234Ssimon mov %eax,%edx 913183234Ssimon mov 16(%rdi),%eax # rk[4] 914183234Ssimon movz %dl,%esi # rk[11]>>0 915183234Ssimon mov 2(%rbp,%rsi,8),%ebx 916183234Ssimon movz %dh,%esi # rk[11]>>8 917183234Ssimon and \$0x000000FF,%ebx 918183234Ssimon xor %ebx,%eax 919183234Ssimon 920183234Ssimon mov 0(%rbp,%rsi,8),%ebx 921183234Ssimon shr \$16,%edx 922183234Ssimon and \$0x0000FF00,%ebx 923183234Ssimon movz %dl,%esi # rk[11]>>16 924183234Ssimon 
xor	%ebx,%eax

	mov	0(%rbp,%rsi,8),%ebx
	movz	%dh,%esi	# rk[11]>>24
	and	\$0x00FF0000,%ebx
	xor	%ebx,%eax

	mov	2(%rbp,%rsi,8),%ebx
	and	\$0xFF000000,%ebx
	xor	%ebx,%eax

	mov	%eax,48(%rdi)	# rk[12]
	xor	20(%rdi),%eax
	mov	%eax,52(%rdi)	# rk[13]
	xor	24(%rdi),%eax
	mov	%eax,56(%rdi)	# rk[14]
	xor	28(%rdi),%eax
	mov	%eax,60(%rdi)	# rk[15]

	lea	32(%rdi),%rdi
	jmp	.L14loop
.L14break:
	movl	\$14,48(%rdi)	# setup number of rounds
	xor	%rax,%rax
	jmp	.Lexit

.Lbadpointer:
	mov	\$-1,%rax
.Lexit:
	.byte	0xf3,0xc3	# rep ret
.size	_x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
___

# deckey(i, ptr, te, td) - append code that transforms one 32-bit round-key
# word in place.
#
#   i    byte offset of the word relative to ptr
#   ptr  register holding the round-key pointer
#   te   register holding the address of AES_Te
#   td   register holding the address of AES_Td
#
# The emitted code loads the word at i(ptr), maps each of its four bytes
# through byte 2 of the matching 8-byte AES_Te entry, uses the results to
# index AES_Td at byte offsets 0, 3, 2 and 1, XORs the four lookups together
# and stores the result back to i(ptr).  This looks like the usual
# Td[Te[...]] way of applying InvMixColumn to an encryption round key.
# %rax, %rbx and %rdx are clobbered.
#
# The sub takes four arguments, so it is declared without the empty "()"
# prototype; the &deckey(...) call form used below works either way.
sub deckey
{ my ($i,$ptr,$te,$td) = @_;
$code.=<<___;
	mov	$i($ptr),%eax
	mov	%eax,%edx
	movz	%ah,%ebx
	shr	\$16,%edx
	and	\$0xFF,%eax
	movzb	2($te,%rax,8),%rax
	movzb	2($te,%rbx,8),%rbx
	mov	0($td,%rax,8),%eax
	xor	3($td,%rbx,8),%eax
	movzb	%dh,%ebx
	and	\$0xFF,%edx
	movzb	2($te,%rdx,8),%rdx
	movzb	2($te,%rbx,8),%rbx
	xor	2($td,%rdx,8),%eax
	xor	1($td,%rbx,8),%eax
	mov	%eax,$i($ptr)
___
}

# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
#                        AES_KEY *key)
#
# Generated code:
#   1. saves %rbx, %rbp and the key pointer, then calls
#      _x86_64_AES_set_encrypt_key; a non-zero result is returned as is
#      through .Labort;
#   2. .Linvert swaps 16-byte round keys from both ends of the schedule
#      until the two pointers meet, i.e. reverses their order (the round
#      count is read from offset 240 of the key structure);
#   3. .Lpermute runs deckey on the four words of rounds-1 round keys,
#      starting at key+16, so the first and the last one are left alone;
#   4. returns 0 in %rax.
$code.=<<___;
.globl	AES_set_decrypt_key
.type	AES_set_decrypt_key,\@function,3
.align	16
AES_set_decrypt_key:
	push	%rbx
	push	%rbp
	push	%rdx		# save key schedule

	call	_x86_64_AES_set_encrypt_key
	mov	(%rsp),%r8	# restore key schedule
	cmp	\$0,%eax
	jne	.Labort

	mov	240(%r8),%ecx	# pull number of rounds
	xor	%rdi,%rdi
	lea	(%rdi,%rcx,4),%rcx
	mov	%r8,%rsi
	lea	(%r8,%rcx,4),%rdi	# pointer to last chunk
.align	4
.Linvert:
	mov	0(%rsi),%rax
	mov	8(%rsi),%rbx
	mov	0(%rdi),%rcx
	mov	8(%rdi),%rdx
	mov	%rax,0(%rdi)
	mov	%rbx,8(%rdi)
	mov	%rcx,0(%rsi)
	mov	%rdx,8(%rsi)
	lea	16(%rsi),%rsi
	lea	-16(%rdi),%rdi
	cmp	%rsi,%rdi
	jne	.Linvert

	.picmeup %r9
	lea	AES_Td-.(%r9),%rdi
	lea	AES_Te-AES_Td(%rdi),%r9

	mov	%r8,%rsi
	mov	240(%r8),%ecx	# pull number of rounds
	sub	\$1,%ecx
.align	4
.Lpermute:
	lea	16(%rsi),%rsi
___
	&deckey	(0,"%rsi","%r9","%rdi");
	&deckey	(4,"%rsi","%r9","%rdi");
	&deckey	(8,"%rsi","%r9","%rdi");
	&deckey	(12,"%rsi","%r9","%rdi");
$code.=<<___;
	sub	\$1,%ecx
	jnz	.Lpermute

	xor	%rax,%rax
.Labort:
	mov	8(%rsp),%rbp
	mov	16(%rsp),%rbx
	add	\$24,%rsp
	ret
.size	AES_set_decrypt_key,.-AES_set_decrypt_key
___

# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
#			size_t length, const AES_KEY *key,
#			unsigned char *ivp,const int enc);
{
# stack frame layout
# -8(%rsp)		return address
my $_rsp="0(%rsp)";		# saved %rsp
my $_len="8(%rsp)";		# copy of 3rd parameter, length
my $_key="16(%rsp)";		# copy of 4th parameter, key
my $_ivp="24(%rsp)";		# copy of 5th parameter, ivp
my $keyp="32(%rsp)";		# one to pass as $key
my $ivec="40(%rsp)";		# ivec[16]
my $aes_key="56(%rsp)";		# copy of aes_key
my $mark="56+240(%rsp)";	# copy of aes_key->rounds

$code.=<<___;
.globl	AES_cbc_encrypt
.type	AES_cbc_encrypt,\@function,6
.align	16
AES_cbc_encrypt:
	cmp	\$0,%rdx	# check length
	je	.Lcbc_just_ret
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	cld
	mov	%r9d,%r9d	# clear upper half of enc

	.picmeup $sbox
.Lcbc_pic_point:

	cmp	\$0,%r9
	je	.LDECRYPT

	lea	AES_Te-.Lcbc_pic_point($sbox),$sbox

	# allocate aligned stack frame...
	lea	-64-248(%rsp),$key
	and	\$-64,$key

	# ...
# and make it doesn't alias with AES_Te modulo 4096
	mov	$sbox,%r10
	lea	2048($sbox),%r11
	mov	$key,%r12
	and	\$0xFFF,%r10	# s = $sbox&0xfff
	and	\$0xFFF,%r11	# e = ($sbox+2048)&0xfff
	and	\$0xFFF,%r12	# p = %rsp&0xfff

	cmp	%r11,%r12	# if (p=>e) %rsp =- (p-e);
	jb	.Lcbc_te_break_out
	sub	%r11,%r12
	sub	%r12,$key
	jmp	.Lcbc_te_ok
.Lcbc_te_break_out:		# else %rsp -= (p-s)&0xfff + framesz
	sub	%r10,%r12
	and	\$0xFFF,%r12
	add	\$320,%r12
	sub	%r12,$key
.align	4
.Lcbc_te_ok:

	xchg	%rsp,$key
	add	\$8,%rsp	# reserve for return address!
	mov	$key,$_rsp	# save %rsp
	mov	%rdx,$_len	# save copy of len
	mov	%rcx,$_key	# save copy of key
	mov	%r8,$_ivp	# save copy of ivp
	movl	\$0,$mark	# copy of aes_key->rounds = 0;
	mov	%r8,%rbp	# rearrange input arguments
	mov	%rsi,$out
	mov	%rdi,$inp
	mov	%rcx,$key

	# do we copy key schedule to stack?
	mov	$key,%r10
	sub	$sbox,%r10
	and	\$0xfff,%r10
	cmp	\$2048,%r10
	jb	.Lcbc_do_ecopy
	cmp	\$4096-248,%r10
	jb	.Lcbc_skip_ecopy
.align	4
.Lcbc_do_ecopy:
	mov	$key,%rsi
	lea	$aes_key,%rdi
	lea	$aes_key,$key
	mov	\$240/8,%ecx
	.long	0x90A548F3	# rep movsq
	mov	(%rsi),%eax	# copy aes_key->rounds
	mov	%eax,(%rdi)
.Lcbc_skip_ecopy:
	mov	$key,$keyp	# save key pointer

	# touch the 2048-byte table, four loads per 128-byte step
	mov	\$16,%ecx
.align	4
.Lcbc_prefetch_te:
	mov	0($sbox),%r10
	mov	32($sbox),%r11
	mov	64($sbox),%r12
	mov	96($sbox),%r13
	lea	128($sbox),$sbox
	sub	\$1,%ecx
	jnz	.Lcbc_prefetch_te
	sub	\$2048,$sbox

	test	\$-16,%rdx	# check upon length
	mov	%rdx,%r10
	mov	0(%rbp),$s0	# load iv
	mov	4(%rbp),$s1
	mov	8(%rbp),$s2
	mov	12(%rbp),$s3
	jz	.Lcbc_enc_tail	# short input...

.align	4
.Lcbc_enc_loop:
	xor	0($inp),$s0
	xor	4($inp),$s1
	xor	8($inp),$s2
	xor	12($inp),$s3
	mov	$inp,$ivec	# if ($verticalspin) save inp

	mov	$keyp,$key	# restore key
	call	_x86_64_AES_encrypt

	mov	$ivec,$inp	# if ($verticalspin) restore inp
	mov	$s0,0($out)
	mov	$s1,4($out)
	mov	$s2,8($out)
	mov	$s3,12($out)

	mov	$_len,%r10
	lea	16($inp),$inp
	lea	16($out),$out
	sub	\$16,%r10
	test	\$-16,%r10
	mov	%r10,$_len
	jnz	.Lcbc_enc_loop
	test	\$15,%r10
	jnz	.Lcbc_enc_tail
	mov	$_ivp,%rbp	# restore ivp
	mov	$s0,0(%rbp)	# save ivec
	mov	$s1,4(%rbp)
	mov	$s2,8(%rbp)
	mov	$s3,12(%rbp)

.align	4
.Lcbc_cleanup:
	cmpl	\$0,$mark	# was the key schedule copied?
	lea	$aes_key,%rdi
	je	.Lcbc_exit
	mov	\$240/8,%ecx
	xor	%rax,%rax
	.long	0x90AB48F3	# rep stosq
.Lcbc_exit:
	mov	$_rsp,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
.Lcbc_just_ret:
	ret
.align	4
.Lcbc_enc_tail:
	mov	%rax,%r11
	mov	%rcx,%r12
	mov	%r10,%rcx
	mov	$inp,%rsi
	mov	$out,%rdi
	.long	0xF689A4F3	# rep movsb
	mov	\$16,%rcx	# zero tail
	sub	%r10,%rcx
	xor	%rax,%rax
	.long	0xF689AAF3	# rep stosb
	mov	$out,$inp	# this is not a mistake!
	movq	\$16,$_len	# len=16
	mov	%r11,%rax
	mov	%r12,%rcx
	jmp	.Lcbc_enc_loop	# one more spin...
#----------------------------- DECRYPT -----------------------------#
.align	16
.LDECRYPT:
	lea	AES_Td-.Lcbc_pic_point($sbox),$sbox

	# allocate aligned stack frame...
	lea	-64-248(%rsp),$key
	and	\$-64,$key

	# ... and make it doesn't alias with AES_Td modulo 4096
	mov	$sbox,%r10
	lea	2304($sbox),%r11
	mov	$key,%r12
	and	\$0xFFF,%r10	# s = $sbox&0xfff
	and	\$0xFFF,%r11	# e = ($sbox+2048+256)&0xfff
	and	\$0xFFF,%r12	# p = %rsp&0xfff

	cmp	%r11,%r12	# if (p=>e) %rsp =- (p-e);
	jb	.Lcbc_td_break_out
	sub	%r11,%r12
	sub	%r12,$key
	jmp	.Lcbc_td_ok
.Lcbc_td_break_out:		# else %rsp -= (p-s)&0xfff + framesz
	sub	%r10,%r12
	and	\$0xFFF,%r12
	add	\$320,%r12
	sub	%r12,$key
.align	4
.Lcbc_td_ok:

	xchg	%rsp,$key
	add	\$8,%rsp	# reserve for return address!
	mov	$key,$_rsp	# save %rsp
	mov	%rdx,$_len	# save copy of len
	mov	%rcx,$_key	# save copy of key
	mov	%r8,$_ivp	# save copy of ivp
	movl	\$0,$mark	# copy of aes_key->rounds = 0;
	mov	%r8,%rbp	# rearrange input arguments
	mov	%rsi,$out
	mov	%rdi,$inp
	mov	%rcx,$key

	# do we copy key schedule to stack?
	mov	$key,%r10
	sub	$sbox,%r10
	and	\$0xfff,%r10
	cmp	\$2304,%r10
	jb	.Lcbc_do_dcopy
	cmp	\$4096-248,%r10
	jb	.Lcbc_skip_dcopy
.align	4
.Lcbc_do_dcopy:
	mov	$key,%rsi
	lea	$aes_key,%rdi
	lea	$aes_key,$key
	mov	\$240/8,%ecx
	.long	0x90A548F3	# rep movsq
	mov	(%rsi),%eax	# copy aes_key->rounds
	mov	%eax,(%rdi)
.Lcbc_skip_dcopy:
	mov	$key,$keyp	# save key pointer

	# touch the 2304-byte table, four loads per 128-byte step
	mov	\$18,%ecx
.align	4
.Lcbc_prefetch_td:
	mov	0($sbox),%r10
	mov	32($sbox),%r11
	mov	64($sbox),%r12
	mov	96($sbox),%r13
	lea	128($sbox),$sbox
	sub	\$1,%ecx
	jnz	.Lcbc_prefetch_td
	sub	\$2304,$sbox

	cmp	$inp,$out
	je	.Lcbc_dec_in_place

	mov	%rbp,$ivec
.align	4
.Lcbc_dec_loop:
	mov	0($inp),$s0	# read input
	mov	4($inp),$s1
	mov	8($inp),$s2
	mov	12($inp),$s3
	mov	$inp,8+$ivec	# if ($verticalspin) save inp

	mov	$keyp,$key	# restore key
	call	_x86_64_AES_decrypt

	mov	$ivec,%rbp	# load ivp
	mov	8+$ivec,$inp	# if ($verticalspin) restore inp
	xor	0(%rbp),$s0	# xor iv
	xor	4(%rbp),$s1
	xor	8(%rbp),$s2
	xor	12(%rbp),$s3
	mov	$inp,%rbp	# current input, next iv

	mov	$_len,%r10	# load len
	sub	\$16,%r10
	jc	.Lcbc_dec_partial
	mov	%r10,$_len	# update len
	mov	%rbp,$ivec	# update ivp

	mov	$s0,0($out)	# write output
	mov	$s1,4($out)
	mov	$s2,8($out)
	mov	$s3,12($out)

	lea	16($inp),$inp
	lea	16($out),$out
	jnz	.Lcbc_dec_loop
.Lcbc_dec_end:
	mov	$_ivp,%r12	# load user ivp
	mov	0(%rbp),%r10	# load iv
	mov	8(%rbp),%r11
	mov	%r10,0(%r12)	# copy back to user
	mov	%r11,8(%r12)
	jmp	.Lcbc_cleanup

.align	4
.Lcbc_dec_partial:
	mov	$s0,0+$ivec	# dump output to stack
	mov	$s1,4+$ivec
	mov	$s2,8+$ivec
	mov	$s3,12+$ivec
	mov	$out,%rdi
	lea	$ivec,%rsi
	mov	\$16,%rcx
	add	%r10,%rcx	# number of bytes to copy
	.long	0xF689A4F3	# rep movsb
	jmp	.Lcbc_dec_end

.align	16
.Lcbc_dec_in_place:
	mov	0($inp),$s0	# load input
	mov	4($inp),$s1
	mov	8($inp),$s2
	mov	12($inp),$s3

	mov	$inp,$ivec	# if ($verticalspin) save inp
	mov	$keyp,$key
	call	_x86_64_AES_decrypt

	mov	$ivec,$inp	# if ($verticalspin) restore inp
	mov	$_ivp,%rbp
	xor	0(%rbp),$s0
	xor	4(%rbp),$s1
	xor	8(%rbp),$s2
	xor	12(%rbp),$s3

	mov	0($inp),%r10	# copy input to iv
	mov	8($inp),%r11
	mov	%r10,0(%rbp)
	mov	%r11,8(%rbp)

	mov	$s0,0($out)	# save output [zaps input]
	mov	$s1,4($out)
	mov	$s2,8($out)
	mov	$s3,12($out)

	mov	$_len,%rcx
	lea	16($inp),$inp
	lea	16($out),$out
	sub	\$16,%rcx
	jc	.Lcbc_dec_in_place_partial
	mov	%rcx,$_len
	jnz	.Lcbc_dec_in_place
	jmp	.Lcbc_cleanup

.align	4
.Lcbc_dec_in_place_partial:
	# one can argue if this is actually required
	lea	($out,%rcx),%rdi
	lea	(%rbp,%rcx),%rsi
	neg	%rcx
	.long	0xF689A4F3	# rep movsb	# restore tail
	jmp	.Lcbc_cleanup
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
___
}

# AES_Te: encryption lookup table, 64-byte aligned.  Each value passed to
# _data_word is emitted twice (".long v,v"), so every table entry occupies
# 8 bytes; the generated code indexes it with a scale of 8 and a byte offset
# of 0..3.
$code.=<<___;
.globl	AES_Te
.align	64
AES_Te:
___
	&_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
	&_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
	&_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
	&_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
	&_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
	&_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
	&_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
	&_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
	&_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
	&_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
	&_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
	&_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
	&_data_word(0x0c040408, 0x52c7c795,
0x65232346, 0x5ec3c39d);
	&_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
	&_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
	&_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
	&_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
	&_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
	&_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
	&_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
	&_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
	&_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
	&_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
	&_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
	&_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
	&_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
	&_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
	&_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
	&_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
	&_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
	&_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
	&_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
	&_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
	&_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
	&_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
	&_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
	&_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
	&_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
	&_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
	&_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
	&_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
	&_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
	&_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
	&_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
	&_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
	&_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
	&_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
	&_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
	&_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
	&_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
	&_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
	&_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
	&_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
	&_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
	&_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
	&_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
	&_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
	&_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
	&_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
	&_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
	&_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
	&_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
	&_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
	&_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
# Sixteen 32-bit words placed directly after the AES_Te entries: ten
# non-zero constants followed by six zero words.
#rcon:
$code.=<<___;
	.long	0x00000001, 0x00000002, 0x00000004, 0x00000008
	.long	0x00000010, 0x00000020, 0x00000040, 0x00000080
	.long	0x0000001b, 0x00000036, 0, 0, 0, 0, 0, 0
___
# AES_Td: decryption lookup table, 64-byte aligned, emitted through
# _data_word in the same doubled 8-bytes-per-entry layout as AES_Te.
$code.=<<___;
.globl	AES_Td
.align	64
AES_Td:
___
	&_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
	&_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
	&_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
	&_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
	&_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
	&_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
	&_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
	&_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
	&_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
	&_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
	&_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
	&_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
	&_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
	&_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
	&_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
	&_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
	&_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
	&_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
	&_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
	&_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
# AES_Td entries, continued: every value is emitted twice by _data_word.
	&_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
	&_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
	&_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
	&_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
	&_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
	&_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
	&_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
	&_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
	&_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
	&_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
	&_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
	&_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
	&_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
	&_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
	&_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
	&_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
	&_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
	&_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
	&_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
	&_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
	&_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
	&_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
	&_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
	&_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
	&_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
	&_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
	&_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
	&_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
	&_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
	&_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
	&_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
	&_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
	&_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
	&_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
	&_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
	&_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
	&_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
	&_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
	&_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
	&_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
	&_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
	&_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
	&_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
	&_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
# 256 single-byte entries emitted with data_byte directly after the doubled
# AES_Td words (one ".byte" line of eight values per call).
#Td4:
	&data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
	&data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
	&data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
	&data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
	&data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
	&data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
	&data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
# Remaining single-byte table rows (eight values per ".byte" line).
	&data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
	&data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
	&data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
	&data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
	&data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
	&data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
	&data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
	&data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
	&data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
	&data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
	&data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
	&data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
	&data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
	&data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
	&data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
	&data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
	&data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
	&data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
	&data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
	&data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
	&data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
	&data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
	&data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
	&data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
	&data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);

# Replace every `...` span in the accumulated text with the result of
# evaluating it as Perl, so constant expressions can be written inline.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

# Write the accumulated assembly to STDOUT, which was reopened near the top
# of the file as a pipe into x86_64-xlate.pl.
print $code;

# close() on a piped handle returns false when a write fails or when the
# child exits with a non-zero status, so check it here: otherwise a failed
# translation would go unnoticed.  With a plain non-zero exit $! is 0 and the
# status is in $?, hence both are reported.
close STDOUT or die "error closing STDOUT: $! (exit status $?)";