1160814Ssimon// ==================================================================== 2160814Ssimon// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL 3160814Ssimon// project. Rights for redistribution and usage in source and binary 4160814Ssimon// forms are granted according to the OpenSSL license. 5160814Ssimon// ==================================================================== 6160814Ssimon// 7160814Ssimon// What's wrong with compiler generated code? Compiler never uses 8160814Ssimon// variable 'shr' which is pairable with 'extr'/'dep' instructions. 9160814Ssimon// Then it uses 'zxt' which is an I-type, but can be replaced with 10160814Ssimon// 'and' which in turn can be assigned to M-port [there are twice as 11160814Ssimon// many M-ports as there are I-ports on Itanium 2]. By sacrificing a few 12160814Ssimon// registers for small constants (255, 24 and 16) to be used with 13160814Ssimon// 'shr' and 'and' instructions I can achieve better ILP, Instruction 14160814Ssimon// Level Parallelism, and performance. This code outperforms GCC 3.3 15160814Ssimon// generated code by over a factor of 2 (two), GCC 3.4 - by 70% and 16160814Ssimon// HP C - by 40%. Measured best-case scenario, i.e. aligned 17160814Ssimon// big-endian input, ECB timing on Itanium 2 is (18 + 13*rounds) 18160814Ssimon// ticks per block, or 9.25 CPU cycles per byte for 128 bit key. 19160814Ssimon 20194206Ssimon// Version 1.2 mitigates the hazard of cache-timing attacks by 21194206Ssimon// a) compressing S-boxes from 8KB to 2KB+256B, b) scheduling 22194206Ssimon// references to S-boxes for L2 cache latency, c) prefetching T[ed]4 23194206Ssimon// prior to the last round. As a result performance dropped to (26 + 15*rounds) 24194206Ssimon// ticks per block or 11 cycles per byte processed with 128-bit key. 25194206Ssimon// This is ~16% deterioration. For reference Itanium 2 L1 cache has 26194206Ssimon// 64 bytes line size and L2 - 128 bytes... 
27194206Ssimon 28194206Ssimon.ident "aes-ia64.S, version 1.2" 29160814Ssimon.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" 30160814Ssimon.explicit 31160814Ssimon.text 32160814Ssimon 33160814Ssimonrk0=r8; rk1=r9; 34160814Ssimon 35194206Ssimonpfssave=r2; 36194206Ssimonlcsave=r10; 37194206Ssimonprsave=r3; 38160814Ssimonmaskff=r11; 39160814Ssimontwenty4=r14; 40160814Ssimonsixteen=r15; 41160814Ssimon 42160814Ssimonte00=r16; te11=r17; te22=r18; te33=r19; 43160814Ssimonte01=r20; te12=r21; te23=r22; te30=r23; 44160814Ssimonte02=r24; te13=r25; te20=r26; te31=r27; 45160814Ssimonte03=r28; te10=r29; te21=r30; te32=r31; 46160814Ssimon 47160814Ssimon// these are rotating... 48160814Ssimont0=r32; s0=r33; 49160814Ssimont1=r34; s1=r35; 50160814Ssimont2=r36; s2=r37; 51160814Ssimont3=r38; s3=r39; 52160814Ssimon 53160814Ssimonte0=r40; te1=r41; te2=r42; te3=r43; 54160814Ssimon 55160814Ssimon#if defined(_HPUX_SOURCE) && !defined(_LP64) 56160814Ssimon# define ADDP addp4 57160814Ssimon#else 58160814Ssimon# define ADDP add 59160814Ssimon#endif 60160814Ssimon 61194206Ssimon// Offsets from Te0 62194206Ssimon#define TE0 0 63194206Ssimon#define TE2 2 64194206Ssimon#if defined(_HPUX_SOURCE) || defined(B_ENDIAN) 65194206Ssimon#define TE1 3 66194206Ssimon#define TE3 1 67194206Ssimon#else 68194206Ssimon#define TE1 1 69194206Ssimon#define TE3 3 70194206Ssimon#endif 71194206Ssimon 72160814Ssimon// This implies that AES_KEY comprises 32-bit key schedule elements 73160814Ssimon// even on LP64 platforms. 74160814Ssimon#ifndef KSZ 75160814Ssimon# define KSZ 4 76160814Ssimon# define LDKEY ld4 77160814Ssimon#endif 78160814Ssimon 79160814Ssimon.proc _ia64_AES_encrypt# 80160814Ssimon// Input: rk0-rk1 81160814Ssimon// te0 82160814Ssimon// te3 as AES_KEY->rounds!!! 
83160814Ssimon// s0-s3 84160814Ssimon// maskff,twenty4,sixteen 85160814Ssimon// Output: r16,r20,r24,r28 as s0-s3 86160814Ssimon// Clobber: r16-r31,rk0-rk1,r32-r43 87160814Ssimon.align 32 88160814Ssimon_ia64_AES_encrypt: 89194206Ssimon .prologue 90194206Ssimon .altrp b6 91194206Ssimon .body 92160814Ssimon{ .mmi; alloc r16=ar.pfs,12,0,0,8 93160814Ssimon LDKEY t0=[rk0],2*KSZ 94160814Ssimon mov pr.rot=1<<16 } 95160814Ssimon{ .mmi; LDKEY t1=[rk1],2*KSZ 96194206Ssimon add te1=TE1,te0 97160814Ssimon add te3=-3,te3 };; 98160814Ssimon{ .mib; LDKEY t2=[rk0],2*KSZ 99194206Ssimon mov ar.ec=2 } 100160814Ssimon{ .mib; LDKEY t3=[rk1],2*KSZ 101194206Ssimon add te2=TE2,te0 102160814Ssimon brp.loop.imp .Le_top,.Le_end-16 };; 103160814Ssimon 104160814Ssimon{ .mmi; xor s0=s0,t0 105160814Ssimon xor s1=s1,t1 106160814Ssimon mov ar.lc=te3 } 107160814Ssimon{ .mmi; xor s2=s2,t2 108160814Ssimon xor s3=s3,t3 109194206Ssimon add te3=TE3,te0 };; 110194206Ssimon 111160814Ssimon.align 32 112160814Ssimon.Le_top: 113160814Ssimon{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0] 114160814Ssimon (p0) and te33=s3,maskff // 0/0:s3&0xff 115160814Ssimon (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff 116160814Ssimon{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1] 117160814Ssimon (p0) and te30=s0,maskff // 0/1:s0&0xff 118160814Ssimon (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24 119160814Ssimon{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2] 120194206Ssimon (p0) shladd te33=te33,3,te3 // 1/0:te0+s0>>24 121160814Ssimon (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff 122160814Ssimon{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3] 123194206Ssimon (p0) shladd te30=te30,3,te3 // 1/1:te3+s0 124160814Ssimon (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24 125160814Ssimon{ .mmi; (p0) ld4 te33=[te33] // 2/0:te3[s3&0xff] 126194206Ssimon (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff 127160814Ssimon (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff 128160814Ssimon{ .mmi; (p0) ld4 te30=[te30] // 2/1:te3[s0] 129194206Ssimon (p0) shladd 
te23=te23,3,te2 // 2/1:te2+s3>>8 130160814Ssimon (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24 131160814Ssimon{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8] 132194206Ssimon (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8 133160814Ssimon (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff 134160814Ssimon{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8] 135194206Ssimon (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24 136160814Ssimon (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24 137160814Ssimon{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8] 138194206Ssimon (p0) shladd te21=te21,3,te2 // 4/3:te3+s2 139160814Ssimon (p0) extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff 140160814Ssimon{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24] 141194206Ssimon (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24 142160814Ssimon (p0) shr.u te13=s3,sixteen };; // 4/2:s3>>16 143160814Ssimon{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8] 144194206Ssimon (p0) shladd te11=te11,3,te1 // 5/0:te1+s1>>16 145160814Ssimon (p0) extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff 146160814Ssimon{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24] 147194206Ssimon (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24 148160814Ssimon (p0) and te31=s1,maskff };; // 5/2:s1&0xff 149160814Ssimon{ .mmi; (p0) ld4 te11=[te11] // 6/0:te1[s1>>16] 150194206Ssimon (p0) shladd te12=te12,3,te1 // 6/1:te1+s2>>16 151160814Ssimon (p0) extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff 152160814Ssimon{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24] 153194206Ssimon (p0) shladd te03=te03,3,te0 // 6/3:te1+s0>>16 154160814Ssimon (p0) and te32=s2,maskff };; // 6/3:s2&0xff 155194206Ssimon 156160814Ssimon{ .mmi; (p0) ld4 te12=[te12] // 7/1:te1[s2>>16] 157194206Ssimon (p0) shladd te31=te31,3,te3 // 7/2:te3+s1&0xff 158160814Ssimon (p0) and te13=te13,maskff} // 7/2:s3>>16&0xff 159160814Ssimon{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24] 160194206Ssimon (p0) shladd te32=te32,3,te3 // 7/3:te3+s2 161160814Ssimon (p0) xor t0=t0,te33 };; // 7/0: 162160814Ssimon{ .mmi; (p0) ld4 te31=[te31] // 
8/2:te3[s1] 163194206Ssimon (p0) shladd te13=te13,3,te1 // 8/2:te1+s3>>16 164160814Ssimon (p0) xor t0=t0,te22 } // 8/0: 165160814Ssimon{ .mmi; (p0) ld4 te32=[te32] // 8/3:te3[s2] 166194206Ssimon (p0) shladd te10=te10,3,te1 // 8/3:te1+s0>>16 167160814Ssimon (p0) xor t1=t1,te30 };; // 8/1: 168160814Ssimon{ .mmi; (p0) ld4 te13=[te13] // 9/2:te1[s3>>16] 169194206Ssimon (p0) ld4 te10=[te10] // 9/3:te1[s0>>16] 170194206Ssimon (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling 171194206Ssimon{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1: 172194206Ssimon (p0) xor t2=t2,te20 // 10[9]/2: 173194206Ssimon (p0) xor t3=t3,te21 };; // 10[9]/3: 174194206Ssimon{ .mmi; (p0) xor t0=t0,te11 // 11[10]/0:done! 175194206Ssimon (p0) xor t1=t1,te01 // 11[10]/1: 176194206Ssimon (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling 177194206Ssimon{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3: 178194206Ssimon (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17) 179194206Ssimon{ .mmi; (p0) xor t1=t1,te12 // 13[11]/1:done! 180194206Ssimon (p0) xor t2=t2,te31 // 13[11]/2: 181194206Ssimon (p0) xor t3=t3,te32 } // 13[11]/3: 182194206Ssimon{ .mmi; (p17) add te0=2048,te0 // 13[11]/ 183194206Ssimon (p17) add te1=2048+64-TE1,te1};; // 13[11]/ 184194206Ssimon{ .mib; (p0) xor t2=t2,te13 // 14[12]/2:done! 185194206Ssimon (p17) add te2=2048+128-TE2,te2} // 14[12]/ 186194206Ssimon{ .mib; (p0) xor t3=t3,te10 // 14[12]/3:done! 
187194206Ssimon (p17) add te3=2048+192-TE3,te3 // 14[12]/ 188160814Ssimon br.ctop.sptk .Le_top };; 189160814Ssimon.Le_end: 190194206Ssimon 191194206Ssimon 192194206Ssimon{ .mmi; ld8 te12=[te0] // prefetch Te4 193194206Ssimon ld8 te31=[te1] } 194194206Ssimon{ .mmi; ld8 te10=[te2] 195194206Ssimon ld8 te32=[te3] } 196194206Ssimon 197194206Ssimon{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0] 198194206Ssimon and te33=s3,maskff // 0/0:s3&0xff 199194206Ssimon extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff 200194206Ssimon{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1] 201194206Ssimon and te30=s0,maskff // 0/1:s0&0xff 202194206Ssimon shr.u te00=s0,twenty4 };; // 0/0:s0>>24 203194206Ssimon{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2] 204194206Ssimon add te33=te33,te0 // 1/0:te0+s0>>24 205194206Ssimon extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff 206194206Ssimon{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3] 207194206Ssimon add te30=te30,te0 // 1/1:te0+s0 208194206Ssimon shr.u te01=s1,twenty4 };; // 1/1:s1>>24 209194206Ssimon{ .mmi; ld1 te33=[te33] // 2/0:te0[s3&0xff] 210194206Ssimon add te22=te22,te0 // 2/0:te0+s2>>8&0xff 211194206Ssimon extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff 212194206Ssimon{ .mmi; ld1 te30=[te30] // 2/1:te0[s0] 213194206Ssimon add te23=te23,te0 // 2/1:te0+s3>>8 214194206Ssimon shr.u te02=s2,twenty4 };; // 2/2:s2>>24 215194206Ssimon{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8] 216194206Ssimon add te20=te20,te0 // 3/2:te0+s0>>8 217194206Ssimon extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff 218194206Ssimon{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8] 219194206Ssimon add te00=te00,te0 // 3/0:te0+s0>>24 220194206Ssimon shr.u te03=s3,twenty4 };; // 3/3:s3>>24 221194206Ssimon{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8] 222194206Ssimon add te21=te21,te0 // 4/3:te0+s2 223194206Ssimon extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff 224194206Ssimon{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24] 225194206Ssimon add te01=te01,te0 // 4/1:te0+s1>>24 226194206Ssimon shr.u te13=s3,sixteen };; // 4/2:s3>>16 227194206Ssimon{ .mmi; ld1 
te21=[te21] // 5/3:te0[s1>>8] 228194206Ssimon add te11=te11,te0 // 5/0:te0+s1>>16 229194206Ssimon extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff 230194206Ssimon{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24] 231194206Ssimon add te02=te02,te0 // 5/2:te0+s2>>24 232194206Ssimon and te31=s1,maskff };; // 5/2:s1&0xff 233194206Ssimon{ .mmi; ld1 te11=[te11] // 6/0:te0[s1>>16] 234194206Ssimon add te12=te12,te0 // 6/1:te0+s2>>16 235194206Ssimon extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff 236194206Ssimon{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24] 237194206Ssimon add te03=te03,te0 // 6/3:te0+s0>>16 238194206Ssimon and te32=s2,maskff };; // 6/3:s2&0xff 239194206Ssimon 240194206Ssimon{ .mmi; ld1 te12=[te12] // 7/1:te0[s2>>16] 241194206Ssimon add te31=te31,te0 // 7/2:te0+s1&0xff 242194206Ssimon dep te33=te22,te33,8,8} // 7/0: 243194206Ssimon{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24] 244194206Ssimon add te32=te32,te0 // 7/3:te0+s2 245194206Ssimon and te13=te13,maskff};; // 7/2:s3>>16&0xff 246194206Ssimon{ .mmi; ld1 te31=[te31] // 8/2:te0[s1] 247194206Ssimon add te13=te13,te0 // 8/2:te0+s3>>16 248194206Ssimon dep te30=te23,te30,8,8} // 8/1: 249194206Ssimon{ .mmi; ld1 te32=[te32] // 8/3:te0[s2] 250194206Ssimon add te10=te10,te0 // 8/3:te0+s0>>16 251194206Ssimon shl te00=te00,twenty4};; // 8/0: 252194206Ssimon{ .mii; ld1 te13=[te13] // 9/2:te0[s3>>16] 253194206Ssimon dep te33=te11,te33,16,8 // 9/0: 254194206Ssimon shl te01=te01,twenty4};; // 9/1: 255194206Ssimon{ .mii; ld1 te10=[te10] // 10/3:te0[s0>>16] 256194206Ssimon dep te31=te20,te31,8,8 // 10/2: 257194206Ssimon shl te02=te02,twenty4};; // 10/2: 258194206Ssimon{ .mii; xor t0=t0,te33 // 11/0: 259194206Ssimon dep te32=te21,te32,8,8 // 11/3: 260194206Ssimon shl te12=te12,sixteen};; // 11/1: 261194206Ssimon{ .mii; xor r16=t0,te00 // 12/0:done! 
262194206Ssimon dep te31=te13,te31,16,8 // 12/2: 263194206Ssimon shl te03=te03,twenty4};; // 12/3: 264194206Ssimon{ .mmi; xor t1=t1,te01 // 13/1: 265194206Ssimon xor t2=t2,te02 // 13/2: 266194206Ssimon dep te32=te10,te32,16,8};; // 13/3: 267194206Ssimon{ .mmi; xor t1=t1,te30 // 14/1: 268194206Ssimon xor r24=t2,te31 // 14/2:done! 269194206Ssimon xor t3=t3,te32 };; // 14/3: 270194206Ssimon{ .mib; xor r20=t1,te12 // 15/1:done! 271194206Ssimon xor r28=t3,te03 // 15/3:done! 272194206Ssimon br.ret.sptk b6 };; 273160814Ssimon.endp _ia64_AES_encrypt# 274160814Ssimon 275160814Ssimon// void AES_encrypt (const void *in,void *out,const AES_KEY *key); 276160814Ssimon.global AES_encrypt# 277160814Ssimon.proc AES_encrypt# 278160814Ssimon.align 32 279160814SsimonAES_encrypt: 280160814Ssimon .prologue 281194206Ssimon .save ar.pfs,pfssave 282194206Ssimon{ .mmi; alloc pfssave=ar.pfs,3,1,12,0 283194206Ssimon and out0=3,in0 284194206Ssimon mov r3=ip } 285194206Ssimon{ .mmi; ADDP in0=0,in0 286194206Ssimon mov loc0=psr.um 287160814Ssimon ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds 288160814Ssimon 289194206Ssimon{ .mmi; ld4 out11=[out11] // AES_KEY->rounds 290194206Ssimon add out8=(AES_Te#-AES_encrypt#),r3 // Te0 291194206Ssimon .save pr,prsave 292160814Ssimon mov prsave=pr } 293194206Ssimon{ .mmi; rum 1<<3 // clear um.ac 294194206Ssimon .save ar.lc,lcsave 295194206Ssimon mov lcsave=ar.lc };; 296160814Ssimon 297194206Ssimon .body 298160814Ssimon#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles... 
299160814Ssimon{ .mib; cmp.ne p6,p0=out0,r0 300160814Ssimon add out0=4,in0 301160814Ssimon(p6) br.dpnt.many .Le_i_unaligned };; 302160814Ssimon 303160814Ssimon{ .mmi; ld4 out1=[in0],8 // s0 304160814Ssimon and out9=3,in1 305160814Ssimon mov twenty4=24 } 306160814Ssimon{ .mmi; ld4 out3=[out0],8 // s1 307160814Ssimon ADDP rk0=0,in2 308160814Ssimon mov sixteen=16 };; 309160814Ssimon{ .mmi; ld4 out5=[in0] // s2 310160814Ssimon cmp.ne p6,p0=out9,r0 311160814Ssimon mov maskff=0xff } 312160814Ssimon{ .mmb; ld4 out7=[out0] // s3 313160814Ssimon ADDP rk1=KSZ,in2 314160814Ssimon br.call.sptk.many b6=_ia64_AES_encrypt };; 315160814Ssimon 316160814Ssimon{ .mib; ADDP in0=4,in1 317160814Ssimon ADDP in1=0,in1 318160814Ssimon(p6) br.spnt .Le_o_unaligned };; 319160814Ssimon 320194206Ssimon{ .mii; mov psr.um=loc0 321194206Ssimon mov ar.pfs=pfssave 322194206Ssimon mov ar.lc=lcsave };; 323160814Ssimon{ .mmi; st4 [in1]=r16,8 // s0 324160814Ssimon st4 [in0]=r20,8 // s1 325160814Ssimon mov pr=prsave,0x1ffff };; 326160814Ssimon{ .mmb; st4 [in1]=r24 // s2 327160814Ssimon st4 [in0]=r28 // s3 328160814Ssimon br.ret.sptk.many b0 };; 329160814Ssimon#endif 330160814Ssimon 331160814Ssimon.align 32 332160814Ssimon.Le_i_unaligned: 333160814Ssimon{ .mmi; add out0=1,in0 334160814Ssimon add out2=2,in0 335160814Ssimon add out4=3,in0 };; 336160814Ssimon{ .mmi; ld1 r16=[in0],4 337160814Ssimon ld1 r17=[out0],4 }//;; 338160814Ssimon{ .mmi; ld1 r18=[out2],4 339160814Ssimon ld1 out1=[out4],4 };; // s0 340160814Ssimon{ .mmi; ld1 r20=[in0],4 341160814Ssimon ld1 r21=[out0],4 }//;; 342160814Ssimon{ .mmi; ld1 r22=[out2],4 343160814Ssimon ld1 out3=[out4],4 };; // s1 344160814Ssimon{ .mmi; ld1 r24=[in0],4 345160814Ssimon ld1 r25=[out0],4 }//;; 346160814Ssimon{ .mmi; ld1 r26=[out2],4 347160814Ssimon ld1 out5=[out4],4 };; // s2 348160814Ssimon{ .mmi; ld1 r28=[in0] 349160814Ssimon ld1 r29=[out0] }//;; 350160814Ssimon{ .mmi; ld1 r30=[out2] 351160814Ssimon ld1 out7=[out4] };; // s3 352160814Ssimon 353160814Ssimon{ 
.mii; 354160814Ssimon dep out1=r16,out1,24,8 //;; 355160814Ssimon dep out3=r20,out3,24,8 }//;; 356160814Ssimon{ .mii; ADDP rk0=0,in2 357160814Ssimon dep out5=r24,out5,24,8 //;; 358160814Ssimon dep out7=r28,out7,24,8 };; 359160814Ssimon{ .mii; ADDP rk1=KSZ,in2 360160814Ssimon dep out1=r17,out1,16,8 //;; 361160814Ssimon dep out3=r21,out3,16,8 }//;; 362160814Ssimon{ .mii; mov twenty4=24 363160814Ssimon dep out5=r25,out5,16,8 //;; 364160814Ssimon dep out7=r29,out7,16,8 };; 365160814Ssimon{ .mii; mov sixteen=16 366160814Ssimon dep out1=r18,out1,8,8 //;; 367160814Ssimon dep out3=r22,out3,8,8 }//;; 368160814Ssimon{ .mii; mov maskff=0xff 369160814Ssimon dep out5=r26,out5,8,8 //;; 370160814Ssimon dep out7=r30,out7,8,8 };; 371160814Ssimon 372160814Ssimon{ .mib; br.call.sptk.many b6=_ia64_AES_encrypt };; 373160814Ssimon 374160814Ssimon.Le_o_unaligned: 375160814Ssimon{ .mii; ADDP out0=0,in1 376160814Ssimon extr.u r17=r16,8,8 // s0 377160814Ssimon shr.u r19=r16,twenty4 }//;; 378160814Ssimon{ .mii; ADDP out1=1,in1 379160814Ssimon extr.u r18=r16,16,8 380160814Ssimon shr.u r23=r20,twenty4 }//;; // s1 381160814Ssimon{ .mii; ADDP out2=2,in1 382160814Ssimon extr.u r21=r20,8,8 383194206Ssimon shr.u r22=r20,sixteen }//;; 384160814Ssimon{ .mii; ADDP out3=3,in1 385160814Ssimon extr.u r25=r24,8,8 // s2 386160814Ssimon shr.u r27=r24,twenty4 };; 387160814Ssimon{ .mii; st1 [out3]=r16,4 388160814Ssimon extr.u r26=r24,16,8 389194206Ssimon shr.u r31=r28,twenty4 }//;; // s3 390160814Ssimon{ .mii; st1 [out2]=r17,4 391160814Ssimon extr.u r29=r28,8,8 392160814Ssimon shr.u r30=r28,sixteen }//;; 393160814Ssimon 394160814Ssimon{ .mmi; st1 [out1]=r18,4 395160814Ssimon st1 [out0]=r19,4 };; 396160814Ssimon{ .mmi; st1 [out3]=r20,4 397160814Ssimon st1 [out2]=r21,4 }//;; 398160814Ssimon{ .mmi; st1 [out1]=r22,4 399160814Ssimon st1 [out0]=r23,4 };; 400160814Ssimon{ .mmi; st1 [out3]=r24,4 401160814Ssimon st1 [out2]=r25,4 402160814Ssimon mov pr=prsave,0x1ffff }//;; 403160814Ssimon{ .mmi; st1 [out1]=r26,4 
404160814Ssimon st1 [out0]=r27,4 405194206Ssimon mov ar.pfs=pfssave };; 406160814Ssimon{ .mmi; st1 [out3]=r28 407160814Ssimon st1 [out2]=r29 408194206Ssimon mov ar.lc=lcsave }//;; 409194206Ssimon{ .mmi; st1 [out1]=r30 410194206Ssimon st1 [out0]=r31 } 411194206Ssimon{ .mfb; mov psr.um=loc0 // restore user mask 412160814Ssimon br.ret.sptk.many b0 };; 413160814Ssimon.endp AES_encrypt# 414160814Ssimon 415160814Ssimon// *AES_decrypt are autogenerated by the following script: 416160814Ssimon#if 0 417160814Ssimon#!/usr/bin/env perl 418160814Ssimonprint "// *AES_decrypt are autogenerated by the following script:\n#if 0\n"; 419160814Ssimonopen(PROG,'<'.$0); while(<PROG>) { print; } close(PROG); 420160814Ssimonprint "#endif\n"; 421160814Ssimonwhile(<>) { 422160814Ssimon $process=1 if (/\.proc\s+_ia64_AES_encrypt/); 423160814Ssimon next if (!$process); 424160814Ssimon 425160814Ssimon #s/te00=s0/td00=s0/; s/te00/td00/g; 426160814Ssimon s/te11=s1/td13=s3/; s/te11/td13/g; 427160814Ssimon #s/te22=s2/td22=s2/; s/te22/td22/g; 428160814Ssimon s/te33=s3/td31=s1/; s/te33/td31/g; 429160814Ssimon 430160814Ssimon #s/te01=s1/td01=s1/; s/te01/td01/g; 431160814Ssimon s/te12=s2/td10=s0/; s/te12/td10/g; 432160814Ssimon #s/te23=s3/td23=s3/; s/te23/td23/g; 433160814Ssimon s/te30=s0/td32=s2/; s/te30/td32/g; 434160814Ssimon 435160814Ssimon #s/te02=s2/td02=s2/; s/te02/td02/g; 436160814Ssimon s/te13=s3/td11=s1/; s/te13/td11/g; 437160814Ssimon #s/te20=s0/td20=s0/; s/te20/td20/g; 438160814Ssimon s/te31=s1/td33=s3/; s/te31/td33/g; 439160814Ssimon 440160814Ssimon #s/te03=s3/td03=s3/; s/te03/td03/g; 441160814Ssimon s/te10=s0/td12=s2/; s/te10/td12/g; 442160814Ssimon #s/te21=s1/td21=s1/; s/te21/td21/g; 443160814Ssimon s/te32=s2/td30=s0/; s/te32/td30/g; 444160814Ssimon 445160814Ssimon s/td/te/g; 446160814Ssimon 447160814Ssimon s/AES_encrypt/AES_decrypt/g; 448160814Ssimon s/\.Le_/.Ld_/g; 449160814Ssimon s/AES_Te#/AES_Td#/g; 450160814Ssimon 451160814Ssimon print; 452160814Ssimon 453160814Ssimon exit if 
(/\.endp\s+AES_decrypt/); 454160814Ssimon} 455160814Ssimon#endif 456160814Ssimon.proc _ia64_AES_decrypt# 457160814Ssimon// Input: rk0-rk1 458160814Ssimon// te0 459160814Ssimon// te3 as AES_KEY->rounds!!! 460160814Ssimon// s0-s3 461160814Ssimon// maskff,twenty4,sixteen 462160814Ssimon// Output: r16,r20,r24,r28 as s0-s3 463160814Ssimon// Clobber: r16-r31,rk0-rk1,r32-r43 464160814Ssimon.align 32 465160814Ssimon_ia64_AES_decrypt: 466194206Ssimon .prologue 467194206Ssimon .altrp b6 468194206Ssimon .body 469160814Ssimon{ .mmi; alloc r16=ar.pfs,12,0,0,8 470160814Ssimon LDKEY t0=[rk0],2*KSZ 471160814Ssimon mov pr.rot=1<<16 } 472160814Ssimon{ .mmi; LDKEY t1=[rk1],2*KSZ 473194206Ssimon add te1=TE1,te0 474160814Ssimon add te3=-3,te3 };; 475160814Ssimon{ .mib; LDKEY t2=[rk0],2*KSZ 476194206Ssimon mov ar.ec=2 } 477160814Ssimon{ .mib; LDKEY t3=[rk1],2*KSZ 478194206Ssimon add te2=TE2,te0 479160814Ssimon brp.loop.imp .Ld_top,.Ld_end-16 };; 480160814Ssimon 481160814Ssimon{ .mmi; xor s0=s0,t0 482160814Ssimon xor s1=s1,t1 483160814Ssimon mov ar.lc=te3 } 484160814Ssimon{ .mmi; xor s2=s2,t2 485160814Ssimon xor s3=s3,t3 486194206Ssimon add te3=TE3,te0 };; 487194206Ssimon 488160814Ssimon.align 32 489160814Ssimon.Ld_top: 490160814Ssimon{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0] 491160814Ssimon (p0) and te31=s1,maskff // 0/0:s3&0xff 492160814Ssimon (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff 493160814Ssimon{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1] 494160814Ssimon (p0) and te32=s2,maskff // 0/1:s0&0xff 495160814Ssimon (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24 496160814Ssimon{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2] 497194206Ssimon (p0) shladd te31=te31,3,te3 // 1/0:te0+s0>>24 498160814Ssimon (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff 499160814Ssimon{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3] 500194206Ssimon (p0) shladd te32=te32,3,te3 // 1/1:te3+s0 501160814Ssimon (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24 502160814Ssimon{ .mmi; (p0) ld4 te31=[te31] // 
2/0:te3[s3&0xff] 503194206Ssimon (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff 504160814Ssimon (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff 505160814Ssimon{ .mmi; (p0) ld4 te32=[te32] // 2/1:te3[s0] 506194206Ssimon (p0) shladd te23=te23,3,te2 // 2/1:te2+s3>>8 507160814Ssimon (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24 508160814Ssimon{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8] 509194206Ssimon (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8 510160814Ssimon (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff 511160814Ssimon{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8] 512194206Ssimon (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24 513160814Ssimon (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24 514160814Ssimon{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8] 515194206Ssimon (p0) shladd te21=te21,3,te2 // 4/3:te3+s2 516160814Ssimon (p0) extr.u te13=s3,16,8 } // 4/0:s1>>16&0xff 517160814Ssimon{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24] 518194206Ssimon (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24 519160814Ssimon (p0) shr.u te11=s1,sixteen };; // 4/2:s3>>16 520160814Ssimon{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8] 521194206Ssimon (p0) shladd te13=te13,3,te1 // 5/0:te1+s1>>16 522160814Ssimon (p0) extr.u te10=s0,16,8 } // 5/1:s2>>16&0xff 523160814Ssimon{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24] 524194206Ssimon (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24 525160814Ssimon (p0) and te33=s3,maskff };; // 5/2:s1&0xff 526160814Ssimon{ .mmi; (p0) ld4 te13=[te13] // 6/0:te1[s1>>16] 527194206Ssimon (p0) shladd te10=te10,3,te1 // 6/1:te1+s2>>16 528160814Ssimon (p0) extr.u te12=s2,16,8 } // 6/3:s0>>16&0xff 529160814Ssimon{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24] 530194206Ssimon (p0) shladd te03=te03,3,te0 // 6/3:te1+s0>>16 531160814Ssimon (p0) and te30=s0,maskff };; // 6/3:s2&0xff 532194206Ssimon 533160814Ssimon{ .mmi; (p0) ld4 te10=[te10] // 7/1:te1[s2>>16] 534194206Ssimon (p0) shladd te33=te33,3,te3 // 7/2:te3+s1&0xff 535160814Ssimon (p0) and te11=te11,maskff} // 
7/2:s3>>16&0xff 536160814Ssimon{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24] 537194206Ssimon (p0) shladd te30=te30,3,te3 // 7/3:te3+s2 538160814Ssimon (p0) xor t0=t0,te31 };; // 7/0: 539160814Ssimon{ .mmi; (p0) ld4 te33=[te33] // 8/2:te3[s1] 540194206Ssimon (p0) shladd te11=te11,3,te1 // 8/2:te1+s3>>16 541160814Ssimon (p0) xor t0=t0,te22 } // 8/0: 542160814Ssimon{ .mmi; (p0) ld4 te30=[te30] // 8/3:te3[s2] 543194206Ssimon (p0) shladd te12=te12,3,te1 // 8/3:te1+s0>>16 544160814Ssimon (p0) xor t1=t1,te32 };; // 8/1: 545160814Ssimon{ .mmi; (p0) ld4 te11=[te11] // 9/2:te1[s3>>16] 546194206Ssimon (p0) ld4 te12=[te12] // 9/3:te1[s0>>16] 547194206Ssimon (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling 548194206Ssimon{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1: 549194206Ssimon (p0) xor t2=t2,te20 // 10[9]/2: 550194206Ssimon (p0) xor t3=t3,te21 };; // 10[9]/3: 551194206Ssimon{ .mmi; (p0) xor t0=t0,te13 // 11[10]/0:done! 552194206Ssimon (p0) xor t1=t1,te01 // 11[10]/1: 553194206Ssimon (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling 554194206Ssimon{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3: 555194206Ssimon (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17) 556194206Ssimon{ .mmi; (p0) xor t1=t1,te10 // 13[11]/1:done! 557194206Ssimon (p0) xor t2=t2,te33 // 13[11]/2: 558194206Ssimon (p0) xor t3=t3,te30 } // 13[11]/3: 559194206Ssimon{ .mmi; (p17) add te0=2048,te0 // 13[11]/ 560194206Ssimon (p17) add te1=2048+64-TE1,te1};; // 13[11]/ 561194206Ssimon{ .mib; (p0) xor t2=t2,te11 // 14[12]/2:done! 562194206Ssimon (p17) add te2=2048+128-TE2,te2} // 14[12]/ 563194206Ssimon{ .mib; (p0) xor t3=t3,te12 // 14[12]/3:done! 
564194206Ssimon (p17) add te3=2048+192-TE3,te3 // 14[12]/ 565160814Ssimon br.ctop.sptk .Ld_top };; 566160814Ssimon.Ld_end: 567194206Ssimon 568194206Ssimon 569194206Ssimon{ .mmi; ld8 te10=[te0] // prefetch Td4 570194206Ssimon ld8 te33=[te1] } 571194206Ssimon{ .mmi; ld8 te12=[te2] 572194206Ssimon ld8 te30=[te3] } 573194206Ssimon 574194206Ssimon{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0] 575194206Ssimon and te31=s1,maskff // 0/0:s3&0xff 576194206Ssimon extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff 577194206Ssimon{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1] 578194206Ssimon and te32=s2,maskff // 0/1:s0&0xff 579194206Ssimon shr.u te00=s0,twenty4 };; // 0/0:s0>>24 580194206Ssimon{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2] 581194206Ssimon add te31=te31,te0 // 1/0:te0+s0>>24 582194206Ssimon extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff 583194206Ssimon{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3] 584194206Ssimon add te32=te32,te0 // 1/1:te0+s0 585194206Ssimon shr.u te01=s1,twenty4 };; // 1/1:s1>>24 586194206Ssimon{ .mmi; ld1 te31=[te31] // 2/0:te0[s3&0xff] 587194206Ssimon add te22=te22,te0 // 2/0:te0+s2>>8&0xff 588194206Ssimon extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff 589194206Ssimon{ .mmi; ld1 te32=[te32] // 2/1:te0[s0] 590194206Ssimon add te23=te23,te0 // 2/1:te0+s3>>8 591194206Ssimon shr.u te02=s2,twenty4 };; // 2/2:s2>>24 592194206Ssimon{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8] 593194206Ssimon add te20=te20,te0 // 3/2:te0+s0>>8 594194206Ssimon extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff 595194206Ssimon{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8] 596194206Ssimon add te00=te00,te0 // 3/0:te0+s0>>24 597194206Ssimon shr.u te03=s3,twenty4 };; // 3/3:s3>>24 598194206Ssimon{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8] 599194206Ssimon add te21=te21,te0 // 4/3:te0+s2 600194206Ssimon extr.u te13=s3,16,8 } // 4/0:s1>>16&0xff 601194206Ssimon{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24] 602194206Ssimon add te01=te01,te0 // 4/1:te0+s1>>24 603194206Ssimon shr.u te11=s1,sixteen };; // 4/2:s3>>16 604194206Ssimon{ .mmi; ld1 
te21=[te21] // 5/3:te0[s1>>8] 605194206Ssimon add te13=te13,te0 // 5/0:te0+s1>>16 606194206Ssimon extr.u te10=s0,16,8 } // 5/1:s2>>16&0xff 607194206Ssimon{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24] 608194206Ssimon add te02=te02,te0 // 5/2:te0+s2>>24 609194206Ssimon and te33=s3,maskff };; // 5/2:s1&0xff 610194206Ssimon{ .mmi; ld1 te13=[te13] // 6/0:te0[s1>>16] 611194206Ssimon add te10=te10,te0 // 6/1:te0+s2>>16 612194206Ssimon extr.u te12=s2,16,8 } // 6/3:s0>>16&0xff 613194206Ssimon{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24] 614194206Ssimon add te03=te03,te0 // 6/3:te0+s0>>16 615194206Ssimon and te30=s0,maskff };; // 6/3:s2&0xff 616194206Ssimon 617194206Ssimon{ .mmi; ld1 te10=[te10] // 7/1:te0[s2>>16] 618194206Ssimon add te33=te33,te0 // 7/2:te0+s1&0xff 619194206Ssimon dep te31=te22,te31,8,8} // 7/0: 620194206Ssimon{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24] 621194206Ssimon add te30=te30,te0 // 7/3:te0+s2 622194206Ssimon and te11=te11,maskff};; // 7/2:s3>>16&0xff 623194206Ssimon{ .mmi; ld1 te33=[te33] // 8/2:te0[s1] 624194206Ssimon add te11=te11,te0 // 8/2:te0+s3>>16 625194206Ssimon dep te32=te23,te32,8,8} // 8/1: 626194206Ssimon{ .mmi; ld1 te30=[te30] // 8/3:te0[s2] 627194206Ssimon add te12=te12,te0 // 8/3:te0+s0>>16 628194206Ssimon shl te00=te00,twenty4};; // 8/0: 629194206Ssimon{ .mii; ld1 te11=[te11] // 9/2:te0[s3>>16] 630194206Ssimon dep te31=te13,te31,16,8 // 9/0: 631194206Ssimon shl te01=te01,twenty4};; // 9/1: 632194206Ssimon{ .mii; ld1 te12=[te12] // 10/3:te0[s0>>16] 633194206Ssimon dep te33=te20,te33,8,8 // 10/2: 634194206Ssimon shl te02=te02,twenty4};; // 10/2: 635194206Ssimon{ .mii; xor t0=t0,te31 // 11/0: 636194206Ssimon dep te30=te21,te30,8,8 // 11/3: 637194206Ssimon shl te10=te10,sixteen};; // 11/1: 638194206Ssimon{ .mii; xor r16=t0,te00 // 12/0:done! 
// NOTE: the next rows are the tail of _ia64_AES_decrypt, whose beginning
// lies above this span; they are reproduced unchanged.
	dep	te33=te11,te33,16,8	// 12/2:
	shl	te03=te03,twenty4};;	// 12/3:
{ .mmi;	xor	t1=t1,te01		// 13/1:
	xor	t2=t2,te02		// 13/2:
	dep	te30=te12,te30,16,8};;	// 13/3:
{ .mmi;	xor	t1=t1,te32		// 14/1:
	xor	r24=t2,te33		// 14/2:done!
	xor	t3=t3,te30	};;	// 14/3:
{ .mib;	xor	r20=t1,te10		// 15/1:done!
	xor	r28=t3,te03		// 15/3:done!
	br.ret.sptk	b6	};;
.endp	_ia64_AES_decrypt#

// void AES_decrypt (const void *in,void *out,const AES_KEY *key);
//
// Public entry point: reads 16 bytes from `in`, runs them through
// _ia64_AES_decrypt and writes 16 bytes to `out`.
//
//   in0 = in, in1 = out, in2 = key
//
// Set-up handed to _ia64_AES_decrypt (called through b6):
//   out1/out3/out5/out7  the four 32-bit input words, first byte of each
//                        word in bits 31..24
//   out8                 address of AES_Td, computed ip-relative
//   out11                key->rounds, loaded from key+KSZ*60
//   rk0, rk1             key+0 and key+KSZ
//   maskff/twenty4/sixteen  constants 0xff / 24 / 16
// The four result words are picked up from r16, r20, r24 and r28.
// NOTE(review): with 3 inputs + 1 local, out1/3/5/7, out8 and out11 are
// presumably what the callee sees as s0..s3 (r33/r35/r37/r39), te0 (r40)
// and te3 (r43), matching the input list documented for
// _ia64_AES_encrypt at the top of the file - confirm against the callee.
//
// ar.pfs, pr, ar.lc and psr.um are saved in the prologue and restored on
// every exit path.
.global	AES_decrypt#
.proc	AES_decrypt#
.align	32
AES_decrypt:
	.prologue
	.save	ar.pfs,pfssave
	// 3 inputs, 1 local (loc0), 12 outputs, no rotating registers.
{ .mmi;	alloc	pfssave=ar.pfs,3,1,12,0
	and	out0=3,in0			// low two bits of `in` (alignment test below)
	mov	r3=ip			}	// anchor for the ip-relative table address
{ .mmi;	ADDP	in0=0,in0
	mov	loc0=psr.um			// saved user mask, restored before return
	ADDP	out11=KSZ*60,in2	};;	// &AES_KEY->rounds

	// prsave is an alias of r3 (see the register aliases at the top of
	// the file): r3 is read as the ip copy by the `add` and overwritten
	// by `mov prsave=pr` within the same instruction group.
{ .mmi;	ld4	out11=[out11]			// AES_KEY->rounds
	add	out8=(AES_Td#-AES_decrypt#),r3	// &AES_Td
	.save	pr,prsave
	mov	prsave=pr		}
{ .mmi;	rum	1<<3				// clear um.ac
	.save	ar.lc,lcsave
	mov	lcsave=ar.lc		};;

	.body
#if defined(_HPUX_SOURCE)	// HPUX is big-endian, cut 15+15 cycles...
	// Word-wise fast path, assembled only for HP-UX.  Taken when `in`
	// is 4-byte aligned; otherwise control goes to the byte-wise loader.
	// cmp.ne reads the old out0 (in & 3) while `add` rewrites out0 as
	// in+4 in the same group.
{ .mib;	cmp.ne	p6,p0=out0,r0
	add	out0=4,in0
(p6)	br.dpnt.many	.Ld_i_unaligned	};;

	// in0 walks words 0 and 2, out0 walks words 1 and 3.
{ .mmi;	ld4	out1=[in0],8		// s0
	and	out9=3,in1			// low two bits of `out`
	mov	twenty4=24		}
{ .mmi;	ld4	out3=[out0],8		// s1
	ADDP	rk0=0,in2
	mov	sixteen=16		};;
{ .mmi;	ld4	out5=[in0]		// s2
	cmp.ne	p6,p0=out9,r0			// p6 = `out` is not 4-byte aligned
	mov	maskff=0xff		}
{ .mmb;	ld4	out7=[out0]		// s3
	ADDP	rk1=KSZ,in2
	br.call.sptk.many	b6=_ia64_AES_decrypt	};;

	// in0 is reused as out+4, in1 becomes the converted `out` pointer.
{ .mib;	ADDP	in0=4,in1
	ADDP	in1=0,in1
(p6)	br.spnt	.Ld_o_unaligned		};;

	// Aligned output: restore state and store the four result words.
{ .mii;	mov	psr.um=loc0
	mov	ar.pfs=pfssave
	mov	ar.lc=lcsave		};;
{ .mmi;	st4	[in1]=r16,8		// s0
	st4	[in0]=r20,8		// s1
	mov	pr=prsave,0x1ffff	};;
{ .mmb;	st4	[in1]=r24		// s2
	st4	[in0]=r28		// s3
	br.ret.sptk.many	b0	};;
#endif

// Byte-wise input path.  This is the only input path when _HPUX_SOURCE is
// not defined (the block above is then not assembled); on HP-UX it is
// taken when `in` is not 4-byte aligned.
// Four pointers (in+0..in+3) step by 4, so each pass fetches the four
// bytes of one word: byte 0 -> r16/r20/r24/r28, byte 1 -> r17/r21/r25/r29,
// byte 2 -> r18/r22/r26/r30, byte 3 -> out1/out3/out5/out7 (bits 7..0).
.align	32
.Ld_i_unaligned:
{ .mmi;	add	out0=1,in0
	add	out2=2,in0
	add	out4=3,in0	};;
{ .mmi;	ld1	r16=[in0],4
	ld1	r17=[out0],4	}//;;
{ .mmi;	ld1	r18=[out2],4
	ld1	out1=[out4],4	};;	// s0
{ .mmi;	ld1	r20=[in0],4
	ld1	r21=[out0],4	}//;;
{ .mmi;	ld1	r22=[out2],4
	ld1	out3=[out4],4	};;	// s1
{ .mmi;	ld1	r24=[in0],4
	ld1	r25=[out0],4	}//;;
{ .mmi;	ld1	r26=[out2],4
	ld1	out5=[out4],4	};;	// s2
{ .mmi;	ld1	r28=[in0]
	ld1	r29=[out0]	}//;;
{ .mmi;	ld1	r30=[out2]
	ld1	out7=[out4]	};;	// s3

	// Merge the bytes into words: byte 0 is deposited at bits 31..24,
	// byte 1 at bits 23..16, byte 2 at bits 15..8.  The key pointers and
	// the three constants are set up in the otherwise free M slots.
{ .mii;
	dep	out1=r16,out1,24,8	//;;
	dep	out3=r20,out3,24,8	}//;;
{ .mii;	ADDP	rk0=0,in2
	dep	out5=r24,out5,24,8	//;;
	dep	out7=r28,out7,24,8	};;
{ .mii;	ADDP	rk1=KSZ,in2
	dep	out1=r17,out1,16,8	//;;
	dep	out3=r21,out3,16,8	}//;;
{ .mii;	mov	twenty4=24
	dep	out5=r25,out5,16,8	//;;
	dep	out7=r29,out7,16,8	};;
{ .mii;	mov	sixteen=16
	dep	out1=r18,out1,8,8	//;;
	dep	out3=r22,out3,8,8	}//;;
{ .mii;	mov	maskff=0xff
	dep	out5=r26,out5,8,8	//;;
	dep	out7=r30,out7,8,8	};;

{ .mib;	br.call.sptk.many	b6=_ia64_AES_decrypt	};;

// Byte-wise output path.  Reached by falling through from the call above,
// or by the (p6) branch in the HP-UX block when `out` is not 4-byte
// aligned.  Result words are in r16 (s0), r20 (s1), r24 (s2), r28 (s3).
// out0..out3 point at out+0..out+3 and step by 4; for each word the byte
// taken from bits 31..24 goes to out0, 23..16 to out1, 15..8 to out2 and
// 7..0 to out3.  st1 writes a single byte, so the values produced by
// `shr.u ...,sixteen` and the unshifted words need no further masking.
.Ld_o_unaligned:
{ .mii;	ADDP	out0=0,in1
	extr.u	r17=r16,8,8			// s0
	shr.u	r19=r16,twenty4		}//;;
{ .mii;	ADDP	out1=1,in1
	extr.u	r18=r16,16,8
	shr.u	r23=r20,twenty4		}//;;	// s1
{ .mii;	ADDP	out2=2,in1
	extr.u	r21=r20,8,8
	shr.u	r22=r20,sixteen		}//;;
{ .mii;	ADDP	out3=3,in1
	extr.u	r25=r24,8,8			// s2
	shr.u	r27=r24,twenty4		};;
{ .mii;	st1	[out3]=r16,4
	extr.u	r26=r24,16,8
	shr.u	r31=r28,twenty4		}//;;	// s3
{ .mii;	st1	[out2]=r17,4
	extr.u	r29=r28,8,8
	shr.u	r30=r28,sixteen		}//;;

	// Remaining stores, interleaved with the restore of pr, ar.pfs,
	// ar.lc and finally psr.um.
{ .mmi;	st1	[out1]=r18,4
	st1	[out0]=r19,4		};;
{ .mmi;	st1	[out3]=r20,4
	st1	[out2]=r21,4		}//;;
{ .mmi;	st1	[out1]=r22,4
	st1	[out0]=r23,4		};;
{ .mmi;	st1	[out3]=r24,4
	st1	[out2]=r25,4
	mov	pr=prsave,0x1ffff	}//;;
{ .mmi;	st1	[out1]=r26,4
	st1	[out0]=r27,4
	mov	ar.pfs=pfssave		};;
{ .mmi;	st1	[out3]=r28
	st1	[out2]=r29
	mov	ar.lc=lcsave		}//;;
{ .mmi;	st1	[out1]=r30
	st1	[out0]=r31		}
{ .mfb;	mov	psr.um=loc0			// restore user mask
	br.ret.sptk.many	b0	};;
.endp	AES_decrypt#

// leave it in .text segment...
//
// AES_Te: 256 32-bit entries, each emitted twice back to back
// (256 * 8 = 2048 bytes), followed by the 256-byte Te4 table; the total
// matches the `.size AES_Te#,2048+256` directive that closes the table.
// NOTE(review): the duplication presumably lets a 4-byte load at byte
// offset 0..3 inside a pair return a byte-rotated copy of the entry (cf.
// the TE0..TE3 offsets defined near the top of the file) - confirm
// against _ia64_AES_encrypt.
.align	64
.global	AES_Te#
.type	AES_Te#,@object
AES_Te:	data4	0xc66363a5,0xc66363a5, 0xf87c7c84,0xf87c7c84
	data4	0xee777799,0xee777799, 0xf67b7b8d,0xf67b7b8d
	data4	0xfff2f20d,0xfff2f20d, 0xd66b6bbd,0xd66b6bbd
	data4	0xde6f6fb1,0xde6f6fb1, 0x91c5c554,0x91c5c554
	data4	0x60303050,0x60303050, 0x02010103,0x02010103
	data4	0xce6767a9,0xce6767a9, 0x562b2b7d,0x562b2b7d
	data4	0xe7fefe19,0xe7fefe19, 0xb5d7d762,0xb5d7d762
	data4	0x4dababe6,0x4dababe6, 0xec76769a,0xec76769a
	data4	0x8fcaca45,0x8fcaca45, 0x1f82829d,0x1f82829d
	data4	0x89c9c940,0x89c9c940, 0xfa7d7d87,0xfa7d7d87
	data4	0xeffafa15,0xeffafa15, 0xb25959eb,0xb25959eb
	data4	0x8e4747c9,0x8e4747c9, 0xfbf0f00b,0xfbf0f00b
	data4	0x41adadec,0x41adadec, 0xb3d4d467,0xb3d4d467
	data4	0x5fa2a2fd,0x5fa2a2fd, 0x45afafea,0x45afafea
	data4	0x239c9cbf,0x239c9cbf, 0x53a4a4f7,0x53a4a4f7
	data4	0xe4727296,0xe4727296, 0x9bc0c05b,0x9bc0c05b
	data4	0x75b7b7c2,0x75b7b7c2, 0xe1fdfd1c,0xe1fdfd1c
	data4	0x3d9393ae,0x3d9393ae, 0x4c26266a,0x4c26266a
	data4	0x6c36365a,0x6c36365a, 0x7e3f3f41,0x7e3f3f41
	data4	0xf5f7f702,0xf5f7f702, 0x83cccc4f,0x83cccc4f
	data4	0x6834345c,0x6834345c, 0x51a5a5f4,0x51a5a5f4
	data4	0xd1e5e534,0xd1e5e534,
0xf9f1f108,0xf9f1f108
	// Remaining rows of the AES_Te table (values unchanged): every 32-bit
	// entry appears twice in a row; the byte table labelled Te4 follows.
	data4	0xe2717193,0xe2717193, 0xabd8d873,0xabd8d873
	data4	0x62313153,0x62313153, 0x2a15153f,0x2a15153f
	data4	0x0804040c,0x0804040c, 0x95c7c752,0x95c7c752
	data4	0x46232365,0x46232365, 0x9dc3c35e,0x9dc3c35e
	data4	0x30181828,0x30181828, 0x379696a1,0x379696a1
	data4	0x0a05050f,0x0a05050f, 0x2f9a9ab5,0x2f9a9ab5
	data4	0x0e070709,0x0e070709, 0x24121236,0x24121236
	data4	0x1b80809b,0x1b80809b, 0xdfe2e23d,0xdfe2e23d
	data4	0xcdebeb26,0xcdebeb26, 0x4e272769,0x4e272769
	data4	0x7fb2b2cd,0x7fb2b2cd, 0xea75759f,0xea75759f
	data4	0x1209091b,0x1209091b, 0x1d83839e,0x1d83839e
	data4	0x582c2c74,0x582c2c74, 0x341a1a2e,0x341a1a2e
	data4	0x361b1b2d,0x361b1b2d, 0xdc6e6eb2,0xdc6e6eb2
	data4	0xb45a5aee,0xb45a5aee, 0x5ba0a0fb,0x5ba0a0fb
	data4	0xa45252f6,0xa45252f6, 0x763b3b4d,0x763b3b4d
	data4	0xb7d6d661,0xb7d6d661, 0x7db3b3ce,0x7db3b3ce
	data4	0x5229297b,0x5229297b, 0xdde3e33e,0xdde3e33e
	data4	0x5e2f2f71,0x5e2f2f71, 0x13848497,0x13848497
	data4	0xa65353f5,0xa65353f5, 0xb9d1d168,0xb9d1d168
	data4	0x00000000,0x00000000, 0xc1eded2c,0xc1eded2c
	data4	0x40202060,0x40202060, 0xe3fcfc1f,0xe3fcfc1f
	data4	0x79b1b1c8,0x79b1b1c8, 0xb65b5bed,0xb65b5bed
	data4	0xd46a6abe,0xd46a6abe, 0x8dcbcb46,0x8dcbcb46
	data4	0x67bebed9,0x67bebed9, 0x7239394b,0x7239394b
	data4	0x944a4ade,0x944a4ade, 0x984c4cd4,0x984c4cd4
	data4	0xb05858e8,0xb05858e8, 0x85cfcf4a,0x85cfcf4a
	data4	0xbbd0d06b,0xbbd0d06b, 0xc5efef2a,0xc5efef2a
	data4	0x4faaaae5,0x4faaaae5, 0xedfbfb16,0xedfbfb16
	data4	0x864343c5,0x864343c5, 0x9a4d4dd7,0x9a4d4dd7
	data4	0x66333355,0x66333355, 0x11858594,0x11858594
	data4	0x8a4545cf,0x8a4545cf, 0xe9f9f910,0xe9f9f910
	data4	0x04020206,0x04020206, 0xfe7f7f81,0xfe7f7f81
	data4	0xa05050f0,0xa05050f0, 0x783c3c44,0x783c3c44
	data4	0x259f9fba,0x259f9fba, 0x4ba8a8e3,0x4ba8a8e3
	data4	0xa25151f3,0xa25151f3, 0x5da3a3fe,0x5da3a3fe
	data4	0x804040c0,0x804040c0, 0x058f8f8a,0x058f8f8a
	data4	0x3f9292ad,0x3f9292ad, 0x219d9dbc,0x219d9dbc
	data4	0x70383848,0x70383848, 0xf1f5f504,0xf1f5f504
	data4	0x63bcbcdf,0x63bcbcdf, 0x77b6b6c1,0x77b6b6c1
	data4	0xafdada75,0xafdada75, 0x42212163,0x42212163
	data4	0x20101030,0x20101030, 0xe5ffff1a,0xe5ffff1a
	data4	0xfdf3f30e,0xfdf3f30e, 0xbfd2d26d,0xbfd2d26d
	data4	0x81cdcd4c,0x81cdcd4c, 0x180c0c14,0x180c0c14
	data4	0x26131335,0x26131335, 0xc3ecec2f,0xc3ecec2f
	data4	0xbe5f5fe1,0xbe5f5fe1, 0x359797a2,0x359797a2
	data4	0x884444cc,0x884444cc, 0x2e171739,0x2e171739
	data4	0x93c4c457,0x93c4c457, 0x55a7a7f2,0x55a7a7f2
	data4	0xfc7e7e82,0xfc7e7e82, 0x7a3d3d47,0x7a3d3d47
	data4	0xc86464ac,0xc86464ac, 0xba5d5de7,0xba5d5de7
	data4	0x3219192b,0x3219192b, 0xe6737395,0xe6737395
	data4	0xc06060a0,0xc06060a0, 0x19818198,0x19818198
	data4	0x9e4f4fd1,0x9e4f4fd1, 0xa3dcdc7f,0xa3dcdc7f
	data4	0x44222266,0x44222266, 0x542a2a7e,0x542a2a7e
	data4	0x3b9090ab,0x3b9090ab, 0x0b888883,0x0b888883
	data4	0x8c4646ca,0x8c4646ca, 0xc7eeee29,0xc7eeee29
	data4	0x6bb8b8d3,0x6bb8b8d3, 0x2814143c,0x2814143c
	data4	0xa7dede79,0xa7dede79, 0xbc5e5ee2,0xbc5e5ee2
	data4	0x160b0b1d,0x160b0b1d, 0xaddbdb76,0xaddbdb76
	data4	0xdbe0e03b,0xdbe0e03b, 0x64323256,0x64323256
	data4	0x743a3a4e,0x743a3a4e, 0x140a0a1e,0x140a0a1e
	data4	0x924949db,0x924949db, 0x0c06060a,0x0c06060a
	data4	0x4824246c,0x4824246c, 0xb85c5ce4,0xb85c5ce4
	data4	0x9fc2c25d,0x9fc2c25d, 0xbdd3d36e,0xbdd3d36e
	data4	0x43acacef,0x43acacef, 0xc46262a6,0xc46262a6
	data4	0x399191a8,0x399191a8, 0x319595a4,0x319595a4
	data4	0xd3e4e437,0xd3e4e437, 0xf279798b,0xf279798b
	data4	0xd5e7e732,0xd5e7e732, 0x8bc8c843,0x8bc8c843
	data4	0x6e373759,0x6e373759, 0xda6d6db7,0xda6d6db7
	data4	0x018d8d8c,0x018d8d8c, 0xb1d5d564,0xb1d5d564
	data4	0x9c4e4ed2,0x9c4e4ed2, 0x49a9a9e0,0x49a9a9e0
	data4	0xd86c6cb4,0xd86c6cb4, 0xac5656fa,0xac5656fa
	data4	0xf3f4f407,0xf3f4f407, 0xcfeaea25,0xcfeaea25
	data4	0xca6565af,0xca6565af, 0xf47a7a8e,0xf47a7a8e
	data4	0x47aeaee9,0x47aeaee9, 0x10080818,0x10080818
	data4	0x6fbabad5,0x6fbabad5, 0xf0787888,0xf0787888
	data4	0x4a25256f,0x4a25256f, 0x5c2e2e72,0x5c2e2e72
	data4	0x381c1c24,0x381c1c24, 0x57a6a6f1,0x57a6a6f1
	data4	0x73b4b4c7,0x73b4b4c7, 0x97c6c651,0x97c6c651
	data4	0xcbe8e823,0xcbe8e823, 0xa1dddd7c,0xa1dddd7c
	data4	0xe874749c,0xe874749c, 0x3e1f1f21,0x3e1f1f21
	data4	0x964b4bdd,0x964b4bdd, 0x61bdbddc,0x61bdbddc
	data4	0x0d8b8b86,0x0d8b8b86, 0x0f8a8a85,0x0f8a8a85
	data4	0xe0707090,0xe0707090, 0x7c3e3e42,0x7c3e3e42
	data4	0x71b5b5c4,0x71b5b5c4, 0xcc6666aa,0xcc6666aa
	data4	0x904848d8,0x904848d8, 0x06030305,0x06030305
	data4	0xf7f6f601,0xf7f6f601, 0x1c0e0e12,0x1c0e0e12
	data4	0xc26161a3,0xc26161a3, 0x6a35355f,0x6a35355f
	data4	0xae5757f9,0xae5757f9, 0x69b9b9d0,0x69b9b9d0
	data4	0x17868691,0x17868691, 0x99c1c158,0x99c1c158
	data4	0x3a1d1d27,0x3a1d1d27, 0x279e9eb9,0x279e9eb9
	data4	0xd9e1e138,0xd9e1e138, 0xebf8f813,0xebf8f813
	data4	0x2b9898b3,0x2b9898b3, 0x22111133,0x22111133
	data4	0xd26969bb,0xd26969bb, 0xa9d9d970,0xa9d9d970
	data4	0x078e8e89,0x078e8e89, 0x339494a7,0x339494a7
	data4	0x2d9b9bb6,0x2d9b9bb6, 0x3c1e1e22,0x3c1e1e22
	data4	0x15878792,0x15878792, 0xc9e9e920,0xc9e9e920
	data4	0x87cece49,0x87cece49, 0xaa5555ff,0xaa5555ff
	data4	0x50282878,0x50282878, 0xa5dfdf7a,0xa5dfdf7a
	data4	0x038c8c8f,0x038c8c8f, 0x59a1a1f8,0x59a1a1f8
	data4	0x09898980,0x09898980, 0x1a0d0d17,0x1a0d0d17
	data4	0x65bfbfda,0x65bfbfda, 0xd7e6e631,0xd7e6e631
	data4	0x844242c6,0x844242c6, 0xd06868b8,0xd06868b8
	data4	0x824141c3,0x824141c3, 0x299999b0,0x299999b0
	data4	0x5a2d2d77,0x5a2d2d77, 0x1e0f0f11,0x1e0f0f11
	data4	0x7bb0b0cb,0x7bb0b0cb, 0xa85454fc,0xa85454fc
	data4	0x6dbbbbd6,0x6dbbbbd6, 0x2c16163a,0x2c16163a
// Te4:
	data1	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	data1	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	data1	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	data1	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	data1	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	data1	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	data1	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	data1	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	data1	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	data1	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	data1	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	data1	0x6a, 0xcb, 0xbe, 0x39, 0x4a,
0x4c, 0x58, 0xcf
	// Rest of the Te4 byte table (values unchanged).
	data1	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	data1	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	data1	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	data1	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	data1	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	data1	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	data1	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	data1	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	data1	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	data1	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	data1	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	data1	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	data1	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	data1	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	data1	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	data1	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	data1	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	data1	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	data1	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	data1	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
.size	AES_Te#,2048+256	// HP-UX assembler fails to ".-AES_Te#"

// AES_Td: decryption counterpart of AES_Te with the same layout - 256
// 32-bit entries, each emitted twice back to back (2048 bytes), followed
// by the 256-byte Td4 table (the AES inverse S-box), 2048+256 bytes in
// total as declared by .size below.  AES_decrypt locates this table
// ip-relative through the expression (AES_Td#-AES_decrypt#).
.align	64
.global	AES_Td#
.type	AES_Td#,@object
AES_Td:	data4	0x51f4a750,0x51f4a750, 0x7e416553,0x7e416553
	data4	0x1a17a4c3,0x1a17a4c3, 0x3a275e96,0x3a275e96
	data4	0x3bab6bcb,0x3bab6bcb, 0x1f9d45f1,0x1f9d45f1
	data4	0xacfa58ab,0xacfa58ab, 0x4be30393,0x4be30393
	data4	0x2030fa55,0x2030fa55, 0xad766df6,0xad766df6
	data4	0x88cc7691,0x88cc7691, 0xf5024c25,0xf5024c25
	data4	0x4fe5d7fc,0x4fe5d7fc, 0xc52acbd7,0xc52acbd7
	data4	0x26354480,0x26354480, 0xb562a38f,0xb562a38f
	data4	0xdeb15a49,0xdeb15a49, 0x25ba1b67,0x25ba1b67
	data4	0x45ea0e98,0x45ea0e98, 0x5dfec0e1,0x5dfec0e1
	data4	0xc32f7502,0xc32f7502, 0x814cf012,0x814cf012
	data4	0x8d4697a3,0x8d4697a3, 0x6bd3f9c6,0x6bd3f9c6
	data4	0x038f5fe7,0x038f5fe7, 0x15929c95,0x15929c95
	data4	0xbf6d7aeb,0xbf6d7aeb, 0x955259da,0x955259da
	data4	0xd4be832d,0xd4be832d, 0x587421d3,0x587421d3
	data4	0x49e06929,0x49e06929, 0x8ec9c844,0x8ec9c844
	data4	0x75c2896a,0x75c2896a, 0xf48e7978,0xf48e7978
	data4	0x99583e6b,0x99583e6b, 0x27b971dd,0x27b971dd
	data4	0xbee14fb6,0xbee14fb6, 0xf088ad17,0xf088ad17
	data4	0xc920ac66,0xc920ac66, 0x7dce3ab4,0x7dce3ab4
	data4	0x63df4a18,0x63df4a18, 0xe51a3182,0xe51a3182
	data4	0x97513360,0x97513360, 0x62537f45,0x62537f45
	data4	0xb16477e0,0xb16477e0, 0xbb6bae84,0xbb6bae84
	data4	0xfe81a01c,0xfe81a01c, 0xf9082b94,0xf9082b94
	data4	0x70486858,0x70486858, 0x8f45fd19,0x8f45fd19
	data4	0x94de6c87,0x94de6c87, 0x527bf8b7,0x527bf8b7
	data4	0xab73d323,0xab73d323, 0x724b02e2,0x724b02e2
	data4	0xe31f8f57,0xe31f8f57, 0x6655ab2a,0x6655ab2a
	data4	0xb2eb2807,0xb2eb2807, 0x2fb5c203,0x2fb5c203
	data4	0x86c57b9a,0x86c57b9a, 0xd33708a5,0xd33708a5
	data4	0x302887f2,0x302887f2, 0x23bfa5b2,0x23bfa5b2
	data4	0x02036aba,0x02036aba, 0xed16825c,0xed16825c
	data4	0x8acf1c2b,0x8acf1c2b, 0xa779b492,0xa779b492
	data4	0xf307f2f0,0xf307f2f0, 0x4e69e2a1,0x4e69e2a1
	data4	0x65daf4cd,0x65daf4cd, 0x0605bed5,0x0605bed5
	data4	0xd134621f,0xd134621f, 0xc4a6fe8a,0xc4a6fe8a
	data4	0x342e539d,0x342e539d, 0xa2f355a0,0xa2f355a0
	data4	0x058ae132,0x058ae132, 0xa4f6eb75,0xa4f6eb75
	data4	0x0b83ec39,0x0b83ec39, 0x4060efaa,0x4060efaa
	data4	0x5e719f06,0x5e719f06, 0xbd6e1051,0xbd6e1051
	data4	0x3e218af9,0x3e218af9, 0x96dd063d,0x96dd063d
	data4	0xdd3e05ae,0xdd3e05ae, 0x4de6bd46,0x4de6bd46
	data4	0x91548db5,0x91548db5, 0x71c45d05,0x71c45d05
	data4	0x0406d46f,0x0406d46f, 0x605015ff,0x605015ff
	data4	0x1998fb24,0x1998fb24, 0xd6bde997,0xd6bde997
	data4	0x894043cc,0x894043cc, 0x67d99e77,0x67d99e77
	data4	0xb0e842bd,0xb0e842bd, 0x07898b88,0x07898b88
	data4	0xe7195b38,0xe7195b38, 0x79c8eedb,0x79c8eedb
	data4	0xa17c0a47,0xa17c0a47, 0x7c420fe9,0x7c420fe9
	data4	0xf8841ec9,0xf8841ec9, 0x00000000,0x00000000
	data4	0x09808683,0x09808683, 0x322bed48,0x322bed48
	data4	0x1e1170ac,0x1e1170ac, 0x6c5a724e,0x6c5a724e
	data4	0xfd0efffb,0xfd0efffb, 0x0f853856,0x0f853856
	data4	0x3daed51e,0x3daed51e, 0x362d3927,0x362d3927
	data4	0x0a0fd964,0x0a0fd964, 0x685ca621,0x685ca621
	data4	0x9b5b54d1,0x9b5b54d1, 0x24362e3a,0x24362e3a
	data4	0x0c0a67b1,0x0c0a67b1, 0x9357e70f,0x9357e70f
	data4	0xb4ee96d2,0xb4ee96d2, 0x1b9b919e,0x1b9b919e
	data4	0x80c0c54f,0x80c0c54f, 0x61dc20a2,0x61dc20a2
	data4	0x5a774b69,0x5a774b69, 0x1c121a16,0x1c121a16
	data4	0xe293ba0a,0xe293ba0a, 0xc0a02ae5,0xc0a02ae5
	data4	0x3c22e043,0x3c22e043, 0x121b171d,0x121b171d
	data4	0x0e090d0b,0x0e090d0b, 0xf28bc7ad,0xf28bc7ad
	data4	0x2db6a8b9,0x2db6a8b9, 0x141ea9c8,0x141ea9c8
	data4	0x57f11985,0x57f11985, 0xaf75074c,0xaf75074c
	data4	0xee99ddbb,0xee99ddbb, 0xa37f60fd,0xa37f60fd
	data4	0xf701269f,0xf701269f, 0x5c72f5bc,0x5c72f5bc
	data4	0x44663bc5,0x44663bc5, 0x5bfb7e34,0x5bfb7e34
	data4	0x8b432976,0x8b432976, 0xcb23c6dc,0xcb23c6dc
	data4	0xb6edfc68,0xb6edfc68, 0xb8e4f163,0xb8e4f163
	data4	0xd731dcca,0xd731dcca, 0x42638510,0x42638510
	data4	0x13972240,0x13972240, 0x84c61120,0x84c61120
	data4	0x854a247d,0x854a247d, 0xd2bb3df8,0xd2bb3df8
	data4	0xaef93211,0xaef93211, 0xc729a16d,0xc729a16d
	data4	0x1d9e2f4b,0x1d9e2f4b, 0xdcb230f3,0xdcb230f3
	data4	0x0d8652ec,0x0d8652ec, 0x77c1e3d0,0x77c1e3d0
	data4	0x2bb3166c,0x2bb3166c, 0xa970b999,0xa970b999
	data4	0x119448fa,0x119448fa, 0x47e96422,0x47e96422
	data4	0xa8fc8cc4,0xa8fc8cc4, 0xa0f03f1a,0xa0f03f1a
	data4	0x567d2cd8,0x567d2cd8, 0x223390ef,0x223390ef
	data4	0x87494ec7,0x87494ec7, 0xd938d1c1,0xd938d1c1
	data4	0x8ccaa2fe,0x8ccaa2fe, 0x98d40b36,0x98d40b36
	data4	0xa6f581cf,0xa6f581cf, 0xa57ade28,0xa57ade28
	data4	0xdab78e26,0xdab78e26, 0x3fadbfa4,0x3fadbfa4
	data4	0x2c3a9de4,0x2c3a9de4, 0x5078920d,0x5078920d
	data4	0x6a5fcc9b,0x6a5fcc9b, 0x547e4662,0x547e4662
	data4	0xf68d13c2,0xf68d13c2, 0x90d8b8e8,0x90d8b8e8
	data4	0x2e39f75e,0x2e39f75e, 0x82c3aff5,0x82c3aff5
	data4	0x9f5d80be,0x9f5d80be, 0x69d0937c,0x69d0937c
	data4	0x6fd52da9,0x6fd52da9, 0xcf2512b3,0xcf2512b3
	data4	0xc8ac993b,0xc8ac993b, 0x10187da7,0x10187da7
	data4	0xe89c636e,0xe89c636e, 0xdb3bbb7b,0xdb3bbb7b
	data4	0xcd267809,0xcd267809, 0x6e5918f4,0x6e5918f4
	data4	0xec9ab701,0xec9ab701, 0x834f9aa8,0x834f9aa8
	data4	0xe6956e65,0xe6956e65, 0xaaffe67e,0xaaffe67e
	data4	0x21bccf08,0x21bccf08, 0xef15e8e6,0xef15e8e6
	data4	0xbae79bd9,0xbae79bd9, 0x4a6f36ce,0x4a6f36ce
	data4	0xea9f09d4,0xea9f09d4, 0x29b07cd6,0x29b07cd6
	data4	0x31a4b2af,0x31a4b2af, 0x2a3f2331,0x2a3f2331
	data4	0xc6a59430,0xc6a59430, 0x35a266c0,0x35a266c0
	data4	0x744ebc37,0x744ebc37, 0xfc82caa6,0xfc82caa6
	data4	0xe090d0b0,0xe090d0b0, 0x33a7d815,0x33a7d815
	data4	0xf104984a,0xf104984a, 0x41ecdaf7,0x41ecdaf7
	data4	0x7fcd500e,0x7fcd500e, 0x1791f62f,0x1791f62f
	data4	0x764dd68d,0x764dd68d, 0x43efb04d,0x43efb04d
	data4	0xccaa4d54,0xccaa4d54, 0xe49604df,0xe49604df
	data4	0x9ed1b5e3,0x9ed1b5e3, 0x4c6a881b,0x4c6a881b
	data4	0xc12c1fb8,0xc12c1fb8, 0x4665517f,0x4665517f
	data4	0x9d5eea04,0x9d5eea04, 0x018c355d,0x018c355d
	data4	0xfa877473,0xfa877473, 0xfb0b412e,0xfb0b412e
	data4	0xb3671d5a,0xb3671d5a, 0x92dbd252,0x92dbd252
	data4	0xe9105633,0xe9105633, 0x6dd64713,0x6dd64713
	data4	0x9ad7618c,0x9ad7618c, 0x37a10c7a,0x37a10c7a
	data4	0x59f8148e,0x59f8148e, 0xeb133c89,0xeb133c89
	data4	0xcea927ee,0xcea927ee, 0xb761c935,0xb761c935
	data4	0xe11ce5ed,0xe11ce5ed, 0x7a47b13c,0x7a47b13c
	data4	0x9cd2df59,0x9cd2df59, 0x55f2733f,0x55f2733f
	data4	0x1814ce79,0x1814ce79, 0x73c737bf,0x73c737bf
	data4	0x53f7cdea,0x53f7cdea, 0x5ffdaa5b,0x5ffdaa5b
	data4	0xdf3d6f14,0xdf3d6f14, 0x7844db86,0x7844db86
	data4	0xcaaff381,0xcaaff381, 0xb968c43e,0xb968c43e
	data4	0x3824342c,0x3824342c, 0xc2a3405f,0xc2a3405f
	data4	0x161dc372,0x161dc372, 0xbce2250c,0xbce2250c
	data4	0x283c498b,0x283c498b, 0xff0d9541,0xff0d9541
	data4	0x39a80171,0x39a80171, 0x080cb3de,0x080cb3de
	data4	0xd8b4e49c,0xd8b4e49c, 0x6456c190,0x6456c190
	data4	0x7bcb8461,0x7bcb8461, 0xd532b670,0xd532b670
	data4	0x486c5c74,0x486c5c74, 0xd0b85742,0xd0b85742
// Td4:
	data1	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	data1	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	data1	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	data1	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	data1	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	data1	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	data1	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	data1	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	data1	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	data1	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	data1	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	data1	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	data1	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	data1	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	data1	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	data1	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	data1	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	data1	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	data1	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	data1	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	data1	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	data1	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	data1	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	data1	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	data1	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	data1	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	data1	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	data1	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	data1	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	data1	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	data1	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	data1	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size	AES_Td#,2048+256	// HP-UX assembler fails to ".-AES_Td#"