// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
// On Win64i compile with ias.exe.
.text

// OPENSSL_cpuid_setup
// Nothing to set up here: the routine consists of a single return.
.global	OPENSSL_cpuid_setup#
.proc	OPENSSL_cpuid_setup#
OPENSSL_cpuid_setup:
{ .mib;	br.ret.sptk.many	b0		};;
.endp	OPENSSL_cpuid_setup#

// OPENSSL_rdtsc
// Out:	r8 = current value of the interval time counter (ar.itc)
.global	OPENSSL_rdtsc#
.proc	OPENSSL_rdtsc#
OPENSSL_rdtsc:
{ .mib;	mov			r8=ar.itc
	br.ret.sptk.many	b0		};;
.endp	OPENSSL_rdtsc#

// OPENSSL_atomic_add
// Atomically adds r33 to the 4-byte value stored at [r32] using a
// compare-and-exchange retry loop.
// In:	r32 = address of the 4-byte value
//	r33 = amount to add
// Out:	r8  = the sum that was stored, sign-extended from 32 bits
// Uses:	r2, r3, p6, ar.ccv
.global	OPENSSL_atomic_add#
.proc	OPENSSL_atomic_add#
.align	32
OPENSSL_atomic_add:
{ .mii;	ld4		r2=[r32]		// r2 = value currently in memory
	nop.i		0
	nop.i		0		};;
.Lspin:
{ .mii;	mov		ar.ccv=r2		// value the exchange must still find
	add		r8=r2,r33		// r8 = candidate new value
	mov		r3=r2		};;	// keep the expected value for the check
{ .mmi;	mf;;					// fence ahead of the acquire-only exchange
	cmpxchg4.acq	r2=[r32],r8,ar.ccv	// r2 = value actually found in memory
	nop.i		0		};;
{ .mib;	cmp.ne		p6,p0=r2,r3		// found != expected: the store did not happen
	nop.i		0
(p6)	br.dpnt		.Lspin		};;	// retry, r2 already holds the fresh value
{ .mib;	nop.m		0
	sxt4		r8=r8
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_atomic_add#

// Returns a structure comprising pointer to the top of stack of
// the caller and pointer beyond backing storage for the current
// register frame. The latter is required, because it might be
// insufficient to wipe backing storage for the current frame
// (as this procedure does), one might have to go further, toward
// higher addresses to reach for whole "retroactively" saved
// context...
// OPENSSL_wipe_cpu
// Zeroes scratch CPU state: r11, r14-r31 and f6-f31 explicitly, the
// rotating registers r32-r127 and f32-f127 through the loop below, and
// the register backing store slots starting at ar.bsp.
// In:	none (the frame is allocated with zero input registers)
// Out:	r8 = sp
//	r9 = address derived from ar.bsp (see the review note below)
// Preserved: predicates (saved in r8, restored before return) and ar.lc
//	(saved in r3).  r2 receives ar.pfs from alloc.
//
// NOTE(review): br.ctop with ar.lc=96 and ar.ec=1 appears to execute the
// loop body 97 times, so the lowest store would land at ar.bsp-8 and r9
// would come back as ar.bsp+96*8-8 rather than ar.bsp+96*8 -- confirm
// that this is intended before relying on the exact value.
.global	OPENSSL_wipe_cpu#
.proc	OPENSSL_wipe_cpu#
.align	32
OPENSSL_wipe_cpu:
	.prologue
	.fframe	0
	.save	ar.pfs,r2
	.save	ar.lc,r3
{ .mib;	alloc		r2=ar.pfs,0,96,0,96	// 96 locals, all of them rotating
	mov		r3=ar.lc		// saved, restored before return
	brp.loop.imp	.L_wipe_top,.L_wipe_end-16
					};;
{ .mii;	mov		r9=ar.bsp		// backing store address of this frame
	mov		r8=pr			// the loop rotates predicates, keep a copy
	mov		ar.lc=96	};;
	.body
{ .mii;	add		r9=96*8-8,r9		// start at the highest slot, sweep downwards
	mov		ar.ec=1		};;

// One can sweep double as fast, but then we can't guarantee
// that backing storage is wiped...
.L_wipe_top:
{ .mfi;	st8		[r9]=r0,-8		// zero one backing store slot, step down
	mov		f127=f0			// rotation moves this zero through f32-f127
	mov		r127=r0		}	// likewise through r32-r127
{ .mfb;	nop.m		0
	nop.f		0
	br.ctop.sptk	.L_wipe_top	};;
.L_wipe_end:

// Static registers are cleared one by one.
{ .mfi;	mov		r11=r0
	mov		f6=f0
	mov		r14=r0		}
{ .mfi;	mov		r15=r0
	mov		f7=f0
	mov		r16=r0		}
{ .mfi;	mov		r17=r0
	mov		f8=f0
	mov		r18=r0		}
{ .mfi;	mov		r19=r0
	mov		f9=f0
	mov		r20=r0		}
{ .mfi;	mov		r21=r0
	mov		f10=f0
	mov		r22=r0		}
{ .mfi;	mov		r23=r0
	mov		f11=f0
	mov		r24=r0		}
{ .mfi;	mov		r25=r0
	mov		f12=f0
	mov		r26=r0		}
{ .mfi;	mov		r27=r0
	mov		f13=f0
	mov		r28=r0		}
{ .mfi;	mov		r29=r0
	mov		f14=f0
	mov		r30=r0		}
{ .mfi;	mov		r31=r0
	mov		f15=f0
	nop.i		0		}
{ .mfi;	mov		f16=f0		}
{ .mfi;	mov		f17=f0		}
{ .mfi;	mov		f18=f0		}
{ .mfi;	mov		f19=f0		}
{ .mfi;	mov		f20=f0		}
{ .mfi;	mov		f21=f0		}
{ .mfi;	mov		f22=f0		}
{ .mfi;	mov		f23=f0		}
{ .mfi;	mov		f24=f0		}
{ .mfi;	mov		f25=f0		}
{ .mfi;	mov		f26=f0		}
{ .mfi;	mov		f27=f0		}
{ .mfi;	mov		f28=f0		}
{ .mfi;	mov		f29=f0		}
{ .mfi;	mov		f30=f0		}
{ .mfi;	add		r9=96*8+8,r9		// second word of the returned structure
	mov		f31=f0
	mov		pr=r8,0x1ffff	}	// put back every predicate saved on entry
{ .mib;	mov		r8=sp			// first word of the returned structure
	mov		ar.lc=r3
	br.ret.sptk	b0		};;
.endp	OPENSSL_wipe_cpu#

// OPENSSL_cleanse
// Writes zero bytes over a memory region.
// In:	r32 = start address
//	r33 = length in bytes (zero returns immediately)
// Uses:	r2, p6, p7
// Regions of 15 bytes or more are cleared byte by byte until the address
// is 8-byte aligned, then 8 bytes at a time, with any remainder handled
// by the byte loop.  Shorter regions use the byte loop only.
.global	OPENSSL_cleanse#
.proc	OPENSSL_cleanse#
OPENSSL_cleanse:
{ .mib;	cmp.eq		p6,p0=0,r33		// len==0
#if defined(_HPUX_SOURCE) && !defined(_LP64)
	addp4		r32=0,r32		// 32-bit pointer on ILP32 HP-UX
#endif
(p6)	br.ret.spnt	b0		};;
{ .mib;	and		r2=7,r32		// r2 = misalignment of the address
	cmp.leu		p6,p0=15,r33		// len>=15
(p6)	br.cond.dptk	.Lot		};;

// Byte-at-a-time loop; entered with len>0.
.Little:
{ .mib;	st1		[r32]=r0,1
	cmp.ltu		p6,p7=1,r33	}	// len>1
{ .mbb;	add		r33=-1,r33		// len--
(p6)	br.cond.dptk	.Little
(p7)	br.ret.sptk.many	b0	};;

// Clear single bytes until the address is 8-byte aligned.
.Lot:
{ .mib;	cmp.eq		p6,p0=0,r2
(p6)	br.cond.dptk	.Laligned	};;
{ .mmi;	st1		[r32]=r0,1;;
	and		r2=7,r32	}	// misalignment of the advanced address
{ .mib;	add		r33=-1,r33
	br			.Lot	};;

// Clear 8 bytes per iteration.  r2 is computed from len as it was
// before this iteration's decrement.
.Laligned:
{ .mmi;	st8		[r32]=r0,8
	and		r2=-8,r33		// len&~7
	add		r33=-8,r33	};;	// len-=8
{ .mib;	cmp.ltu		p6,p0=8,r2		// ((len+8)&~7)>8
(p6)	br.cond.dptk	.Laligned	};;

{ .mbb;	cmp.eq		p6,p7=r0,r33		// anything left over?
(p7)	br.cond.dpnt	.Little			// yes: finish with the byte loop
(p6)	br.ret.sptk.many	b0	};;
.endp	OPENSSL_cleanse#