// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
// On Win64i compile with ias.exe.
//
// Every routine in this file is placed in the code section selected below.
.text

// OPENSSL_cpuid_setup
// Empty stub: the whole body is a single return through b0.
// It reads no inputs and writes no registers or memory.
.global	OPENSSL_cpuid_setup#
.proc	OPENSSL_cpuid_setup#
OPENSSL_cpuid_setup:
{ .mib;	br.ret.sptk.many	b0		};;
.endp	OPENSSL_cpuid_setup#

// OPENSSL_rdtsc
// Out:  r8 = current value of the interval time counter (ar.itc).
// Reads no inputs and touches no memory; r8 is the only register written.
.global	OPENSSL_rdtsc#
.proc	OPENSSL_rdtsc#
OPENSSL_rdtsc:
{ .mib;	mov			r8=ar.itc
	br.ret.sptk.many	b0		};;
.endp   OPENSSL_rdtsc#

// OPENSSL_atomic_add
// Adds a value to a 4-byte word in memory with a compare-and-exchange
// retry loop and returns the sum.
// In:    r32 = address of the 4-byte word (accessed with ld4/cmpxchg4)
//        r33 = amount to add
// Out:   r8  = r2+r33 for the attempt that succeeded, sign-extended
//              from 32 bits
// Clobb: r2, r3, p6, ar.ccv
.global	OPENSSL_atomic_add#
.proc	OPENSSL_atomic_add#
.align	32
OPENSSL_atomic_add:
// r2 = value currently in memory; this is the first "expected" value.
{ .mii;	ld4		r2=[r32]
	nop.i		0
	nop.i		0		};;
.Lspin:
// ar.ccv = expected old value, r8 = proposed new value,
// r3 = copy of the expected value for the check after the exchange.
{ .mii;	mov		ar.ccv=r2
	add		r8=r2,r33
	mov		r3=r2		};;
// Memory fence, then: if [r32] still equals ar.ccv store r8 there.
// Either way r2 receives the value that was found in memory.
{ .mmi;	mf;;
	cmpxchg4.acq	r2=[r32],r8,ar.ccv
	nop.i		0		};;
// Value found differs from the expected one: the word changed under
// us, so retry with the freshly observed value already sitting in r2.
{ .mib;	cmp.ne		p6,p0=r2,r3
	nop.i		0
(p6)	br.dpnt		.Lspin		};;
{ .mib;	nop.m		0
	sxt4		r8=r8
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_atomic_add#

// Returns a structure comprising pointer to the top of stack of
// the caller and pointer beyond backing storage for the current
// register frame. The latter is required, because it might be
// insufficient to wipe backing storage for the current frame
// (as this procedure does), one might have to go further, toward
// higher addresses to reach for whole "retroactively" saved
// context...
//
// In:    nothing (the frame is allocated with zero input registers)
// Out:   r8 = sp
//        r9 = backing-store pointer derived from ar.bsp (see above)
// Saved and restored: ar.lc (kept in r3), predicates selected by the
//        mask 0x1ffff (kept in r8 until the restore). ar.pfs is copied
//        to r2 by alloc and is not written here.
// Zeroed: r11, r14-r31, f6-f31 explicitly; r127 and f127 are zeroed on
//        every pass of the rotating loop.
// NOTE(review): r10 is never written in this routine, so it keeps
//        whatever the caller left in it - confirm that this is intended.
.global	OPENSSL_wipe_cpu#
.proc	OPENSSL_wipe_cpu#
.align	32
OPENSSL_wipe_cpu:
	.prologue
	.fframe	0
	.save	ar.pfs,r2
	.save	ar.lc,r3
// Frame: 0 inputs, 96 locals, 0 outputs, 96 rotating registers.
{ .mib;	alloc		r2=ar.pfs,0,96,0,96
	mov		r3=ar.lc
	brp.loop.imp	.L_wipe_top,.L_wipe_end-16
					};;
// r9 = backing store pointer, r8 = predicates to restore at the end,
// loop count register = 96.
{ .mii;	mov		r9=ar.bsp
	mov		r8=pr
	mov		ar.lc=96	};;
	.body
// Start storing at ar.bsp + 96*8-8 and walk downwards; epilog count 1.
{ .mii;	add		r9=96*8-8,r9
	mov		ar.ec=1		};;

// One can sweep double as fast, but then we cannot guarantee
// that backing storage is wiped...
//
// Each pass stores a zero doubleword at [r9], steps r9 down by 8 and
// zeroes r127/f127; br.ctop rotates the register names, so successive
// passes reach different rotating registers through the same names.
.L_wipe_top:
{ .mfi;	st8		[r9]=r0,-8
	mov		f127=f0
	mov		r127=r0		}
{ .mfb;	nop.m		0
	nop.f		0
	br.ctop.sptk	.L_wipe_top	};;
.L_wipe_end:

// Zero the static general registers r11, r14-r31 and the floating-point
// registers f6-f31.
{ .mfi;	mov		r11=r0
	mov		f6=f0
	mov		r14=r0		}
{ .mfi;	mov		r15=r0
	mov		f7=f0
	mov		r16=r0		}
{ .mfi;	mov		r17=r0
	mov		f8=f0
	mov		r18=r0		}
{ .mfi;	mov		r19=r0
	mov		f9=f0
	mov		r20=r0		}
{ .mfi;	mov		r21=r0
	mov		f10=f0
	mov		r22=r0		}
{ .mfi;	mov		r23=r0
	mov		f11=f0
	mov		r24=r0		}
{ .mfi;	mov		r25=r0
	mov		f12=f0
	mov		r26=r0		}
{ .mfi;	mov		r27=r0
	mov		f13=f0
	mov		r28=r0		}
{ .mfi;	mov		r29=r0
	mov		f14=f0
	mov		r30=r0		}
{ .mfi;	mov		r31=r0
	mov		f15=f0
	nop.i		0		}
{ .mfi;	mov		f16=f0		}
{ .mfi;	mov		f17=f0		}
{ .mfi;	mov		f18=f0		}
{ .mfi;	mov		f19=f0		}
{ .mfi;	mov		f20=f0		}
{ .mfi;	mov		f21=f0		}
{ .mfi;	mov		f22=f0		}
{ .mfi;	mov		f23=f0		}
{ .mfi;	mov		f24=f0		}
{ .mfi;	mov		f25=f0		}
{ .mfi;	mov		f26=f0		}
{ .mfi;	mov		f27=f0		}
{ .mfi;	mov		f28=f0		}
{ .mfi;	mov		f29=f0		}
{ .mfi;	mov		f30=f0		}
// Second return value: move r9 back up by 96*8+8 from where the loop
// left it. Then restore the predicates saved in r8 (mask 0x1ffff).
{ .mfi;	add		r9=96*8+8,r9
	mov		f31=f0
	mov		pr=r8,0x1ffff	}
// First return value: r8 = sp. Restore the loop count saved in r3.
{ .mib;	mov		r8=sp
	mov		ar.lc=r3
	br.ret.sptk	b0		};;
.endp	OPENSSL_wipe_cpu#

// OPENSSL_cleanse
// Overwrites a memory range with zero bytes.
// In:    r32 = start address, r33 = length in bytes
// Out:   nothing; r32 is advanced and r33 counted down as bytes are
//        written
// Clobb: r2, p6, p7
//
// Strategy: a length of 0 returns at once. Lengths below 15 are
// cleared one byte at a time (.Little). Longer ranges are cleared
// byte by byte until the address is a multiple of 8 (.Lot), then
// 8 bytes at a time (.Laligned), and any tail of fewer than 8 bytes
// goes back through .Little.
.global	OPENSSL_cleanse#
.proc	OPENSSL_cleanse#
OPENSSL_cleanse:
{ .mib;	cmp.eq		p6,p0=0,r33	    // len==0
#if defined(_HPUX_SOURCE) && !defined(_LP64)
	addp4		r32=0,r32
#endif
(p6)	br.ret.spnt	b0		};;
// r2 = address misalignment within 8 bytes, consumed at .Lot.
{ .mib;	and		r2=7,r32
	cmp.leu		p6,p0=15,r33	    // len>=15
(p6)	br.cond.dptk	.Lot		};;

// Byte loop: store one zero byte, then loop while more than one byte
// was left before this store. The compare reads r33 in the same
// instruction group as the decrement, so it sees the old length.
.Little:
{ .mib;	st1		[r32]=r0,1
	cmp.ltu		p6,p7=1,r33	}  // len>1
{ .mbb;	add		r33=-1,r33	   // len--
(p6)	br.cond.dptk	.Little
(p7)	br.ret.sptk.many	b0	};;

// Alignment loop: while the address is not a multiple of 8, clear one
// byte and recompute the misalignment from the advanced address.
.Lot:
{ .mib;	cmp.eq		p6,p0=0,r2
(p6)	br.cond.dptk	.Laligned	};;
{ .mmi;	st1		[r32]=r0,1;;
	and		r2=7,r32	}
{ .mib;	add		r33=-1,r33
	br		.Lot		};;

// Doubleword loop: r2 is computed from the length before this store,
// so r2 > 8 means at least 8 bytes remain after it.
.Laligned:
{ .mmi;	st8		[r32]=r0,8
	and		r2=-8,r33	    // len&~7
	add		r33=-8,r33	};; // len-=8
{ .mib;	cmp.ltu		p6,p0=8,r2	    // ((len+8)&~7)>8
(p6)	br.cond.dptk	.Laligned	};;

// Nothing left: return. Otherwise finish the tail in the byte loop.
{ .mbb;	cmp.eq		p6,p7=r0,r33
(p7)	br.cond.dpnt	.Little
(p6)	br.ret.sptk.many	b0	};;
.endp	OPENSSL_cleanse#
168