// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
// On Win64i compile with ias.exe.
.text

// OPENSSL_rdtsc: return the current value of the Interval Time
// Counter application register (ar.itc).
//   In:	nothing
//   Out:	r8 = ar.itc
// Leaf routine: a single bundle that reads the counter and returns
// through b0; no register other than r8 is written.
.global	OPENSSL_rdtsc#
.proc	OPENSSL_rdtsc#
OPENSSL_rdtsc:
{ .mib;	mov			r8=ar.itc
	br.ret.sptk.many	b0		};;
.endp   OPENSSL_rdtsc#

// OPENSSL_atomic_add: add a value to a 32-bit word in memory using a
// compare-and-exchange retry loop, and return the resulting sum.
//   In:	r32 = address of the 32-bit word
//		r33 = amount to add
//   Out:	r8  = sum that was stored, sign-extended from 32 bits
//   Uses:	r2  = value most recently observed at [r32]
//		r3  = copy of the value the sum was computed from
//		ar.ccv = comparand for cmpxchg4
//		p6  = set when the exchange did not take place
.global	OPENSSL_atomic_add#
.proc	OPENSSL_atomic_add#
.align	32
OPENSSL_atomic_add:
// Initial read of the word.
{ .mii;	ld4		r2=[r32]
	nop.i		0
	nop.i		0		};;
.Lspin:
// Compute the candidate sum from the observed value and remember that
// value both in ar.ccv (for the hardware compare) and in r3 (for the
// software check after the exchange).
{ .mii;	mov		ar.ccv=r2
	add		r8=r2,r33
	mov		r3=r2		};;
// Memory fence, then store r8 only if [r32] still equals ar.ccv.
// r2 receives whatever value was in memory, exchanged or not.
{ .mmi;	mf
	cmpxchg4.acq	r2=[r32],r8,ar.ccv
	nop.i		0		};;
// If memory did not hold the expected value the store was skipped;
// retry, with r2 already holding the freshly observed value.
{ .mib;	cmp.ne		p6,p0=r2,r3
	nop.i		0
(p6)	br.dpnt		.Lspin		};;
// Success: sign-extend the 32-bit sum into the full return register.
{ .mib;	nop.m		0
	sxt4		r8=r8
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_atomic_add#

// OPENSSL_wipe_cpu: clear register state and the backing store that
// belongs to the register frame allocated here.
//
// Returns a structure comprising a pointer to the top of the caller's
// stack (r8 = sp) and a pointer beyond the backing storage for the
// current register frame (r9). The latter is required because it
// might be insufficient to wipe backing storage for the current frame
// only (as this procedure does); one might have to go further, toward
// higher addresses, to reach the whole "retroactively" saved
// context...
//
//   In:	nothing
//   Out:	r8 = sp, r9 = address derived from ar.bsp (see above)
//   Saved and restored: ar.lc (kept in r3), predicates (kept in r8
//   until they are written back)
//   Cleared: r11, r14-r31, the 96 stacked registers of the frame,
//   f6-f31, and every floating-point register rotated through f127
.global	OPENSSL_wipe_cpu#
.proc	OPENSSL_wipe_cpu#
.align	32
OPENSSL_wipe_cpu:
	.prologue
	.fframe	0
	.save	ar.pfs,r2
	.save	ar.lc,r3
// Allocate a frame of 96 local registers, all 96 rotating, keeping the
// previous ar.pfs in r2 and the caller's ar.lc in r3. The brp hint
// names the loop target and the bundle holding the loop branch.
{ .mib;	alloc		r2=ar.pfs,0,96,0,96
	mov		r3=ar.lc
	brp.loop.imp	.L_wipe_top,.L_wipe_end-16
					};;
// r9 = backing store pointer, r8 = predicates (the counted loop below
// rotates them), loop count = 96.
{ .mii;	mov		r9=ar.bsp
	mov		r8=pr
	mov		ar.lc=96	};;
	.body
// Start at the highest slot of the 96-register area and walk down.
// An epilogue count of 1 means the loop has no drain stages.
{ .mii;	add		r9=96*8-8,r9
	mov		ar.ec=1		};;

// One can sweep twice as fast, but then we can't guarantee
// that backing storage is wiped...
//
// Each pass stores a zero doubleword at [r9] (post-decrementing r9 by
// 8) and clears f127 and r127. br.ctop rotates the register files, so
// successive passes clear different physical registers. With
// ar.lc=96 and ar.ec=1 the branch is taken 96 times, i.e. the body
// runs 97 times.
.L_wipe_top:
{ .mfi;	st8		[r9]=r0,-8
	mov		f127=f0
	mov		r127=r0		}
{ .mfb;	nop.m		0
	nop.f		0
	br.ctop.sptk	.L_wipe_top	};;
.L_wipe_end:

// Clear the static general registers r11 and r14-r31 together with
// floating-point registers f6-f15.
{ .mfi;	mov		r11=r0
	mov		f6=f0
	mov		r14=r0		}
{ .mfi;	mov		r15=r0
	mov		f7=f0
	mov		r16=r0		}
{ .mfi;	mov		r17=r0
	mov		f8=f0
	mov		r18=r0		}
{ .mfi;	mov		r19=r0
	mov		f9=f0
	mov		r20=r0		}
{ .mfi;	mov		r21=r0
	mov		f10=f0
	mov		r22=r0		}
{ .mfi;	mov		r23=r0
	mov		f11=f0
	mov		r24=r0		}
{ .mfi;	mov		r25=r0
	mov		f12=f0
	mov		r26=r0		}
{ .mfi;	mov		r27=r0
	mov		f13=f0
	mov		r28=r0		}
{ .mfi;	mov		r29=r0
	mov		f14=f0
	mov		r30=r0		}
{ .mfi;	mov		r31=r0
	mov		f15=f0
	nop.i		0		}
// Clear f16-f31, one per bundle.
// NOTE(review): f16-f31 are, as far as I know, listed as preserved
// (callee-saved) in the Itanium software conventions, and they are
// not saved here - confirm that clobbering them is intended.
{ .mfi;	mov		f16=f0		}
{ .mfi;	mov		f17=f0		}
{ .mfi;	mov		f18=f0		}
{ .mfi;	mov		f19=f0		}
{ .mfi;	mov		f20=f0		}
{ .mfi;	mov		f21=f0		}
{ .mfi;	mov		f22=f0		}
{ .mfi;	mov		f23=f0		}
{ .mfi;	mov		f24=f0		}
{ .mfi;	mov		f25=f0		}
{ .mfi;	mov		f26=f0		}
{ .mfi;	mov		f27=f0		}
{ .mfi;	mov		f28=f0		}
{ .mfi;	mov		f29=f0		}
{ .mfi;	mov		f30=f0		}
// Move r9 back up by 96*8+8 bytes from where the loop left it to form
// the second return value, clear the last floating-point register and
// write the saved predicates back from r8.
{ .mfi;	add		r9=96*8+8,r9
	mov		f31=f0
	mov		pr=r8,0x1ffff	}
// r8 is free again once the predicates are restored: return sp in it,
// restore the caller's loop count and return.
{ .mib;	mov		r8=sp
	mov		ar.lc=r3
	br.ret.sptk	b0		};;
.endp	OPENSSL_wipe_cpu#
