1160157Smarcel// Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P.
2129059Smarcel// Permission is hereby granted, free of charge, to any person
3129059Smarcel// obtaining a copy of this software and associated documentation
4129059Smarcel// files (the "Software"), to deal in the Software without
5129059Smarcel// restriction, including without limitation the rights to use,
6129059Smarcel// copy, modify, merge, publish, distribute, sublicense, and/or sell
7129059Smarcel// copies of the Software, and to permit persons to whom the
8129059Smarcel// Software is furnished to do so, subject to the following
9129059Smarcel// conditions:
10129059Smarcel//
11129059Smarcel// The above copyright notice and this permission notice shall be
12129059Smarcel// included in all copies or substantial portions of the Software.
13129059Smarcel//
14129059Smarcel// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15129059Smarcel// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16129059Smarcel// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17129059Smarcel// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18129059Smarcel// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19129059Smarcel// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20129059Smarcel// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21129059Smarcel// OTHER DEALINGS IN THE SOFTWARE.
22115013Smarcel
// Instruction selection keyed on _LP64:
//   SWIZZLE  instruction used to form the env address from the incoming
//            argument register r32 (add when _LP64 is defined, addp4
//            otherwise).
//   STPTR    store used to clear the field at env+528 (st8 when _LP64 is
//            defined, st4 otherwise) -- presumably a pointer-sized field.
23115013Smarcel#ifdef _LP64
24115013Smarcel#define SWIZZLE add
25115013Smarcel#define STPTR st8
26115013Smarcel#else
27115013Smarcel#define SWIZZLE addp4
28115013Smarcel#define STPTR st4
29115013Smarcel#endif
30115013Smarcel
// Symbolic names for the general registers used as scratch by the two
// routines in this file (r14-r31 and r8-r10).
// NOTE: uwx_self_install_context finishes by loading the landing pad
// values into r15-r18, i.e. it overwrites rPFS, rUNAT, rRNAT and rENV0,
// and both routines write ret0 last -- which is the register aliased
// here as rTMP5 (r8).
31115013SmarcelrRP	= r14		// return address (b0 / saved ip)
32115013SmarcelrPFS	= r15		// ar.pfs value / saved cfm
33115013SmarcelrUNAT	= r16		// ar.unat value
34115013SmarcelrRNAT	= r17		// ar.rnat value
35115013SmarcelrENV0	= r18		// pointer walking the env structure
36115013SmarcelrENV1	= r19		// pointer walking the env structure
37115013SmarcelrENV2	= r20		// pointer walking the env structure
38115013SmarcelrNSLOT	= r21		// register-stack slot count (pfs.sol + adjustments)
39115013SmarcelrBSP	= r22		// ar.bsp value
40115013SmarcelrPBSP	= r23		// previous (caller's) bsp
41115013SmarcelrRSC	= r24		// original ar.rsc
42115013SmarcelrNATP	= r25		// 0x1f8 mask (address bits 8:3)
43115013SmarcelrBIAS	= r26		// slot index / bias used for the bsp adjustment
44115013SmarcelrRSC0	= r27		// ar.rsc with the mode bits cleared
45115013SmarcelrTMP1	= r28		// temporary
46115013SmarcelrTMP2	= r29		// temporary
47115013SmarcelrTMP3	= r30		// temporary
48115013SmarcelrTMP4	= r31		// temporary
49115013SmarcelrTMP5	= r8		// temporary (same register as ret0)
50160157SmarcelrMYPFS	= r9		// alloc result in uwx_self_install_context
51160157SmarcelrPSP	= r10		// saved sp about to be installed
52115013Smarcel
// Bit assignments for the valid_regs mask, plus the valid_frs mask that
// is stored next to it at the start of the env structure.
// NOTE(review): these presumably mirror definitions in a C header that
// is not visible from this file -- keep the two in sync.
53160157SmarcelVALID_IP      = 1
54160157SmarcelVALID_SP      = 1 << 1
55160157SmarcelVALID_BSP     = 1 << 2
56160157SmarcelVALID_CFM     = 1 << 3
57160157SmarcelVALID_PREDS   = 1 << 7
58160157SmarcelVALID_PRIUNAT = 1 << 8
59160157SmarcelVALID_RNAT    = 1 << 10
60160157SmarcelVALID_UNAT    = 1 << 11
61160157SmarcelVALID_FPSR    = 1 << 12
62160157SmarcelVALID_LC      = 1 << 13
63160157SmarcelVALID_GRS     = 0xf << 16	// four bits: r4-r7 are saved below
64160157SmarcelVALID_BRS     = 0x1f << 20	// five bits: b1-b5 are saved below
65160157SmarcelVALID_BASIC4  = VALID_IP | VALID_SP | VALID_BSP | VALID_CFM
66160157SmarcelVALID_SPEC    = VALID_PREDS | VALID_PRIUNAT | VALID_RNAT | VALID_UNAT | VALID_FPSR | VALID_LC
67160157SmarcelVALID_REGS    = VALID_BASIC4 | VALID_SPEC | VALID_GRS | VALID_BRS	// = 0x1ff3d8f
68160157SmarcelVALID_FRS     = 0xfffff		// twenty bits: f2-f5 and f16-f31
69160157Smarcel// valid_regs and valid_frs are separate unsigned int fields.
70160157Smarcel// In order to store them with a single st8, we need to know
71160157Smarcel// the endianness.
// Little-endian: valid_regs occupies bits 31:0  (VALID_BITS = 0x000fffff01ff3d8f).
// Otherwise:     valid_regs occupies bits 63:32 (VALID_BITS = 0x01ff3d8f000fffff).
72160157Smarcel#ifdef __LITTLE_ENDIAN__
73160157SmarcelVALID_BITS   = (VALID_FRS << 32) | VALID_REGS
74160157Smarcel#else
75160157SmarcelVALID_BITS   = (VALID_REGS << 32) | VALID_FRS
76160157Smarcel#endif
77129059Smarcel
78115013Smarcel	.text
79160157Smarcel
80160157Smarcel// int uwx_self_init_context(struct uwx_env *env);
81160157Smarcel//
82160157Smarcel// Stores a snapshot of the caller's context in the uwx_env structure.
//
// In:	r32  = env
// Out:	ret0 = 0 (UWX_OK)
//
// Fields written (byte offsets from env):
//	  0	valid_regs / valid_frs (one 8-byte store of VALID_BITS)
//	  8	ip	(b0 on entry)
//	 16	sp	(r12)
//	 24	bsp	(bsp of the caller's frame, computed below)
//	 32	cfm	(ar.pfs as returned by alloc)
//	 64	preds
//	 72	priunat	(ar.unat rotated so the r4 spill bit is bit 0)
//	 88	ar.rnat
//	 96	ar.unat	(value on entry)
//	104	ar.fpsr
//	112	ar.lc
//	136-160	r4-r7
//	168-200	b1-b5
//	208-512	f2-f5, f16-f31 (16 bytes each)
//	528	rstate, cleared
//
// ar.unat is put back to its entry value before returning because the
// st8.spill instructions below modify it.
83160157Smarcel
84115013Smarcel	.proc	uwx_self_init_context
85115013Smarcel	.global uwx_self_init_context
86115013Smarceluwx_self_init_context:
87115013Smarcel	.prologue
88115013Smarcel	alloc	rPFS = ar.pfs, 1, 0, 0, 0
89115013Smarcel	mov	rUNAT = ar.unat		// entry ar.unat, before any spill
90115013Smarcel	.body
91115013Smarcel	SWIZZLE	rENV0 = r0, r32		// rENV0 = env
92115013Smarcel	;;
93115013Smarcel	flushrs				// push dirty stacked regs to the backing store
94115013Smarcel	extr.u	rNSLOT = rPFS, 7, 7 	// nslots = pfs.sol
95115013Smarcel	mov	rRP = b0
96115013Smarcel	;;
97115013Smarcel	mov	rRSC = ar.rsc
98129059Smarcel	add	rENV1 = 136, rENV0	// rENV1 = &env->context.gr[0]
99129059Smarcel	add	rENV2 = 144, rENV0	// rENV2 = &env->context.gr[1]
100115013Smarcel	;;
101115013Smarcel	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
102115013Smarcel	adds	rNATP = 0x1f8, r0	// mask for address bits 8:3
103115013Smarcel	mov	rTMP1 = b1
104115013Smarcel	;;
	// rENV1 and rENV2 leapfrog through env+136..env+200, 16 bytes
	// per step each.
105129059Smarcel	st8.spill [rENV1] = r4, 16	// env+136: r4
106129059Smarcel	st8.spill [rENV2] = r5, 16	// env+144: r5
107115013Smarcel	mov	rTMP2 = b2
108115013Smarcel	;;
109129059Smarcel	st8.spill [rENV1] = r6, 16	// env+152: r6
110129059Smarcel	st8.spill [rENV2] = r7, 16	// env+160: r7
111115013Smarcel	mov	rTMP3 = b3
112115013Smarcel	;;
113129059Smarcel	st8	[rENV1] = rTMP1, 16	// env+168: b1
114129059Smarcel	st8	[rENV2] = rTMP2, 16	// env+176: b2
115115013Smarcel	mov	rTMP1 = b4
116115013Smarcel	;;
117129059Smarcel	st8	[rENV1] = rTMP3, 16	// env+184: b3
118129059Smarcel	st8	[rENV2] = rTMP1, 16	// env+192: b4 (rENV2 is left at env+208)
119115013Smarcel	mov	rTMP2 = b5
120115013Smarcel	;;
121129059Smarcel	st8	[rENV1] = rTMP2		// env+200: b5
122115013Smarcel	mov	ar.rsc = rRSC0		// enforced lazy mode
123115013Smarcel	add	rENV1 = 8, rENV0
124115013Smarcel	;;
125115013Smarcel	mov	rRNAT = ar.rnat		// get copy of ar.rnat
126129059Smarcel	movl	rTMP1 = VALID_BITS	// valid_regs: ip, sp, bsp, cfm,
127160157Smarcel					// preds, priunat, rnat, unat, fpsr,
128129059Smarcel					// lc, grs, brs; valid_frs: all 20 bits
129160157Smarcel					// = 0x01ff3d8f000fffff (0x000fffff01ff3d8f if little-endian)
130115013Smarcel	;;
131115013Smarcel	mov	ar.rsc = rRSC		// restore ar.rsc
132115013Smarcel	mov	rBSP = ar.bsp
133129059Smarcel	add	rTMP3 = 136, rENV0	// spill_loc = &env->context.gr[0]
134115013Smarcel	;;
	// Build priunat: rotate the post-spill ar.unat right by the bit
	// position of the r4 spill slot (address bits 8:3).  The right
	// shift must be logical (shr.u): an arithmetic shr would smear
	// ar.unat bit 63 over the bits that wrap around.
135115013Smarcel	mov	rTMP2 = ar.unat
136115013Smarcel	nop
137115013Smarcel	extr.u	rTMP3 = rTMP3, 3, 6	// bitpos = spill_loc{8:3}
138115013Smarcel	;;
139160157Smarcel	and	rBIAS = rBSP, rNATP	// bias = (bsp & 0x1f8) ...
140115013Smarcel	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
141115013Smarcel	shr.u	rTMP5 = rTMP2, rTMP3	// (unat >> bitpos), zero-filled
142115013Smarcel	;;
143115013Smarcel	nop
144160157Smarcel	extr.u	rBIAS = rBIAS, 3, 6	//   ... div 8
145115013Smarcel	shl	rTMP2 = rTMP2, rTMP4	// (unat << (64 - bitpos))
146115013Smarcel	;;
147115013Smarcel	or	rTMP2 = rTMP2, rTMP5	// rotate_right(unat, bitpos)
148160157Smarcel	nop
149115013Smarcel	mov	rTMP4 = pr
150115013Smarcel	;;
	// rENV0 (from env+0) and rENV1 (from env+8) now walk the header
	// fields; the post-increments select the next field for each.
151115013Smarcel	st8	[rENV0] = rTMP1, 16	// env+0: valid_regs and valid_frs masks
152115013Smarcel	st8	[rENV1] = rRP, 24	// env+8: ip (my rp)
153160157Smarcel	sub	rBIAS = rNSLOT, rBIAS	// bias = nslots - bias
154115013Smarcel	;;
	// Stepping back nslots slots from bsp: add one slot for each
	// RNAT collection slot that lies in between.
155160157Smarcel	cmp.lt	p6, p0 = 0, rBIAS	// if (0 < bias) ...
156160157Smarcel	cmp.lt	p7, p0 = 63, rBIAS	// if (63 < bias) ...
157115013Smarcel	;;
158115013Smarcel	st8	[rENV0] = r12, 48	// env+16: sp
159115013Smarcel	st8	[rENV1] = rPFS, 40	// env+32: cfm (my pfs)
160115013Smarcel(p6)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
161115013Smarcel	;;
162115013Smarcel	st8	[rENV0] = rTMP4, 24	// env+64: preds
163115013Smarcel	st8	[rENV1] = rTMP2, 24	// env+72: priunat
164115013Smarcel(p7)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
165115013Smarcel	;;
166160157Smarcel	st8	[rENV0] = rRNAT, -64	// env+88: ar.rnat
167160157Smarcel	st8	[rENV1] = rUNAT, 8	// env+96: ar.unat
168115013Smarcel	dep.z	rTMP3 = rNSLOT, 3, 7 	// (nslots << 3)
169115013Smarcel	;;
170115013Smarcel	sub	rPBSP = rBSP, rTMP3	// prev_bsp = bsp - (nslots << 3)
171115013Smarcel	mov	rTMP3 = ar.fpsr
172115013Smarcel	mov	rTMP1 = ar.lc
173115013Smarcel	;;
174160157Smarcel	st8	[rENV0] = rPBSP, 184	// env+24: bsp (my prev bsp)
175160157Smarcel	st8	[rENV1] = rTMP3, 8	// env+104: ar.fpsr
176115013Smarcel	add	rENV2 = 320, rENV2	// rENV2 = env+528 = &env->context.rstate
177115013Smarcel	;;
178160157Smarcel	st8	[rENV1] = rTMP1, 112	// env+112: ar.lc
179129059Smarcel	STPTR	[rENV2] = r0		// env+528: env->rstate = 0
180115013Smarcel	nop
181115013Smarcel	;;
182160157Smarcel	// THIS CODE NEEDS TO BE SCHEDULED!!!
	// rENV0 (env+208) and rENV1 (env+224) alternate over the 16-byte
	// floating-point save slots.
183160157Smarcel	stf.spill [rENV0] = f2, 32	// env+208: f2
184160157Smarcel	stf.spill [rENV1] = f3, 32	// env+224: f3
185160157Smarcel	;;
186160157Smarcel	stf.spill [rENV0] = f4, 32	// env+240: f4
187160157Smarcel	stf.spill [rENV1] = f5, 32	// env+256: f5
188160157Smarcel	;;
189160157Smarcel	stf.spill [rENV0] = f16, 32	// env+272: f16
190160157Smarcel	stf.spill [rENV1] = f17, 32	// env+288: f17
191160157Smarcel	;;
192160157Smarcel	stf.spill [rENV0] = f18, 32	// env+304: f18
193160157Smarcel	stf.spill [rENV1] = f19, 32	// env+320: f19
194160157Smarcel	;;
195160157Smarcel	stf.spill [rENV0] = f20, 32	// env+336: f20
196160157Smarcel	stf.spill [rENV1] = f21, 32	// env+352: f21
197160157Smarcel	;;
198160157Smarcel	stf.spill [rENV0] = f22, 32	// env+368: f22
199160157Smarcel	stf.spill [rENV1] = f23, 32	// env+384: f23
200160157Smarcel	;;
201160157Smarcel	stf.spill [rENV0] = f24, 32	// env+400: f24
202160157Smarcel	stf.spill [rENV1] = f25, 32	// env+416: f25
203160157Smarcel	;;
204160157Smarcel	stf.spill [rENV0] = f26, 32	// env+432: f26
205160157Smarcel	stf.spill [rENV1] = f27, 32	// env+448: f27
206160157Smarcel	;;
207160157Smarcel	stf.spill [rENV0] = f28, 32	// env+464: f28
208160157Smarcel	stf.spill [rENV1] = f29, 32	// env+480: f29
209160157Smarcel	;;
210160157Smarcel	stf.spill [rENV0] = f30, 32	// env+496: f30
211160157Smarcel	stf.spill [rENV1] = f31, 32	// env+512: f31
212160157Smarcel	;;
213115013Smarcel	mov	ar.unat = rUNAT		// undo the effect of the st8.spill's
214115013Smarcel	mov	ret0 = r0		// return UWX_OK
215160157Smarcel	br.ret.sptk b0
216115013Smarcel	.endp
217115013Smarcel
218160157Smarcel// uwx_self_install_context(
219160157Smarcel//		struct uwx_env *env,
220160157Smarcel//		uint64_t r15,
221160157Smarcel//		uint64_t r16,
222160157Smarcel//		uint64_t r17,
223160157Smarcel//		uint64_t r18,
224160157Smarcel//		uint64_t ret
225160157Smarcel//		);
226160157Smarcel//
227160157Smarcel// Installs the given context, and sets the landing pad binding
228160157Smarcel// registers r15-r18 to the values given.
229160157Smarcel// Returns the value "ret" to the new context (for testing --
230160157Smarcel// when transferring to a landing pad, the new context won't
231160157Smarcel// care about the return value).
//
// In:	r32 = env, r33-r36 = values for r15-r18, r37 = value for ret0.
// The final br.ret goes to the ip loaded from env+8 with sp, ar.pfs,
// ar.bspstore and the preserved registers taken from env, so control
// does not come back to the caller of this routine.
// The env offsets read here are the ones uwx_self_init_context writes.
// NOTE(review): gp is also loaded from env+128, which
// uwx_self_init_context does not write -- presumably it is filled in
// elsewhere; confirm.
// The alloc result (rMYPFS) is not referenced again in this routine.
232160157Smarcel
233160157Smarcel	.proc	uwx_self_install_context
234160157Smarcel	.global uwx_self_install_context
235160157Smarceluwx_self_install_context:
236160157Smarcel	.prologue
237160157Smarcel	alloc	rMYPFS = ar.pfs, 6, 0, 0, 0
238160157Smarcel	.body
239160157Smarcel	SWIZZLE	rENV0 = r0, r32		// rENV0 = env
240160157Smarcel	;;
241160157Smarcel
242160157Smarcel	// THIS CODE NEEDS TO BE SCHEDULED!!!
243160157Smarcel
244160157Smarcel	// Restore GR 4-7 and ar.unat
	// priunat is rotated left by the bit position of the r4 slot so
	// that ld8.fill finds each NaT bit where it expects it.  The right
	// shift must be logical (shr.u): an arithmetic shr would set every
	// bit from bitpos upward whenever priunat bit 63 is set.
245160157Smarcel	add	rENV1 = 136, rENV0	// &env->context.gr[0]
246160157Smarcel	add	rENV2 = 72, rENV0	// &env->context.priunat
247160157Smarcel	;;
248160157Smarcel	ld8	rTMP2 = [rENV2], 24	// env+72: priunat
249160157Smarcel	extr.u	rTMP3 = rENV1, 3, 6	// bitpos = spill_loc{8:3}
250160157Smarcel	;;
251160157Smarcel	ld8	rUNAT = [rENV2], 48	// env+96: ar.unat (rENV2 is left at env+144)
252160157Smarcel	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
253160157Smarcel	shl	rTMP5 = rTMP2, rTMP3	// (priunat << bitpos)
254160157Smarcel	;;
255160157Smarcel	shr.u	rTMP2 = rTMP2, rTMP4	// (priunat >> (64 - bitpos)), zero-filled
256160157Smarcel	;;
257160157Smarcel	or	rTMP2 = rTMP2, rTMP5	// rotate_left(priunat, bitpos)
258160157Smarcel	;;
259160157Smarcel	mov	ar.unat = rTMP2		// put priunat in place
260160157Smarcel	;;
261160157Smarcel	ld8.fill r4 = [rENV1], 16	// env+136: r4
262160157Smarcel	ld8.fill r5 = [rENV2], 16	// env+144: r5
263160157Smarcel	;;
264160157Smarcel	ld8.fill r6 = [rENV1], 16	// env+152: r6
265160157Smarcel	ld8.fill r7 = [rENV2], 16	// env+160: r7
266160157Smarcel	;;
267160157Smarcel	mov	ar.unat = rUNAT		// restore real ar.unat
268160157Smarcel
269160157Smarcel	// Restore BR 1-5
270160157Smarcel	ld8	rTMP1 = [rENV1], 16	// env+168: b1
271160157Smarcel	ld8	rTMP2 = [rENV2], 16	// env+176: b2
272160157Smarcel	;;
273160157Smarcel	ld8	rTMP3 = [rENV1], 16	// env+184: b3
274160157Smarcel	ld8	rTMP4 = [rENV2], -168	// env+192: b4 (rENV2 -> env+24)
275160157Smarcel	mov	b1 = rTMP1
276160157Smarcel	;;
277160157Smarcel	ld8	rTMP1 = [rENV1], -168	// env+200: b5 (rENV1 -> env+32)
278160157Smarcel	mov	b2 = rTMP2
279160157Smarcel	mov	b3 = rTMP3
280160157Smarcel	mov	b4 = rTMP4
281160157Smarcel	;;
282160157Smarcel	mov	b5 = rTMP1
283160157Smarcel
284160157Smarcel	// Restore ar.bsp, ar.pfs, and ar.rnat
285160157Smarcel	ld8	rPFS = [rENV1], 56	// env+32: cfm (+saved ar.ec)
286160157Smarcel	mov	rRSC = ar.rsc
287160157Smarcel	adds	rBIAS = 0x1f8, r0	// mask for address bits 8:3
288160157Smarcel	;;
289160157Smarcel	flushrs
290160157Smarcel	ld8	rRNAT = [rENV1], -24	// env+88: ar.rnat
291160157Smarcel	ld8	rPBSP = [rENV2], 88	// env+24: prev_bsp
292160157Smarcel	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
293160157Smarcel	;;
294160157Smarcel	mov	ar.rsc = rRSC0		// enforced lazy mode
295160157Smarcel	extr.u	rNSLOT = rPFS, 7, 7 	// nslots = pfs.sol
296160157Smarcel	;;
297160157Smarcel	invala
	// Stepping forward nslots slots from prev_bsp: add one slot for
	// each RNAT collection slot that lies in between.
298160157Smarcel	and	rBIAS = rPBSP, rBIAS	// bias = prev_bsp & 0x1f8 ...
299160157Smarcel	;;
300160157Smarcel	extr.u	rBIAS = rBIAS, 3, 6	// ... div 8
301160157Smarcel	;;
302160157Smarcel	add	rBIAS = rNSLOT, rBIAS	// bias += nslots
303160157Smarcel	;;
304160157Smarcel	cmp.lt	p6, p0 = 63, rBIAS	// if (63 < bias) ...
305160157Smarcel	cmp.lt	p7, p0 = 126, rBIAS	// if (126 < bias) ...
306160157Smarcel	;;
307160157Smarcel(p6)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
308160157Smarcel	;;
309160157Smarcel(p7)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
310160157Smarcel	;;
311160157Smarcel	dep.z	rTMP3 = rNSLOT, 3, 7 	// (nslots << 3)
312160157Smarcel	;;
313160157Smarcel	add	rBSP = rPBSP, rTMP3	// bsp = prev_bsp + (nslots << 3)
314160157Smarcel	;;
315160157Smarcel	mov	ar.bspstore = rBSP	// restore ar.bsp
316160157Smarcel	;;
317160157Smarcel	mov	ar.rnat = rRNAT		// restore ar.rnat
318160157Smarcel	mov	ar.pfs = rPFS		// restore ar.pfs
319160157Smarcel	;;
320160157Smarcel	mov	ar.rsc = rRSC		// restore ar.rsc
321160157Smarcel
322160157Smarcel	// Restore preds and ar.lc
323160157Smarcel	ld8	rTMP1 = [rENV1], -56	// env+64: preds
324160157Smarcel	ld8	rTMP2 = [rENV2], -96	// env+112: ar.lc
325160157Smarcel	;;
326160157Smarcel	mov	pr = rTMP1
327160157Smarcel	mov	ar.lc = rTMP2
328160157Smarcel
329160157Smarcel	// Get previous sp and ip
330160157Smarcel	ld8	rRP = [rENV1], 96	// env+8: ip (my rp)
331160157Smarcel	ld8	rPSP = [rENV2], 112	// env+16: sp
332160157Smarcel	;;
333160157Smarcel
334160157Smarcel	// Restore ar.fpsr and gp
335160157Smarcel	ld8	rTMP1 = [rENV1], 104	// env+104: ar.fpsr
336160157Smarcel	ld8	r1 = [rENV2], 96	// env+128: gp
337160157Smarcel	;;
338160157Smarcel	mov	ar.fpsr = rTMP1		// restore ar.fpsr
339160157Smarcel
340160157Smarcel	// Restore FR 2-5 and 16-31
	// rENV1 (env+208) and rENV2 (env+224) alternate over the 16-byte
	// floating-point save slots.
341160157Smarcel	ldf.fill f2 = [rENV1], 32	// env+208: f2
342160157Smarcel	ldf.fill f3 = [rENV2], 32	// env+224: f3
343160157Smarcel	;;
344160157Smarcel	ldf.fill f4 = [rENV1], 32	// env+240: f4
345160157Smarcel	ldf.fill f5 = [rENV2], 32	// env+256: f5
346160157Smarcel	;;
347160157Smarcel	ldf.fill f16 = [rENV1], 32	// env+272: f16
348160157Smarcel	ldf.fill f17 = [rENV2], 32	// env+288: f17
349160157Smarcel	;;
350160157Smarcel	ldf.fill f18 = [rENV1], 32	// env+304: f18
351160157Smarcel	ldf.fill f19 = [rENV2], 32	// env+320: f19
352160157Smarcel	;;
353160157Smarcel	ldf.fill f20 = [rENV1], 32	// env+336: f20
354160157Smarcel	ldf.fill f21 = [rENV2], 32	// env+352: f21
355160157Smarcel	;;
356160157Smarcel	ldf.fill f22 = [rENV1], 32	// env+368: f22
357160157Smarcel	ldf.fill f23 = [rENV2], 32	// env+384: f23
358160157Smarcel	;;
359160157Smarcel	ldf.fill f24 = [rENV1], 32	// env+400: f24
360160157Smarcel	ldf.fill f25 = [rENV2], 32	// env+416: f25
361160157Smarcel	;;
362160157Smarcel	ldf.fill f26 = [rENV1], 32	// env+432: f26
363160157Smarcel	ldf.fill f27 = [rENV2], 32	// env+448: f27
364160157Smarcel	;;
365160157Smarcel	ldf.fill f28 = [rENV1], 32	// env+464: f28
366160157Smarcel	ldf.fill f29 = [rENV2], 32	// env+480: f29
367160157Smarcel	;;
368160157Smarcel	ldf.fill f30 = [rENV1], 32	// env+496: f30
369160157Smarcel	ldf.fill f31 = [rENV2], 32	// env+512: f31
370160157Smarcel
371160157Smarcel	// Set landing pad parameter registers
	// (r15-r18 are the registers aliased as rPFS, rUNAT, rRNAT and
	// rENV0 above; none of those aliases is read past this point.)
372160157Smarcel	mov	r15 = r33
373160157Smarcel	mov	r16 = r34
374160157Smarcel	mov	r17 = r35
375160157Smarcel	mov	r18 = r36
376160157Smarcel
377160157Smarcel	// Restore previous sp and Return
378160157Smarcel	mov	ret0 = r37
379160157Smarcel	mov	sp = rPSP
380160157Smarcel	mov	b0 = rRP
381160157Smarcel	br.ret.sptk b0
382160157Smarcel
383160157Smarcel	.endp
384