// Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P.
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

// Pointer handling depends on the data model.  With _LP64 the env
// pointer argument is used as passed (plain add) and pointer-sized
// fields are stored with st8; otherwise the 32-bit pointer argument is
// converted with addp4 and pointer-sized fields are stored with st4.
#ifdef _LP64
#define SWIZZLE add
#define STPTR st8
#else
#define SWIZZLE addp4
#define STPTR st4
#endif

// Symbolic names for the working registers used by the routines in
// this file.
rRP	= r14		// return pointer (b0) / saved ip
rPFS	= r15		// ar.pfs / saved cfm
rUNAT	= r16		// ar.unat
rRNAT	= r17		// ar.rnat
rENV0	= r18		// cursor into env (initially env itself)
rENV1	= r19		// second cursor into env
rENV2	= r20		// third cursor into env
rNSLOT	= r21		// pfs.sol, later adjusted for NaT collections
rBSP	= r22		// ar.bsp
rPBSP	= r23		// previous frame's bsp
rRSC	= r24		// ar.rsc as found
rNATP	= r25		// 0x1f8 mask (bits 8:3 of a backing store address)
rBIAS	= r26		// slot-count bias used to count NaT collections
rRSC0	= r27		// ar.rsc with the mode bits cleared
rTMP1	= r28
rTMP2	= r29
rTMP3	= r30
rTMP4	= r31
rTMP5	= r8
rMYPFS	= r9		// alloc result in uwx_self_install_context
rPSP	= r10		// previous sp

// Bits of the valid_regs mask.
VALID_IP      = 1
VALID_SP      = 1 << 1
VALID_BSP     = 1 << 2
VALID_CFM     = 1 << 3
VALID_PREDS   = 1 << 7
VALID_PRIUNAT = 1 << 8
VALID_RNAT    = 1 << 10
VALID_UNAT    = 1 << 11
VALID_FPSR    = 1 << 12
VALID_LC      = 1 << 13
VALID_GRS     = 0xf << 16	// four bits: one per saved GR (r4-r7)
VALID_BRS     = 0x1f << 20	// five bits: one per saved BR (b1-b5)
VALID_BASIC4  = VALID_IP | VALID_SP | VALID_BSP | VALID_CFM
VALID_SPEC    = VALID_PREDS | VALID_PRIUNAT | VALID_RNAT | VALID_UNAT | VALID_FPSR | VALID_LC
VALID_REGS    = VALID_BASIC4 | VALID_SPEC | VALID_GRS | VALID_BRS
// Twenty bits: one per saved FR (f2-f5 and f16-f31).
VALID_FRS     = 0xfffff
// valid_regs and valid_frs are separate unsigned int fields.
// In order to store them with a single st8, we need to know
// the endianness.
#ifdef __LITTLE_ENDIAN__
VALID_BITS   = (VALID_FRS << 32) | VALID_REGS
#else
VALID_BITS   = (VALID_REGS << 32) | VALID_FRS
#endif

	.text

// int uwx_self_init_context(struct uwx_env *env);
//
// Stores a snapshot of the caller's context in the uwx_env structure.
//
// In:	r32  = env
// Out:	ret0 = 0 (UWX_OK)
//
// Offsets written, relative to env:
//	  0	valid_regs and valid_frs (one st8 of VALID_BITS)
//	  8	ip (this routine's return pointer, b0)
//	 16	sp (r12)
//	 24	bsp (computed from ar.bsp and pfs.sol)
//	 32	cfm (ar.pfs as returned by alloc)
//	 64	preds
//	 72	priunat (NaT bits of r4-r7, rotated so r4's bit is bit 0)
//	 88	ar.rnat
//	 96	ar.unat (value on entry)
//	104	ar.fpsr
//	112	ar.lc
//	136-160	r4-r7
//	168-200	b1-b5
//	208-512	f2-f5, f16-f31 (16 bytes each)
//	528	rstate, cleared
//
// ar.rsc and ar.unat are modified along the way and restored before
// returning.  Uses p6 and p7 plus the working registers named at the
// top of the file.

	.proc	uwx_self_init_context
	.global uwx_self_init_context
uwx_self_init_context:
	.prologue
	alloc	rPFS = ar.pfs, 1, 0, 0, 0
	mov	rUNAT = ar.unat		// entry ar.unat; the spills below modify it
	.body
	SWIZZLE	rENV0 = r0, r32		// rENV0 = &env
	;;
	flushrs				// write dirty stacked regs to the backing store
	extr.u	rNSLOT = rPFS, 7, 7	// nslots = pfs.sol
	mov	rRP = b0
	;;
	mov	rRSC = ar.rsc
	add	rENV1 = 136, rENV0	// rENV1 = &env->context.gr[0]
	add	rENV2 = 144, rENV0	// rENV2 = &env->context.gr[1]
	;;
	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
	adds	rNATP = 0x1f8, r0	// mask for address bits 8:3
	mov	rTMP1 = b1
	;;
	// Preserved GRs and BRs; the two cursors interleave, each
	// stepping by 16.
	st8.spill [rENV1] = r4, 16	// env+136: r4
	st8.spill [rENV2] = r5, 16	// env+144: r5
	mov	rTMP2 = b2
	;;
	st8.spill [rENV1] = r6, 16	// env+152: r6
	st8.spill [rENV2] = r7, 16	// env+160: r7
	mov	rTMP3 = b3
	;;
	st8	[rENV1] = rTMP1, 16	// env+168: b1
	st8	[rENV2] = rTMP2, 16	// env+176: b2
	mov	rTMP1 = b4
	;;
	st8	[rENV1] = rTMP3, 16	// env+184: b3
	st8	[rENV2] = rTMP1, 16	// env+192: b4
	mov	rTMP2 = b5
	;;
	st8	[rENV1] = rTMP2		// env+200: b5
	mov	ar.rsc = rRSC0		// enforced lazy mode
	add	rENV1 = 8, rENV0	// rENV1 = &env->context ip slot (env+8)
	;;
	mov	rRNAT = ar.rnat		// get copy of ar.rnat
	movl	rTMP1 = VALID_BITS	// valid_regs: ip, sp, bsp, cfm,
					// preds, priunat, rnat, unat, fpsr,
					// lc, grs, brs; valid_frs: all 20
					// = 0x01ff3d8f000fffff (big-endian)
					// = 0x000fffff01ff3d8f (little-endian)
	;;
	mov	ar.rsc = rRSC		// restore ar.rsc
	mov	rBSP = ar.bsp
	add	rTMP3 = 136, rENV0	// spill_loc = &env->context.gr[0]
	;;
	// Build priunat: st8.spill recorded each NaT bit in ar.unat at the
	// bit selected by address bits 8:3, so rotate right by the bit
	// position of gr[0] to make the result independent of env's address.
	mov	rTMP2 = ar.unat		// ar.unat as updated by the spills
	nop
	extr.u	rTMP3 = rTMP3, 3, 6	// bitpos = spill_loc{8:3}
	;;
	and	rBIAS = rBSP, rNATP	// bias = (bsp & 0x1f8) ...
	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
	shr	rTMP5 = rTMP2, rTMP3	// (unat >> bitpos)
	;;
	nop
	extr.u	rBIAS = rBIAS, 3, 6	//   ... div 8
	shl	rTMP2 = rTMP2, rTMP4	// (unat << (64 - bitpos))
	;;
	or	rTMP2 = rTMP2, rTMP5	// rotate_right(unat, bitpos)
	nop
	mov	rTMP4 = pr
	;;
	st8	[rENV0] = rTMP1, 16	// env+0: valid_regs and valid_frs masks
	st8	[rENV1] = rRP, 24	// env+8: ip (my rp)
	sub	rBIAS = rNSLOT, rBIAS	// bias = nslots - bias
	;;
	// Stepping back nslots registers from bsp crosses one NaT
	// collection slot if nslots exceeds bsp's slot index, and a second
	// one if it exceeds it by more than 63.
	cmp.lt	p6, p0 = 0, rBIAS	// if (0 < bias) ...
	cmp.lt	p7, p0 = 63, rBIAS	// if (63 < bias) ...
	;;
	st8	[rENV0] = r12, 48	// env+16: sp
	st8	[rENV1] = rPFS, 40	// env+32: cfm (my pfs)
(p6)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
	st8	[rENV0] = rTMP4, 24	// env+64: preds
	st8	[rENV1] = rTMP2, 24	// env+72: priunat
(p7)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
	st8	[rENV0] = rRNAT, -64	// env+88: ar.rnat
	st8	[rENV1] = rUNAT, 8	// env+96: ar.unat
	dep.z	rTMP3 = rNSLOT, 3, 7	// (nslots << 3)
	;;
	sub	rPBSP = rBSP, rTMP3	// prev_bsp = bsp - (nslots << 3)
	mov	rTMP3 = ar.fpsr
	mov	rTMP1 = ar.lc
	;;
	st8	[rENV0] = rPBSP, 184	// env+24: bsp (my prev bsp)
	st8	[rENV1] = rTMP3, 8	// env+104: ar.fpsr
	add	rENV2 = 320, rENV2	// rENV2 = &env->context.rstate
	;;
	st8	[rENV1] = rTMP1, 112	// env+112: ar.lc
	STPTR	[rENV2] = r0		// env+528: env->rstate = 0
	nop
	;;
	// THIS CODE NEEDS TO BE SCHEDULED!!!
	// Preserved FRs; the two cursors interleave, each stepping by 32.
	stf.spill [rENV0] = f2, 32	// env+208: f2
	stf.spill [rENV1] = f3, 32	// env+224: f3
	;;
	stf.spill [rENV0] = f4, 32	// env+240: f4
	stf.spill [rENV1] = f5, 32	// env+256: f5
	;;
	stf.spill [rENV0] = f16, 32	// env+272: f16
	stf.spill [rENV1] = f17, 32	// env+288: f17
	;;
	stf.spill [rENV0] = f18, 32	// env+304: f18
	stf.spill [rENV1] = f19, 32	// env+320: f19
	;;
	stf.spill [rENV0] = f20, 32	// env+336: f20
	stf.spill [rENV1] = f21, 32	// env+352: f21
	;;
	stf.spill [rENV0] = f22, 32	// env+368: f22
	stf.spill [rENV1] = f23, 32	// env+384: f23
	;;
	stf.spill [rENV0] = f24, 32	// env+400: f24
	stf.spill [rENV1] = f25, 32	// env+416: f25
	;;
	stf.spill [rENV0] = f26, 32	// env+432: f26
	stf.spill [rENV1] = f27, 32	// env+448: f27
	;;
	stf.spill [rENV0] = f28, 32	// env+464: f28
	stf.spill [rENV1] = f29, 32	// env+480: f29
	;;
	stf.spill [rENV0] = f30, 32	// env+496: f30
	stf.spill [rENV1] = f31, 32	// env+512: f31
	;;
	mov	ar.unat = rUNAT		// undo the changes made by st8.spill
	mov	ret0 = r0		// return UWX_OK
	br.ret.sptk b0
	.endp

// uwx_self_install_context(
//		struct uwx_env *env,
//		uint64_t r15,
//		uint64_t r16,
//		uint64_t r17,
//		uint64_t r18,
//		uint64_t ret
//		);
//
// Installs the given context, and sets the landing pad binding
// registers r15-r18 to the values given.
// Returns the value "ret" to the new context (for testing --
// when transferring to a landing pad, the new context won't
// care about the return value).
//
// In:	r32 = env
//	r33-r36 = values copied into r15-r18
//	r37 = value placed in ret0
//
// Control does not come back to the caller: the final br.ret goes to
// the ip loaded from env+8, with sp loaded from env+16 and the register
// stack repositioned from the bsp/cfm loaded from env+24/env+32.
// The env offsets read here are the ones written by
// uwx_self_init_context, plus gp at env+128.

	.proc	uwx_self_install_context
	.global uwx_self_install_context
uwx_self_install_context:
	.prologue
	alloc	rMYPFS = ar.pfs, 6, 0, 0, 0	// rMYPFS is not read again
	.body
	SWIZZLE	rENV0 = r0, r32		// rENV0 = &env
	;;

	// THIS CODE NEEDS TO BE SCHEDULED!!!

	// Restore GR 4-7 and ar.unat
	// ld8.fill takes each NaT bit from the ar.unat bit selected by
	// address bits 8:3, so the stored priunat is rotated left by the
	// bit position of gr[0] before being placed in ar.unat.
	add	rENV1 = 136, rENV0	// &env->context.gr[0]
	add	rENV2 = 72, rENV0	// &env->context.priunat
	;;
	ld8	rTMP2 = [rENV2], 24	// env+72: priunat
	extr.u	rTMP3 = rENV1, 3, 6	// bitpos = spill_loc{8:3}
	;;
	ld8	rUNAT = [rENV2], 48	// env+96: ar.unat
	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
	shl	rTMP5 = rTMP2, rTMP3	// (unat << bitpos)
	;;
	shr	rTMP2 = rTMP2, rTMP4	// (unat >> (64 - bitpos))
	;;
	or	rTMP2 = rTMP2, rTMP5	// rotate_left(unat, bitpos)
	;;
	mov	ar.unat = rTMP2		// put priunat in place
	;;
	ld8.fill r4 = [rENV1], 16	// env+136: r4
	ld8.fill r5 = [rENV2], 16	// env+144: r5
	;;
	ld8.fill r6 = [rENV1], 16	// env+152: r6
	ld8.fill r7 = [rENV2], 16	// env+160: r7
	;;
	mov	ar.unat = rUNAT		// restore real ar.unat

	// Restore BR 1-5
	ld8	rTMP1 = [rENV1], 16	// env+168: b1
	ld8	rTMP2 = [rENV2], 16	// env+176: b2
	;;
	ld8	rTMP3 = [rENV1], 16	// env+184: b3
	ld8	rTMP4 = [rENV2], -168	// env+192: b4; rENV2 -> env+24
	mov	b1 = rTMP1
	;;
	ld8	rTMP1 = [rENV1], -168	// env+200: b5; rENV1 -> env+32
	mov	b2 = rTMP2
	mov	b3 = rTMP3
	mov	b4 = rTMP4
	;;
	mov	b5 = rTMP1

	// Restore ar.bsp, ar.pfs, and ar.rnat
	ld8	rPFS = [rENV1], 56	// env+32: cfm (+saved ar.ec)
	mov	rRSC = ar.rsc
	adds	rBIAS = 0x1f8, r0	// mask for address bits 8:3
	;;
	flushrs				// write dirty stacked regs to the backing store
	ld8	rRNAT = [rENV1], -24	// env+88: ar.rnat
	ld8	rPBSP = [rENV2], 88	// env+24: prev_bsp
	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
	;;
	mov	ar.rsc = rRSC0		// enforced lazy mode
	extr.u	rNSLOT = rPFS, 7, 7	// nslots = pfs.sol
	;;
	invala
	and	rBIAS = rPBSP, rBIAS	// bias = prev_bsp & 0x1f8 ...
	;;
	extr.u	rBIAS = rBIAS, 3, 6	// ... div 8
	;;
	add	rBIAS = rNSLOT, rBIAS	// bias += nslots
	;;
	// Stepping forward nslots registers from prev_bsp crosses one NaT
	// collection slot if the sum passes index 63, and a second one if
	// it passes 126.
	cmp.lt	p6, p0 = 63, rBIAS	// if (63 < bias) ...
	cmp.lt	p7, p0 = 126, rBIAS	// if (126 < bias) ...
	;;
(p6)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
(p7)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
	dep.z	rTMP3 = rNSLOT, 3, 7	// (nslots << 3)
	;;
	add	rBSP = rPBSP, rTMP3	// bsp = prev_bsp + (nslots << 3)
	;;
	mov	ar.bspstore = rBSP	// restore ar.bsp
	;;
	mov	ar.rnat = rRNAT		// restore ar.rnat
	mov	ar.pfs = rPFS		// restore ar.pfs
	;;
	mov	ar.rsc = rRSC		// restore ar.rsc

	// Restore preds and ar.lc
	ld8	rTMP1 = [rENV1], -56	// env+64: preds
	ld8	rTMP2 = [rENV2], -96	// env+112: ar.lc
	;;
	mov	pr = rTMP1
	mov	ar.lc = rTMP2

	// Get previous sp and ip
	ld8	rRP = [rENV1], 96	// env+8: ip (my rp)
	ld8	rPSP = [rENV2], 112	// env+16: sp
	;;

	// Restore ar.fpsr and gp
	ld8	rTMP1 = [rENV1], 104	// env+104: ar.fpsr
	ld8	r1 = [rENV2], 96	// env+128: gp
	;;
	mov	ar.fpsr = rTMP1		// restore ar.fpsr

	// Restore FR 2-5 and 16-31
	// The two cursors interleave, each stepping by 32.
	ldf.fill f2 = [rENV1], 32	// env+208: f2
	ldf.fill f3 = [rENV2], 32	// env+224: f3
	;;
	ldf.fill f4 = [rENV1], 32	// env+240: f4
	ldf.fill f5 = [rENV2], 32	// env+256: f5
	;;
	ldf.fill f16 = [rENV1], 32	// env+272: f16
	ldf.fill f17 = [rENV2], 32	// env+288: f17
	;;
	ldf.fill f18 = [rENV1], 32	// env+304: f18
	ldf.fill f19 = [rENV2], 32	// env+320: f19
	;;
	ldf.fill f20 = [rENV1], 32	// env+336: f20
	ldf.fill f21 = [rENV2], 32	// env+352: f21
	;;
	ldf.fill f22 = [rENV1], 32	// env+368: f22
	ldf.fill f23 = [rENV2], 32	// env+384: f23
	;;
	ldf.fill f24 = [rENV1], 32	// env+400: f24
	ldf.fill f25 = [rENV2], 32	// env+416: f25
	;;
	ldf.fill f26 = [rENV1], 32	// env+432: f26
	ldf.fill f27 = [rENV2], 32	// env+448: f27
	;;
	ldf.fill f28 = [rENV1], 32	// env+464: f28
	ldf.fill f29 = [rENV2], 32	// env+480: f29
	;;
	ldf.fill f30 = [rENV1], 32	// env+496: f30
	ldf.fill f31 = [rENV2], 32	// env+512: f31

	// Set landing pad parameter registers
	mov	r15 = r33
	mov	r16 = r34
	mov	r17 = r35
	mov	r18 = r36

	// Restore previous sp and Return
	mov	ret0 = r37
	mov	sp = rPSP
	mov	b0 = rRP		// target is the saved ip, not our caller
	br.ret.sptk b0

	.endp
