/* Overlay manager for SPU.

   Copyright 2006, 2007 Free Software Foundation, Inc.

   This file is part of GLD, the Gnu Linker.

   GLD is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GLD is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GLD; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/**
 * MFC DMA definitions.
 *
 * MFC_GET_CMD		value written to $MFC_Cmd to issue each transfer.
 * MFC_MAX_DMA_SIZE	largest size passed to $MFC_Size in one request;
 *			bigger overlays are split across several requests.
 * MFC_TAG_UPDATE_ALL	value written to $MFC_WrTagUpdate before the
 *			blocking read of $MFC_RdTagStat.
 * MFC_TAG_ID		tag written to $MFC_TagId for every overlay DMA,
 *			and the tag the completion wait is keyed on.
 */
#define MFC_GET_CMD		0x40
#define MFC_MAX_DMA_SIZE	0x4000
#define MFC_TAG_UPDATE_ALL	2
#define MFC_TAG_ID		0


/**
 * Temporary register allocations.
 *
 * Many names below map to the same physical register, so at most one
 * alias per register is live at any point in __ovly_load.
 *
 * $75, $76 and $77 are used as scratch and are NOT saved.
 * $78 is an input (partition number); it is reused as cgshuf, but only
 * after its last use as the partition number.
 * $6, $7 and $8 (plus $9 when OVLY_IRQ_SAVE is defined) are stored to
 * the stack on entry to __ovly_load and reloaded before it branches to
 * the target.
 */
#define tab		$75
#define cgbits		$75
#define add64		$75
#define ealo		$75
#define newmask		$75
#define tagstat		$75
#define bchn		$75
#define rv1		$75

#define off		$76
#define off64		$76
#define maxsize		$76
#define oldmask		$76
#define sz		$76
#define lnkr		$76
#define rv2		$76

#define cur		$77
#define cmp		$77
#define buf		$77
#define genwi		$77
#define tagid		$77
#define cmd		$77
#define rv3		$77

/* Shares $78 with the incoming partition number.  */
#define cgshuf		$78

#define vma		$6

#define map		$7
#define osize		$7
#define cmp2		$7

#define ea64		$8
#define retval		$8

#ifdef OVLY_IRQ_SAVE
/* irqtmp shares $8 with retval/ea64: it is only used before retval is
 * loaded and after ea64 is dead.  */
#define irqtmp		$8
#define irq_stat	$9
#endif

76	.extern		_ovly_table
77	.extern		_ovly_buf_table
78
79	.text
80	.align 		4
81	.type		__rv_pattern, @object
82	.size		__rv_pattern, 16
83__rv_pattern:
84	.word		0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213
85	.type		__cg_pattern, @object
86	.size		__cg_pattern, 16
87__cg_pattern:
88	.word		0x04050607, 0x80808080, 0x80808080, 0x80808080
89
/**
 * __ovly_return - stub for returning from overlay functions.
 *
 * inputs:
 *	$lr	link register, as built by __ovly_load:
 *		{__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}
 *
 * outputs:
 *	$78	old partition number, to be reloaded
 *	$79	return address in old partition
 *
 * If the old partition number is zero the stub branches straight to
 * the return address.  Otherwise execution deliberately falls through
 * into __ovly_load, which must therefore stay immediately after this
 * stub, with $78/$79 already holding its inputs.
 */
	.global		__ovly_return
	.type		__ovly_return, @function

/* Pad word.  __ovly_load writes its pattern displacements as
 * (pattern - __ovly_return + 4); the +4 cancels this word, presumably
 * to keep the lqd displacement a multiple of 16.  */
	.word		0
__ovly_return:
	shlqbyi		$78, $lr, 4	/* word 0 = prev ovl ndx */
	shlqbyi		$79, $lr, 8	/* word 0 = caller return adr */
	biz		$78, $79	/* non-overlay caller: return now */
	.size		__ovly_return, . - __ovly_return

/**
 * __ovly_load - copy an overlay partition to local store.
 *
 * inputs:
 *	$78	partition number to be loaded.
 *	$79	branch target in new partition.
 *	$lr	link register, containing return addr.
 *
 * outputs:
 *	$lr	new link register, returning through __ovly_return.
 *
 * clobbers:
 *	$75, $76, $77 on every call; $78 when a partition is actually
 *	loaded.  $6, $7, $8 (and $9 with OVLY_IRQ_SAVE) are saved below
 *	$sp and restored before the final branch.
 *
 * Copy a new overlay partition into local store, or return
 * immediately if the partition is already resident.
 */
	.global		__ovly_load
	.type		__ovly_load, @function

__ovly_load:
/* Save temporary registers to stack.  $sp itself is not adjusted. */
	stqd		$6, -16($sp)
	stqd		$7, -32($sp)
	stqd		$8, -48($sp)

#ifdef OVLY_IRQ_SAVE
/* Save irq state, then disable interrupts.  The bid to the very next
 * instruction is only there for its interrupt-disable side effect. */
	stqd		$9, -64($sp)
	ila		irqtmp, __ovly_irq_save
	rdch		irq_stat, $SPU_RdMachStat
	bid		irqtmp
__ovly_irq_save:
#endif

/* Set branch hint to overlay target. */
	hbr		__ovly_load_ret, $79

/* Get caller's overlay index by back chaining through stack frames.
 * Loop until end of stack (back chain all-zeros) or
 * encountered a link register we set here. */
	lqd		bchn, 0($sp)
	ila		retval, __ovly_return

__ovly_backchain_loop:
	lqd		lnkr, 16(bchn)		/* saved lr of this frame */
	lqd		bchn, 0(bchn)		/* step to the next frame */
	ceq		cmp, lnkr, retval
	ceqi		cmp2, bchn, 0
	or		cmp, cmp, cmp2
	brz		cmp, __ovly_backchain_loop

/* If we reached the zero back-chain, then lnkr is bogus.  Clear the
 * part of lnkr that we use later (slot 3).  The rotate moves the
 * "back chain == 0" mask from slot 0 into slot 3. */
	rotqbyi		cmp2, cmp2, 4
	andc		lnkr, lnkr, cmp2

/* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}.
 * rv2 supplies words 0-1 (from retval and lnkr), rv3 supplies words 2-3
 * (from the incoming $lr and $78); fsmbi 0xff selects bytes 8-15. */
	lqd		rv1, (__rv_pattern-__ovly_return+4)(retval)
	shufb		rv2, retval, lnkr, rv1
	shufb		rv3, $lr, $78, rv1
	fsmbi		rv1, 0xff
	selb		rv2, rv2, rv3, rv1
/* If we have a tail call from one overlay function to another overlay,
   then lr is already set up.  Don't change it.  */
	ceq		rv1, $lr, retval
	fsmb		rv1, rv1
	selb		$lr, rv2, $lr, rv1

/* Branch to $79 if non-overlay */
	brz		$78, __ovly_load_restore

/* Load values from _ovly_table[$78].
 *	extern struct {
 *		u32 vma;
 *		u32 size;
 *		u32 file_offset;
 *		u32 buf;
 *	} _ovly_table[];
 *
 * Partition numbers start at 1, hence the -16 bias on the table base.
 * The whole 16-byte entry lands in vma; word 0 is the vma itself.
 */
	shli		off, $78, 4
	ila		tab, _ovly_table - 16
	lqx		vma, tab, off
	rotqbyi		buf, vma, 12		/* word 0 = entry.buf */

/* Load values from _ovly_buf_table[buf].
 *	extern struct {
 *		u32 mapped;
 *	} _ovly_buf_table[];
 *
 * buf is 1-based; off becomes the byte offset of the 4-byte entry.
 */
	ila		tab, _ovly_buf_table
	ai		off, buf, -1
	shli		off, off, 2
	lqx		map, tab, off		/* quadword holding the entry */
	rotqby		cur, map, off		/* bring the entry to word 0 */

/* Branch to $79 now if overlay is already mapped.  */
	ceq		cmp, $78, cur
	brnz		cmp, __ovly_load_restore

/* Marker for profiling code.  If we get here, we are about to load
 * a new overlay.
 */
	.global		__ovly_load_event
	.type		__ovly_load_event, @function
__ovly_load_event:

/* Set _ovly_buf_table[buf].mapped = $78.  Read-modify-write of the
 * containing quadword; cwx builds the word-insert control. */
	cwx		genwi, tab, off
	shufb		map, $78, map, genwi
	stqx		map, tab, off

/* A new partition needs to be loaded. Prepare for DMA loop.
 * _EAR_ is the 64b base EA, filled in at run time by the
 * loader, and indicating the value for SPU executable image start.
 *
 * From here on $78 is reused as cgshuf; the partition number is dead.
 * sz starts out as entry.file_offset so that the first pass through
 * the loop adds the file offset to the base EA; on later passes it
 * holds the size of the request just issued.
 */
	lqd		cgshuf, (__cg_pattern-__ovly_return+4)(retval)
	rotqbyi		osize, vma, 4		/* word 0 = entry.size */
	rotqbyi		sz, vma, 8		/* word 0 = entry.file_offset */
	lqa		ea64, _EAR_

__ovly_xfer_loop:
/* 64b add to compute next ea64.  off64 = {0, sz} as a 64-bit value;
 * the carry out of the low word is shuffled up into word 0 and fed
 * to addx. */
	rotqmbyi	off64, sz, -4
	cg		cgbits, ea64, off64
	shufb		add64, cgbits, cgbits, cgshuf
	addx		add64, ea64, off64
	ori		ea64, add64, 0

/* Setup DMA parameters, then issue DMA request.
 * sz = min(osize, MFC_MAX_DMA_SIZE). */
	rotqbyi		ealo, add64, 4		/* word 0 = low 32 bits of EA */
	ila		maxsize, MFC_MAX_DMA_SIZE
	cgt		cmp, osize, maxsize
	selb		sz, osize, maxsize, cmp
	ila		tagid, MFC_TAG_ID
	wrch		$MFC_LSA, vma
	wrch		$MFC_EAH, ea64
	wrch		$MFC_EAL, ealo
	wrch		$MFC_Size, sz
	wrch		$MFC_TagId, tagid
	ila		cmd, MFC_GET_CMD
	wrch		$MFC_Cmd, cmd

/* Increment vma, decrement size, branch back as needed. */
	a		vma, vma, sz
	sf		osize, sz, osize	/* osize -= sz */
	brnz		osize, __ovly_xfer_loop

/* Save app's tagmask, wait for DMA complete, restore mask.
 * The temporary mask must select MFC_TAG_ID only.  ila places the
 * immediate zero-extended in each word; ilh would copy it into both
 * halfwords of the word and thereby also select tag MFC_TAG_ID + 16,
 * making this wait depend on unrelated application transfers. */
	rdch		oldmask, $MFC_RdTagMask
#if MFC_TAG_ID < 16
	ila		newmask, 1 << MFC_TAG_ID
#else
	ilhu		newmask, 1 << (MFC_TAG_ID - 16)
#endif
	wrch		$MFC_WrTagMask, newmask
	ila		tagstat, MFC_TAG_UPDATE_ALL
	wrch		$MFC_WrTagUpdate, tagstat
	rdch		tagstat, $MFC_RdTagStat	/* blocks until complete */
	sync					/* code was just rewritten */
	wrch		$MFC_WrTagMask, oldmask

	.global		_ovly_debug_event
	.type		_ovly_debug_event, @function
_ovly_debug_event:
/* GDB inserts debugger trap here.  */
	nop

__ovly_load_restore:
#ifdef OVLY_IRQ_SAVE
/* Conditionally re-enable interrupts: binze branches (and enables)
 * only if they were enabled on entry, otherwise falls through. */
	andi		irq_stat, irq_stat, 1
	ila		irqtmp, __ovly_irq_restore
	binze		irq_stat, irqtmp
__ovly_irq_restore:
	lqd		$9, -64($sp)
#endif

/* Restore saved registers. */
	lqd		$8, -48($sp)
	lqd		$7, -32($sp)
	lqd		$6, -16($sp)

__ovly_load_ret:
/* Branch to target address. */
	bi		$79

	.size		__ovly_load, . - __ovly_load
