1214571Sdim/* Overlay manager for SPU.
2214571Sdim
3214571Sdim   Copyright 2006, 2007 Free Software Foundation, Inc.
4214571Sdim
5214571Sdim   This file is part of GLD, the Gnu Linker.
6214571Sdim
7214571Sdim   GLD is free software; you can redistribute it and/or modify
8214571Sdim   it under the terms of the GNU General Public License as published by
9214571Sdim   the Free Software Foundation; either version 2, or (at your option)
10214571Sdim   any later version.
11214571Sdim
12214571Sdim   GLD is distributed in the hope that it will be useful,
13214571Sdim   but WITHOUT ANY WARRANTY; without even the implied warranty of
14214571Sdim   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15214571Sdim   GNU General Public License for more details.
16214571Sdim
17214571Sdim   You should have received a copy of the GNU General Public License
18214571Sdim   along with GLD; see the file COPYING.  If not, write to the Free
19214571Sdim   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
20214571Sdim   02110-1301, USA.  */
21214571Sdim
22214571Sdim/**
23214571Sdim * MFC DMA definitions, consumed by the transfer loop and the tag wait
 * in __ovly_load:
 *
 *	MFC_GET_CMD		value written to $MFC_Cmd for every chunk.
 *	MFC_MAX_DMA_SIZE	largest size written to $MFC_Size for one
 *				request; bigger overlays are split up.
 *	MFC_TAG_UPDATE_ALL	value written to $MFC_WrTagUpdate before
 *				$MFC_RdTagStat is read.
 *	MFC_TAG_ID		value written to $MFC_TagId; the same number
 *				selects the bit placed in the tag mask.
24214571Sdim */
25214571Sdim#define MFC_GET_CMD		0x40
26214571Sdim#define MFC_MAX_DMA_SIZE	0x4000
27214571Sdim#define MFC_TAG_UPDATE_ALL	2
28214571Sdim#define MFC_TAG_ID		0
29214571Sdim
30214571Sdim
31214571Sdim/**
32214571Sdim * Temporary register allocations.
33214571Sdim *
 * Many aliases below name the same physical register; each alias is
 * only meaningful for the short stretch of __ovly_load that uses it.
 *
 * Of the registers named here, __ovly_load stores and reloads only $6,
 * $7 and $8 (plus $9 when OVLY_IRQ_SAVE is defined).  $75-$78 are
 * overwritten without being saved.
34214571Sdim */
35214571Sdim#define tab		$75
36214571Sdim#define cgbits		$75
37214571Sdim#define add64		$75
38214571Sdim#define ealo		$75
39214571Sdim#define newmask		$75
40214571Sdim#define tagstat		$75
41214571Sdim#define bchn		$75
42214571Sdim#define rv1		$75
43214571Sdim
44214571Sdim#define off		$76
45214571Sdim#define off64		$76
46214571Sdim#define maxsize		$76
47214571Sdim#define oldmask		$76
48214571Sdim#define sz		$76
49214571Sdim#define lnkr		$76
50214571Sdim#define rv2		$76
51214571Sdim
52214571Sdim#define cur		$77
53214571Sdim#define cmp		$77
54214571Sdim#define buf		$77
55214571Sdim#define genwi		$77
56214571Sdim#define tagid		$77
57214571Sdim#define cmd		$77
58214571Sdim#define rv3		$77
59214571Sdim
/* $78 doubles as the partition-number input of __ovly_load; it is
 * reused as cgshuf only after the last read of that input. */
60214571Sdim#define cgshuf		$78
61214571Sdim
/* vma holds the whole 16-byte _ovly_table entry; its word 0 is the
 * local store address written to $MFC_LSA. */
62214571Sdim#define vma		$6
63214571Sdim
64214571Sdim#define map		$7
65214571Sdim#define osize		$7
66214571Sdim#define cmp2		$7
67214571Sdim
68214571Sdim#define ea64		$8
69214571Sdim#define retval		$8
70214571Sdim
/* irqtmp shares $8 with ea64/retval and is only used as a branch target
 * holder.  irq_stat receives the $SPU_RdMachStat value on entry and is
 * tested again just before the registers are restored. */
71214571Sdim#ifdef OVLY_IRQ_SAVE
72214571Sdim#define irqtmp		$8
73214571Sdim#define irq_stat	$9
74214571Sdim#endif
75214571Sdim
/* Overlay tables defined outside this file; their entry layouts are
 * described where __ovly_load indexes them. */
76214571Sdim	.extern		_ovly_table
77214571Sdim	.extern		_ovly_buf_table
78214571Sdim
/* Two 16-byte shufb control words.  They live in .text directly ahead of
 * __ovly_return so that __ovly_load can address them relative to the
 * __ovly_return address it already holds in a register. */
79214571Sdim	.text
80214571Sdim	.align 		4
81214571Sdim	.type		__rv_pattern, @object
82214571Sdim	.size		__rv_pattern, 16
/* __rv_pattern: control bytes 0x00-0x03 pick word 0 of the first shufb
 * source, 0x1c-0x1f pick word 3 of the second source and 0x10-0x13 pick
 * word 0 of the second source.  Result layout:
 *	{ first.w0, second.w3, first.w0, second.w0 } */
83214571Sdim__rv_pattern:
84214571Sdim	.word		0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213
85214571Sdim	.type		__cg_pattern, @object
86214571Sdim	.size		__cg_pattern, 16
/* __cg_pattern: moves word 1 of the source into word 0.  The 0x80
 * control bytes are expected to yield zero bytes (shufb special case -
 * confirm against the SPU ISA), clearing words 1-3.  Used to carry from
 * the low to the high word of the 64-bit address add. */
87214571Sdim__cg_pattern:
88214571Sdim	.word		0x04050607, 0x80808080, 0x80808080, 0x80808080
89214571Sdim
90214571Sdim/**
91214571Sdim * __ovly_return - stub for returning from overlay functions.
92214571Sdim *
93214571Sdim * inputs:
94214571Sdim *	$lr	link register, in the four-word layout built by __ovly_load:
 *		{__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}
95214571Sdim *
96214571Sdim * outputs:
97214571Sdim *	$78	old partition number, to be reloaded
98214571Sdim *	$79	return address in old partition number
 *
 * If the old partition number is zero the stub branches straight to the
 * return address.  Otherwise the conditional branch is not taken and
 * execution continues with the instructions that follow, i.e. the
 * __ovly_load entry point, with $78/$79 already set up as its inputs.
99214571Sdim */
100214571Sdim	.global		__ovly_return
101214571Sdim	.type		__ovly_return, @function
102214571Sdim
/* One word of padding after the two 16-byte patterns.  Presumably this
 * makes the displacement (__xx_pattern-__ovly_return+4) used by the lqd
 * instructions in __ovly_load a multiple of 16 - TODO confirm. */
103214571Sdim	.word		0
104214571Sdim__ovly_return:
105214571Sdim	shlqbyi		$78, $lr, 4	/* $lr word 1 -> preferred slot of $78 */
106214571Sdim	shlqbyi		$79, $lr, 8	/* $lr word 2 -> preferred slot of $79 */
107214571Sdim	biz		$78, $79	/* old partition 0: return directly */
108214571Sdim	.size		__ovly_return, . - __ovly_return
109214571Sdim
110214571Sdim/**
111214571Sdim * __ovly_load - copy an overlay partition to local store.
112214571Sdim *
113214571Sdim * inputs:
114214571Sdim *	$78	partition number to be loaded.
115214571Sdim *	$79	branch target in new partition.
116214571Sdim *	$lr	link register, containing return addr.
117214571Sdim *
118214571Sdim * outputs:
119214571Sdim *	$lr	new link register, returning through __ovly_return.
120214571Sdim *
121214571Sdim * Copy a new overlay partition into local store, or return
122214571Sdim * immediately if the partition is already resident.
 *
 * Register use: $6-$8 (and $9 with OVLY_IRQ_SAVE) are stored below $sp
 * on entry and reloaded before the final branch.  $75-$77 are scratch.
 * $78 is overwritten (as cgshuf) on the path that actually loads an
 * overlay.  $79 is only read.  The routine always leaves via "bi $79".
 *
 * Outline:
 *   1. save registers, optionally disable interrupts;
 *   2. walk the stack back chain to find the caller's overlay index;
 *   3. rewrite $lr so the callee returns through __ovly_return;
 *   4. if $78 is zero or the partition is already mapped, skip to 7;
 *   5. record the new mapping in _ovly_buf_table;
 *   6. DMA the partition in chunks of at most MFC_MAX_DMA_SIZE bytes
 *      and wait for the tag group to complete;
 *   7. restore registers and branch to $79.
123214571Sdim */
124214571Sdim	.global		__ovly_load
125214571Sdim	.type		__ovly_load, @function
126214571Sdim
127214571Sdim__ovly_load:
128214571Sdim/* Save temporary registers to stack.  The slots are at negative offsets
 * from $sp; $sp itself is not adjusted. */
129214571Sdim	stqd		$6, -16($sp)
130214571Sdim	stqd		$7, -32($sp)
131214571Sdim	stqd		$8, -48($sp)
132214571Sdim
133214571Sdim#ifdef OVLY_IRQ_SAVE
134214571Sdim/* Save irq state, then disable interrupts.  "bid" targets the label
 * immediately after it, so the branch changes nothing but the interrupt
 * enable state. */
135214571Sdim	stqd		$9, -64($sp)
136214571Sdim	ila		irqtmp, __ovly_irq_save
137214571Sdim	rdch		irq_stat, $SPU_RdMachStat
138214571Sdim	bid		irqtmp
139214571Sdim__ovly_irq_save:
140214571Sdim#endif
141214571Sdim
142214571Sdim/* Set branch hint to overlay target: the branch at __ovly_load_ret will
 * go to the address in $79. */
143214571Sdim	hbr		__ovly_load_ret, $79
144214571Sdim
145214571Sdim/* Get caller's overlay index by back chaining through stack frames.
146214571Sdim * Loop until end of stack (back chain all-zeros) or
147214571Sdim * encountered a link register we set here. */
148214571Sdim	lqd		bchn, 0($sp)		/* first back chain pointer */
149214571Sdim	ila		retval, __ovly_return	/* marker to look for in word 0 */
150214571Sdim
151214571Sdim__ovly_backchain_loop:
152214571Sdim	lqd		lnkr, 16(bchn)		/* quadword at offset 16 of that frame */
153214571Sdim	lqd		bchn, 0(bchn)		/* follow the chain one frame up */
154214571Sdim	ceq		cmp, lnkr, retval	/* lnkr word == __ovly_return ? */
155214571Sdim	ceqi		cmp2, bchn, 0		/* back chain word == 0 ? */
156214571Sdim	or		cmp, cmp, cmp2
157214571Sdim	brz		cmp, __ovly_backchain_loop	/* neither hit: keep walking */
158214571Sdim
159214571Sdim/* If we reached the zero back-chain, then lnkr is bogus.  Clear the
160214571Sdim * part of lnkr that we use later (slot 3).  The rotate moves the
 * "back chain was zero" result from word 0 of cmp2 into word 3, and
 * andc then clears lnkr wherever that mask is set. */
161214571Sdim	rotqbyi		cmp2, cmp2, 4
162214571Sdim	andc		lnkr, lnkr, cmp2
163214571Sdim
164214571Sdim/* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}. */
165214571Sdim	lqd		rv1, (__rv_pattern-__ovly_return+4)(retval)	/* rv1 = __rv_pattern */
166214571Sdim	shufb		rv2, retval, lnkr, rv1	/* {retval.w0, lnkr.w3, retval.w0, lnkr.w0} */
167214571Sdim	shufb		rv3, $lr, $78, rv1	/* {$lr.w0, $78.w3, $lr.w0, $78.w0} */
168214571Sdim	fsmbi		rv1, 0xff		/* select mask covering words 2-3 */
169214571Sdim	selb		rv2, rv2, rv3, rv1	/* words 0-1 from rv2, words 2-3 from rv3 */
170214571Sdim/* If we have a tail call from one overlay function to another overlay,
171214571Sdim   then lr is already set up.  Don't change it.  */
172214571Sdim	ceq		rv1, $lr, retval	/* $lr word 0 already __ovly_return ? */
173214571Sdim	fsmb		rv1, rv1		/* widen that result to a full 16-byte mask */
174214571Sdim	selb		$lr, rv2, $lr, rv1	/* keep $lr if so, else install rv2 */
175214571Sdim
176214571Sdim/* Branch to $79 if non-overlay (partition number zero). */
177214571Sdim	brz		$78, __ovly_load_restore
178214571Sdim
179214571Sdim/* Load values from _ovly_table[$78].
180214571Sdim *	extern struct {
181214571Sdim *		u32 vma;
182214571Sdim *		u32 size;
183214571Sdim *		u32 file_offset;
184214571Sdim *		u32 buf;
185214571Sdim *	} _ovly_table[];
 *
 * The table base is biased by -16, so index 1 selects the first entry.
 * After the load, register "vma" holds the whole entry:
 *	{vma, size, file_offset, buf}.
186214571Sdim */
187214571Sdim	shli		off, $78, 4		/* off = index * 16 */
188214571Sdim	ila		tab, _ovly_table - 16
189214571Sdim	lqx		vma, tab, off
190214571Sdim	rotqbyi		buf, vma, 12		/* word 3 (buf) -> preferred slot */
191214571Sdim
192214571Sdim/* Load values from _ovly_buf_table[buf].
193214571Sdim *	extern struct {
194214571Sdim *		u32 mapped;
195214571Sdim *	} _ovly_buf_table[];
 *
 * buf is likewise 1-based: the byte offset is (buf - 1) * 4.  lqx
 * fetches the quadword containing that word and rotqby brings the word
 * into the preferred slot.
196214571Sdim */
197214571Sdim	ila		tab, _ovly_buf_table
198214571Sdim	ai		off, buf, -1
199214571Sdim	shli		off, off, 2
200214571Sdim	lqx		map, tab, off
201214571Sdim	rotqby		cur, map, off
202214571Sdim
203214571Sdim/* Branch to $79 now if overlay is already mapped.  */
204214571Sdim	ceq		cmp, $78, cur
205214571Sdim	brnz		cmp, __ovly_load_restore
206214571Sdim
207214571Sdim/* Marker for profiling code.  If we get here, we are about to load
208214571Sdim * a new overlay.
209214571Sdim */
210214571Sdim	.global		__ovly_load_event
211214571Sdim	.type		__ovly_load_event, @function
212214571Sdim__ovly_load_event:
213214571Sdim
214214571Sdim/* Set _ovly_buf_table[buf].mapped = $78.  This is a read-modify-write
 * of the quadword loaded above: cwx builds the insertion control for
 * the addressed word and shufb merges $78 into "map" before the store. */
215214571Sdim	cwx		genwi, tab, off
216214571Sdim	shufb		map, $78, map, genwi
217214571Sdim	stqx		map, tab, off
218214571Sdim
219214571Sdim/* A new partition needs to be loaded. Prepare for DMA loop.
220214571Sdim * _EAR_ is the 64b base EA, filled in at run time by the
221214571Sdim * loader, and indicating the value for SPU executable image start.
 *
 * From here on $78 is cgshuf (the partition number is no longer
 * needed), $7 is osize (bytes still to transfer) and $8 is ea64.
 * sz starts out as the entry's file_offset.
222214571Sdim */
223214571Sdim	lqd		cgshuf, (__cg_pattern-__ovly_return+4)(retval)
224214571Sdim	rotqbyi		osize, vma, 4		/* word 1 (size) -> preferred slot */
225214571Sdim	rotqbyi		sz, vma, 8		/* word 2 (file_offset) -> preferred slot */
226214571Sdim	lqa		ea64, _EAR_
227214571Sdim
228214571Sdim__ovly_xfer_loop:
229214571Sdim/* 64b add to compute next ea64.  On the first pass sz is the file
 * offset; on later passes it is the size of the chunk just queued, so
 * ea64 advances past the data already requested. */
230214571Sdim	rotqmbyi	off64, sz, -4		/* off64 = {0, sz, ...}: 64-bit addend */
231214571Sdim	cg		cgbits, ea64, off64	/* per-word carry out */
232214571Sdim	shufb		add64, cgbits, cgbits, cgshuf	/* low-word carry -> word 0 */
233214571Sdim	addx		add64, ea64, off64	/* add, using that carry as carry-in */
234214571Sdim	ori		ea64, add64, 0		/* ea64 = add64 (register copy) */
235214571Sdim
236214571Sdim/* Setup DMA parameters, then issue DMA request.  The chunk size is
 * min(osize, MFC_MAX_DMA_SIZE); the write to $MFC_Cmd comes last. */
237214571Sdim	rotqbyi		ealo, add64, 4		/* low EA word -> preferred slot */
238214571Sdim	ila		maxsize, MFC_MAX_DMA_SIZE
239214571Sdim	cgt		cmp, osize, maxsize
240214571Sdim	selb		sz, osize, maxsize, cmp	/* maxsize if osize > maxsize, else osize */
241214571Sdim	ila		tagid, MFC_TAG_ID
242214571Sdim	wrch		$MFC_LSA, vma
243214571Sdim	wrch		$MFC_EAH, ea64
244214571Sdim	wrch		$MFC_EAL, ealo
245214571Sdim	wrch		$MFC_Size, sz
246214571Sdim	wrch		$MFC_TagId, tagid
247214571Sdim	ila		cmd, MFC_GET_CMD
248214571Sdim	wrch		$MFC_Cmd, cmd
249214571Sdim
250214571Sdim/* Increment vma, decrement size, branch back as needed. */
251214571Sdim	a		vma, vma, sz
252214571Sdim	sf		osize, sz, osize	/* osize = osize - sz */
253214571Sdim	brnz		osize, __ovly_xfer_loop
254214571Sdim
255214571Sdim/* Save app's tagmask, wait for DMA complete, restore mask. */
256214571Sdim	rdch		oldmask, $MFC_RdTagMask
/* NOTE(review): ilh places its immediate in every halfword, so the word
 * written to $MFC_WrTagMask appears to have bit (MFC_TAG_ID + 16) set in
 * addition to bit MFC_TAG_ID - confirm whether also waiting on that tag
 * group is intended. */
257214571Sdim#if MFC_TAG_ID < 16
258214571Sdim	ilh		newmask, 1 << MFC_TAG_ID
259214571Sdim#else
260214571Sdim	ilhu		newmask, 1 << (MFC_TAG_ID - 16)
261214571Sdim#endif
262214571Sdim	wrch		$MFC_WrTagMask, newmask
263214571Sdim	ila		tagstat, MFC_TAG_UPDATE_ALL
264214571Sdim	wrch		$MFC_WrTagUpdate, tagstat
265214571Sdim	rdch		tagstat, $MFC_RdTagStat	/* the wait happens on this read */
266214571Sdim	sync
267214571Sdim	wrch		$MFC_WrTagMask, oldmask
268214571Sdim
269214571Sdim	.global		_ovly_debug_event
270214571Sdim	.type		_ovly_debug_event, @function
271214571Sdim_ovly_debug_event:
272214571Sdim/* GDB inserts debugger trap here.  */
273214571Sdim	nop
274214571Sdim
/* Common exit, also reached directly when nothing had to be loaded. */
275214571Sdim__ovly_load_restore:
276214571Sdim#ifdef OVLY_IRQ_SAVE
277214571Sdim/* Conditionally re-enable interrupts.  Bit 0 of the machine status
 * saved on entry decides; "binze" targets the next label, so the branch
 * itself only changes the interrupt enable state. */
278214571Sdim	andi		irq_stat, irq_stat, 1
279214571Sdim	ila		irqtmp, __ovly_irq_restore
280214571Sdim	binze		irq_stat, irqtmp
281214571Sdim__ovly_irq_restore:
282214571Sdim	lqd		$9, -64($sp)
283214571Sdim#endif
284214571Sdim
285214571Sdim/* Restore saved registers. */
286214571Sdim	lqd		$8, -48($sp)
287214571Sdim	lqd		$7, -32($sp)
288214571Sdim	lqd		$6, -16($sp)
289214571Sdim
290214571Sdim__ovly_load_ret:
291214571Sdim/* Branch to target address.  This is the branch named by the hbr hint
 * near the top of the routine. */
292214571Sdim	bi		$79
293214571Sdim
294214571Sdim	.size		__ovly_load, . - __ovly_load
295