/* Overlay manager for SPU.

   Copyright 2006, 2007 Free Software Foundation, Inc.

   This file is part of GLD, the Gnu Linker.

   GLD is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GLD is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GLD; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/**
 * MFC DMA definitions.
 */
#define	MFC_GET_CMD		0x40
#define	MFC_MAX_DMA_SIZE	0x4000
#define	MFC_TAG_UPDATE_ALL	2
#define	MFC_TAG_ID		0


/**
 * Temporary register allocations.
 *
 * Several names alias the same register, so names that share a register
 * must never be live at the same time.
 *
 * Only $6, $7 and $8 (plus $9 when OVLY_IRQ_SAVE is defined) are actually
 * stored to and reloaded from the stack by __ovly_load; $75-$78 are
 * written without being saved.
 */
#define	tab		$75
#define	cgbits		$75
#define	add64		$75
#define	ealo		$75
#define	newmask		$75
#define	tagstat		$75
#define	bchn		$75
#define	rv1		$75

#define	off		$76
#define	off64		$76
#define	maxsize		$76
#define	oldmask		$76
#define	sz		$76
#define	lnkr		$76
#define	rv2		$76

#define	cur		$77
#define	cmp		$77
#define	buf		$77
#define	genwi		$77
#define	tagid		$77
#define	cmd		$77
#define	rv3		$77

/* cgshuf shares $78 with the partition-number input of __ovly_load, so it
   is only loaded after the last read of that input.  */
#define	cgshuf		$78

#define	vma		$6

#define	map		$7
#define	osize		$7
#define	cmp2		$7

#define	ea64		$8
#define	retval		$8

#ifdef OVLY_IRQ_SAVE
/* irqtmp shares $8 with ea64 and retval.  */
#define	irqtmp		$8
#define	irq_stat	$9
#endif

	.extern		_ovly_table
	.extern		_ovly_buf_table

	.text
	.align		4
/* shufb control word used to assemble the new link register:
 * result word 0 <- word 0 of the first operand  (bytes 0x00-0x03)
 * result word 1 <- word 3 of the second operand (bytes 0x1c-0x1f)
 * result word 2 <- word 0 of the first operand  (bytes 0x00-0x03)
 * result word 3 <- word 0 of the second operand (bytes 0x10-0x13)
 */
	.type		__rv_pattern, @object
	.size		__rv_pattern, 16
__rv_pattern:
	.word		0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213
/* shufb control word used by the 64-bit add in __ovly_xfer_loop:
 * result word 0 <- word 1 of the first operand (bytes 0x04-0x07), i.e.
 * the carry generated by the low word; the 0x80 selectors produce zero
 * bytes in the remaining words.
 */
	.type		__cg_pattern, @object
	.size		__cg_pattern, 16
__cg_pattern:
	.word		0x04050607, 0x80808080, 0x80808080, 0x80808080

/**
 * __ovly_return - stub for returning from overlay functions.
 *
 * inputs:
 *	$lr	link register, laid out by __ovly_load as
 *		{__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}
 *
 * outputs:
 *	$78	old partition number, to be reloaded
 *	$79	return address in old partition number
 *
 * If the old partition number is zero the stub branches straight to $79.
 * Otherwise execution falls through into __ovly_load, which immediately
 * follows, with $78/$79 already holding its inputs.
 */
	.global		__ovly_return
	.type		__ovly_return, @function

/* One word of padding ahead of the stub.  The "+4" in the lqd
   displacements "(__rv_pattern-__ovly_return+4)" and
   "(__cg_pattern-__ovly_return+4)" below cancels this word.
   NOTE(review): presumably this keeps those displacements a multiple
   of 16, assuming ".align 4" above yields 16-byte alignment - confirm.  */
	.word		0
__ovly_return:
	shlqbyi		$78, $lr, 4	/* word 1 of $lr -> preferred slot */
	shlqbyi		$79, $lr, 8	/* word 2 of $lr -> preferred slot */
	biz		$78, $79	/* $78 == 0: branch to $79 */
	.size		__ovly_return, . - __ovly_return

/**
 * __ovly_load - copy an overlay partition to local store.
 *
 * inputs:
 *	$78	partition number to be loaded.
 *	$79	branch target in new partition.
 *	$lr	link register, containing return addr.
 *
 * outputs:
 *	$lr	new link register, returning through __ovly_return.
 *
 * Copy a new overlay partition into local store, or return
 * immediately if the partition is already resident.
 *
 * Registers $75-$78 are overwritten.  $6-$8 (and $9 with OVLY_IRQ_SAVE)
 * are stored at negative offsets from $sp on entry and reloaded before
 * the final branch; $sp itself is not adjusted.
 */
	.global		__ovly_load
	.type		__ovly_load, @function

__ovly_load:
/* Save temporary registers to stack. */
	stqd		$6, -16($sp)
	stqd		$7, -32($sp)
	stqd		$8, -48($sp)

#ifdef OVLY_IRQ_SAVE
/* Save irq state, then disable interrupts.  The bid targets the very next
 * instruction; it is used only for its interrupt-disabling effect. */
	stqd		$9, -64($sp)
	ila		irqtmp, __ovly_irq_save
	rdch		irq_stat, $SPU_RdMachStat
	bid		irqtmp
__ovly_irq_save:
#endif

/* Set branch hint to overlay target. */
	hbr		__ovly_load_ret, $79

/* Get caller's overlay index by back chaining through stack frames.
 * Loop until end of stack (back chain all-zeros) or
 * encountered a link register we set here. */
	lqd		bchn, 0($sp)
	ila		retval, __ovly_return

__ovly_backchain_loop:
	lqd		lnkr, 16(bchn)	/* quadword at offset 16 of this frame */
	lqd		bchn, 0(bchn)	/* follow the back chain */
	ceq		cmp, lnkr, retval
	ceqi		cmp2, bchn, 0
	or		cmp, cmp, cmp2
	brz		cmp, __ovly_backchain_loop

/* If we reached the zero back-chain, then lnkr is bogus.  Clear the
 * part of lnkr that we use later (slot 3).  The rotate moves the
 * "back chain is zero" flag from slot 0 of cmp2 into slot 3. */
	rotqbyi		cmp2, cmp2, 4
	andc		lnkr, lnkr, cmp2

/* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}.
 * rv2 supplies words 0-1 and rv3 supplies words 2-3; the fsmbi mask has
 * bytes 8-15 set, so selb takes the upper two words from rv3. */
	lqd		rv1, (__rv_pattern-__ovly_return+4)(retval)
	shufb		rv2, retval, lnkr, rv1
	shufb		rv3, $lr, $78, rv1
	fsmbi		rv1, 0xff
	selb		rv2, rv2, rv3, rv1
/* If we have a tail call from one overlay function to another overlay,
   then lr is already set up.  Don't change it.  */
	ceq		rv1, $lr, retval
	fsmb		rv1, rv1
	selb		$lr, rv2, $lr, rv1

/* Branch to $79 if non-overlay */
	brz		$78, __ovly_load_restore

/* Load values from _ovly_table[$78].
 *	extern struct {
 *		u32 vma;
 *		u32 size;
 *		u32 file_offset;
 *		u32 buf;
 *	} _ovly_table[];
 *
 * The base is biased by -16 (one entry), so partition n uses the entry
 * at byte offset (n - 1) * 16.
 */
	shli		off, $78, 4
	ila		tab, _ovly_table - 16
	lqx		vma, tab, off
	rotqbyi		buf, vma, 12	/* buf field -> preferred slot */

/* Load values from _ovly_buf_table[buf].
 *	extern struct {
 *		u32 mapped;
 *	} _ovly_buf_table[];
 *
 * The byte offset used is (buf - 1) * 4.
 */
	ila		tab, _ovly_buf_table
	ai		off, buf, -1
	shli		off, off, 2
	lqx		map, tab, off
	rotqby		cur, map, off	/* addressed word -> preferred slot */

/* Branch to $79 now if overlay is already mapped.  */
	ceq		cmp, $78, cur
	brnz		cmp, __ovly_load_restore

/* Marker for profiling code.  If we get here, we are about to load
 * a new overlay.
 */
	.global		__ovly_load_event
	.type		__ovly_load_event, @function
__ovly_load_event:

/* Set _ovly_buf_table[buf].mapped = $78. */
	cwx		genwi, tab, off
	shufb		map, $78, map, genwi
	stqx		map, tab, off

/* A new partition needs to be loaded. Prepare for DMA loop.
 * _EAR_ is the 64b base EA, filled in at run time by the
 * loader, and indicating the value for SPU executable image start.
 *
 * cgshuf overwrites $78 here; the partition number is not read again.
 * sz is seeded with the file_offset field so that the first pass of the
 * loop adds the file offset to the base EA.
 */
	lqd		cgshuf, (__cg_pattern-__ovly_return+4)(retval)
	rotqbyi		osize, vma, 4	/* size field -> preferred slot */
	rotqbyi		sz, vma, 8	/* file_offset field -> preferred slot */
	lqa		ea64, _EAR_

__ovly_xfer_loop:
/* 64b add to compute next ea64.  On the first pass sz holds the file
 * offset; on later passes it holds the size of the previous transfer. */
	rotqmbyi	off64, sz, -4
	cg		cgbits, ea64, off64
	shufb		add64, cgbits, cgbits, cgshuf
	addx		add64, ea64, off64
	ori		ea64, add64, 0

/* Setup DMA parameters, then issue DMA request.
 * sz = osize if osize <= MFC_MAX_DMA_SIZE, else MFC_MAX_DMA_SIZE. */
	rotqbyi		ealo, add64, 4
	ila		maxsize, MFC_MAX_DMA_SIZE
	cgt		cmp, osize, maxsize
	selb		sz, osize, maxsize, cmp
	ila		tagid, MFC_TAG_ID
	wrch		$MFC_LSA, vma
	wrch		$MFC_EAH, ea64
	wrch		$MFC_EAL, ealo
	wrch		$MFC_Size, sz
	wrch		$MFC_TagId, tagid
	ila		cmd, MFC_GET_CMD
	wrch		$MFC_Cmd, cmd

/* Increment vma, decrement size, branch back as needed. */
	a		vma, vma, sz
	sf		osize, sz, osize	/* osize = osize - sz */
	brnz		osize, __ovly_xfer_loop

/* Save app's tagmask, wait for DMA complete, restore mask.
 * NOTE(review): ilh replicates its immediate into every halfword, so the
 * mask word also has the bit for tag (MFC_TAG_ID + 16) set - confirm
 * that waiting on that tag group as well is acceptable.  */
	rdch		oldmask, $MFC_RdTagMask
#if MFC_TAG_ID < 16
	ilh		newmask, 1 << MFC_TAG_ID
#else
	ilhu		newmask, 1 << (MFC_TAG_ID - 16)
#endif
	wrch		$MFC_WrTagMask, newmask
	ila		tagstat, MFC_TAG_UPDATE_ALL
	wrch		$MFC_WrTagUpdate, tagstat
	rdch		tagstat, $MFC_RdTagStat
	sync
	wrch		$MFC_WrTagMask, oldmask

	.global		_ovly_debug_event
	.type		_ovly_debug_event, @function
_ovly_debug_event:
/* GDB inserts debugger trap here.  */
	nop

__ovly_load_restore:
#ifdef OVLY_IRQ_SAVE
/* Conditionally re-enable interrupts: binze branches (enabling
 * interrupts) only when bit 0 of the saved machine status is set; either
 * way execution continues at __ovly_irq_restore. */
	andi		irq_stat, irq_stat, 1
	ila		irqtmp, __ovly_irq_restore
	binze		irq_stat, irqtmp
__ovly_irq_restore:
	lqd		$9, -64($sp)
#endif

/* Restore saved registers. */
	lqd		$8, -48($sp)
	lqd		$7, -32($sp)
	lqd		$6, -16($sp)

__ovly_load_ret:
/* Branch to target address. */
	bi		$79

	.size		__ovly_load, . - __ovly_load