/* Overlay manager for SPU.

   Copyright 2006, 2007 Free Software Foundation, Inc.

   This file is part of GLD, the Gnu Linker.

   GLD is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GLD is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GLD; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/**
 * MFC DMA definitions.
 */
#define MFC_GET_CMD		0x40
#define MFC_MAX_DMA_SIZE	0x4000
#define MFC_TAG_UPDATE_ALL	2
#define MFC_TAG_ID		0


/**
 * Temporary register allocations.
 *
 * Several names alias the same register; each name is only live for a
 * short stretch of __ovly_load, so the aliases never overlap in time.
 *
 * $6, $7 and $8 (and $9 when OVLY_IRQ_SAVE is defined) are saved on
 * entry to __ovly_load and restored before it branches to the target.
 * $75..$78 are used without being saved.
 */
#define tab		$75
#define cgbits		$75
#define add64		$75
#define ealo		$75
#define newmask		$75
#define tagstat		$75
#define bchn		$75
#define rv1		$75

#define off		$76
#define off64		$76
#define maxsize		$76
#define oldmask		$76
#define sz		$76
#define lnkr		$76
#define rv2		$76

#define cur		$77
#define cmp		$77
#define buf		$77
#define genwi		$77
#define tagid		$77
#define cmd		$77
#define rv3		$77

#define cgshuf		$78

#define vma		$6

#define map		$7
#define osize		$7
#define cmp2		$7

#define ea64		$8
#define retval		$8

#ifdef OVLY_IRQ_SAVE
#define irqtmp		$8
#define irq_stat	$9
#endif

	.extern		_ovly_table
	.extern		_ovly_buf_table

	.text
	.align		4
/* shufb control used to build the new link register:
 * result = { A.word0, B.word3, A.word0, B.word0 }
 * where A is the first and B the second shufb source operand.  */
	.type		__rv_pattern, @object
	.size		__rv_pattern, 16
__rv_pattern:
	.word		0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213
/* shufb control used for the 64-bit add: moves word 1 of the carry
 * vector into word 0 and zeroes the remaining words.  */
	.type		__cg_pattern, @object
	.size		__cg_pattern, 16
__cg_pattern:
	.word		0x04050607, 0x80808080, 0x80808080, 0x80808080

/**
 * __ovly_return - stub for returning from overlay functions.
 *
 * inputs:
 *	$lr	link register
 *
 * outputs:
 *	$78	old partition number, to be reloaded
 *	$79	return address in old partition number
 *
 * $lr is expected to hold the quadword built by __ovly_load:
 * {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}.
 * If the old partition number is zero, control branches straight to the
 * return address; otherwise execution falls through into __ovly_load.
 */
	.global		__ovly_return
	.type		__ovly_return, @function

/* NOTE(review): this pad word offsets __ovly_return by 4 bytes from the
 * preceding data; the pattern loads in __ovly_load add 4 to their
 * displacement to match.  Confirm both before moving or removing it.  */
	.word	0
__ovly_return:
	shlqbyi	$78, $lr, 4		/* $78 = word 1 of $lr.  */
	shlqbyi	$79, $lr, 8		/* $79 = word 2 of $lr.  */
	biz	$78, $79		/* Old partition 0: just return.  */
	.size	__ovly_return, . - __ovly_return

/**
 * __ovly_load - copy an overlay partition to local store.
 *
 * inputs:
 *	$78	partition number to be loaded.
 *	$79	branch target in new partition.
 *	$lr	link register, containing return addr.
 *
 * outputs:
 *	$lr	new link register, returning through __ovly_return.
 *
 * Copy a new overlay partition into local store, or return
 * immediately if the partition is already resident.
 */
	.global		__ovly_load
	.type		__ovly_load, @function

__ovly_load:
/* Save temporary registers to stack.
 * NOTE(review): these stores go below $sp without moving $sp, and in the
 * OVLY_IRQ_SAVE build they happen before interrupts are disabled.
 * Confirm that nothing asynchronous can overwrite this area.  */
	stqd	$6, -16($sp)
	stqd	$7, -32($sp)
	stqd	$8, -48($sp)

#ifdef OVLY_IRQ_SAVE
/* Save irq state, then disable interrupts.  */
	stqd	$9, -64($sp)
	ila	irqtmp, __ovly_irq_save
	rdch	irq_stat, $SPU_RdMachStat
	bid	irqtmp
__ovly_irq_save:
#endif

/* Set branch hint to overlay target.  */
	hbr	__ovly_load_ret, $79

/* Get caller's overlay index by back chaining through stack frames.
 * Loop until end of stack (back chain all-zeros) or
 * encountered a link register we set here.  */
	lqd	bchn, 0($sp)
	ila	retval, __ovly_return

__ovly_backchain_loop:
	lqd	lnkr, 16(bchn)
	lqd	bchn, 0(bchn)
	ceq	cmp, lnkr, retval	/* Saved lr points at __ovly_return?  */
	ceqi	cmp2, bchn, 0		/* Back chain is zero?  */
	or	cmp, cmp, cmp2
	brz	cmp, __ovly_backchain_loop

/* If we reached the zero back-chain, then lnkr is bogus.  Clear the
 * part of lnkr that we use later (slot 3).  The rotate moves the
 * "back chain is zero" mask from slot 0 into slot 3.  */
	rotqbyi	cmp2, cmp2, 4
	andc	lnkr, lnkr, cmp2

/* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}.
 * rv2 supplies words 0-1 (from retval and lnkr), rv3 supplies words 2-3
 * (from $lr and $78); fsmbi 0xff selects bytes 8..15 from rv3.  */
	lqd	rv1, (__rv_pattern-__ovly_return+4)(retval)
	shufb	rv2, retval, lnkr, rv1
	shufb	rv3, $lr, $78, rv1
	fsmbi	rv1, 0xff
	selb	rv2, rv2, rv3, rv1
/* If we have a tail call from one overlay function to another overlay,
   then lr is already set up.  Don't change it.  */
	ceq	rv1, $lr, retval
	fsmb	rv1, rv1
	selb	$lr, rv2, $lr, rv1

/* Branch to $79 if non-overlay */
	brz	$78, __ovly_load_restore

/* Load values from _ovly_table[$78].
 *	extern struct {
 *		u32 vma;
 *		u32 size;
 *		u32 file_offset;
 *		u32 buf;
 *	} _ovly_table[];
 *
 * Entries are 16 bytes and partition numbers start at 1, hence the
 * shift by 4 and the "- 16" on the table base.  The whole entry is
 * loaded into vma; word 3 (buf) is rotated into slot 0 of buf.
 */
	shli	off, $78, 4
	ila	tab, _ovly_table - 16
	lqx	vma, tab, off
	rotqbyi	buf, vma, 12

/* Load values from _ovly_buf_table[buf].
 *	extern struct {
 *		u32 mapped;
 *	} _ovly_buf_table[];
 *
 * off = (buf - 1) * 4.  lqx fetches the quadword holding the entry and
 * rotqby brings the entry itself into slot 0 of cur.
 */
	ila	tab, _ovly_buf_table
	ai	off, buf, -1
	shli	off, off, 2
	lqx	map, tab, off
	rotqby	cur, map, off

/* Branch to $79 now if overlay is already mapped.  */
	ceq	cmp, $78, cur
	brnz	cmp, __ovly_load_restore

/* Marker for profiling code.  If we get here, we are about to load
 * a new overlay.
 */
	.global		__ovly_load_event
	.type		__ovly_load_event, @function
__ovly_load_event:

/* Set _ovly_buf_table[buf].mapped = $78.
 * Insert the word into the quadword loaded above and store it back.  */
	cwx	genwi, tab, off
	shufb	map, $78, map, genwi
	stqx	map, tab, off

/* A new partition needs to be loaded. Prepare for DMA loop.
 * _EAR_ is the 64b base EA, filled in at run time by the
 * loader, and indicating the value for SPU executable image start.
 *
 * osize = bytes still to transfer (table "size").
 * sz    = amount to add to ea64 on the next pass; initially the
 *         table "file_offset", afterwards the size of the previous DMA.
 */
	lqd	cgshuf, (__cg_pattern-__ovly_return+4)(retval)
	rotqbyi	osize, vma, 4
	rotqbyi	sz, vma, 8
	lqa	ea64, _EAR_

__ovly_xfer_loop:
/* 64b add to compute next ea64.
 * off64 = {0, sz}; the carry out of the low word is moved into the low
 * bit of the high word of add64, where addx consumes it.  */
	rotqmbyi	off64, sz, -4
	cg	cgbits, ea64, off64
	shufb	add64, cgbits, cgbits, cgshuf
	addx	add64, ea64, off64
	ori	ea64, add64, 0

/* Setup DMA parameters, then issue DMA request.
 * Each request moves min(osize, MFC_MAX_DMA_SIZE) bytes.  */
	rotqbyi	ealo, add64, 4
	ila	maxsize, MFC_MAX_DMA_SIZE
	cgt	cmp, osize, maxsize
	selb	sz, osize, maxsize, cmp
	ila	tagid, MFC_TAG_ID
	wrch	$MFC_LSA, vma
	wrch	$MFC_EAH, ea64
	wrch	$MFC_EAL, ealo
	wrch	$MFC_Size, sz
	wrch	$MFC_TagId, tagid
	ila	cmd, MFC_GET_CMD
	wrch	$MFC_Cmd, cmd

/* Increment vma, decrement size, branch back as needed.  */
	a	vma, vma, sz
	sf	osize, sz, osize
	brnz	osize, __ovly_xfer_loop

/* Save app's tagmask, wait for DMA complete, restore mask.
 * For tag ids below 16 the mask is built with ila so that only bit
 * MFC_TAG_ID is set in each word.  ilh would replicate the immediate
 * into both halfwords and thereby also select tag group
 * MFC_TAG_ID + 16, making the wait depend on unrelated transfers.  */
	rdch	oldmask, $MFC_RdTagMask
#if MFC_TAG_ID < 16
	ila	newmask, 1 << MFC_TAG_ID
#else
	ilhu	newmask, 1 << (MFC_TAG_ID - 16)
#endif
	wrch	$MFC_WrTagMask, newmask
	ila	tagstat, MFC_TAG_UPDATE_ALL
	wrch	$MFC_WrTagUpdate, tagstat
	rdch	tagstat, $MFC_RdTagStat
	sync
	wrch	$MFC_WrTagMask, oldmask

	.global		_ovly_debug_event
	.type		_ovly_debug_event, @function
_ovly_debug_event:
/* GDB inserts debugger trap here.  */
	nop

__ovly_load_restore:
#ifdef OVLY_IRQ_SAVE
/* Conditionally re-enable interrupts.
 * Bit 0 of the machine status read on entry decides whether the
 * branch re-enables them.  */
	andi	irq_stat, irq_stat, 1
	ila	irqtmp, __ovly_irq_restore
	binze	irq_stat, irqtmp
__ovly_irq_restore:
	lqd	$9, -64($sp)
#endif

/* Restore saved registers.  */
	lqd	$8, -48($sp)
	lqd	$7, -32($sp)
	lqd	$6, -16($sp)

__ovly_load_ret:
/* Branch to target address.  */
	bi	$79

	.size	__ovly_load, . - __ovly_load