1/* Copyright (C) 2008-2015 Free Software Foundation, Inc. 2 3This file is part of GCC. 4 5GCC is free software; you can redistribute it and/or modify it under 6the terms of the GNU General Public License as published by the Free 7Software Foundation; either version 3, or (at your option) any later 8version. 9 10GCC is distributed in the hope that it will be useful, but WITHOUT ANY 11WARRANTY; without even the implied warranty of MERCHANTABILITY or 12FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13for more details. 14 15Under Section 7 of GPL version 3, you are granted additional 16permissions described in the GCC Runtime Library Exception, version 173.1, as published by the Free Software Foundation. 18 19You should have received a copy of the GNU General Public License and 20a copy of the GCC Runtime Library Exception along with this program; 21see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22<http://www.gnu.org/licenses/>. */ 23 24#include <spu_mfcio.h> 25#include <spu_internals.h> 26#include <spu_intrinsics.h> 27#include <spu_cache.h> 28 29extern unsigned long long __ea_local_store; 30extern char __cache_tag_array_size; 31 32#define LINE_SIZE 128 33#define TAG_MASK (LINE_SIZE - 1) 34 35#define WAYS 4 36#define SET_MASK ((int) &__cache_tag_array_size - LINE_SIZE) 37 38#define CACHE_LINES ((int) &__cache_tag_array_size / \ 39 sizeof (struct __cache_tag_array) * WAYS) 40 41struct __cache_tag_array 42{ 43 unsigned int tag_lo[WAYS]; 44 unsigned int tag_hi[WAYS]; 45 void *base[WAYS]; 46 int reserved[WAYS]; 47 vector unsigned short dirty_bits[WAYS]; 48}; 49 50extern struct __cache_tag_array __cache_tag_array[]; 51extern char __cache[]; 52 53/* In order to make the code seem a little cleaner, and to avoid having 54 64/32 bit ifdefs all over the place, we use macros. */ 55 56#ifdef __EA64__ 57typedef unsigned long long addr; 58 59#define CHECK_TAG(_entry, _way, _tag) \ 60 ((_entry)->tag_lo[(_way)] == ((_tag) & 0xFFFFFFFF) \ 61 && (_entry)->tag_hi[(_way)] == ((_tag) >> 32)) 62 63#define GET_TAG(_entry, _way) \ 64 ((unsigned long long)(_entry)->tag_hi[(_way)] << 32 \ 65 | (unsigned long long)(_entry)->tag_lo[(_way)]) 66 67#define SET_TAG(_entry, _way, _tag) \ 68 (_entry)->tag_lo[(_way)] = (_tag) & 0xFFFFFFFF; \ 69 (_entry)->tag_hi[(_way)] = (_tag) >> 32 70 71#else /*__EA32__*/ 72typedef unsigned long addr; 73 74#define CHECK_TAG(_entry, _way, _tag) \ 75 ((_entry)->tag_lo[(_way)] == (_tag)) 76 77#define GET_TAG(_entry, _way) \ 78 ((_entry)->tag_lo[(_way)]) 79 80#define SET_TAG(_entry, _way, _tag) \ 81 (_entry)->tag_lo[(_way)] = (_tag) 82 83#endif 84 85/* In GET_ENTRY, we cast away the high 32 bits, 86 as the tag is only in the low 32. */ 87 88#define GET_ENTRY(_addr) \ 89 ((struct __cache_tag_array *) \ 90 si_to_uint (si_a (si_and (si_from_uint ((unsigned int) (addr) (_addr)), \ 91 si_from_uint (SET_MASK)), \ 92 si_from_uint ((unsigned int) __cache_tag_array)))) 93 94#define GET_CACHE_LINE(_addr, _way) \ 95 ((void *) (__cache + ((_addr) & SET_MASK) * WAYS) + ((_way) * LINE_SIZE)); 96 97#define CHECK_DIRTY(_vec) (si_to_uint (si_orx ((qword) (_vec)))) 98#define SET_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] = 1) 99#define CHECK_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] == 1) 100 101#define LS_FLAG 0x80000000 102#define SET_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] |= LS_FLAG) 103#define CHECK_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] & LS_FLAG) 104#define GET_LRU(_entry, _way) ((_entry)->reserved[(_way)] & ~LS_FLAG) 105 106static int dma_tag = 32; 107 108static void 109__cache_evict_entry (struct __cache_tag_array *entry, int way) 110{ 111 addr tag = GET_TAG (entry, way); 112 113 if (CHECK_DIRTY (entry->dirty_bits[way]) && !CHECK_IS_LS (entry, way)) 114 { 115#ifdef NONATOMIC 116 /* Non-atomic writes. */ 117 unsigned int oldmask, mach_stat; 118 char *line = ((void *) 0); 119 120 /* Enter critical section. */ 121 mach_stat = spu_readch (SPU_RdMachStat); 122 spu_idisable (); 123 124 /* Issue DMA request. */ 125 line = GET_CACHE_LINE (entry->tag_lo[way], way); 126 mfc_put (line, tag, LINE_SIZE, dma_tag, 0, 0); 127 128 /* Wait for DMA completion. */ 129 oldmask = mfc_read_tag_mask (); 130 mfc_write_tag_mask (1 << dma_tag); 131 mfc_read_tag_status_all (); 132 mfc_write_tag_mask (oldmask); 133 134 /* Leave critical section. */ 135 if (__builtin_expect (mach_stat & 1, 0)) 136 spu_ienable (); 137#else 138 /* Allocate a buffer large enough that we know it has 128 bytes 139 that are 128 byte aligned (for DMA). */ 140 141 char buffer[LINE_SIZE + 127]; 142 qword *buf_ptr = (qword *) (((unsigned int) (buffer) + 127) & ~127); 143 qword *line = GET_CACHE_LINE (entry->tag_lo[way], way); 144 qword bits; 145 unsigned int mach_stat; 146 147 /* Enter critical section. */ 148 mach_stat = spu_readch (SPU_RdMachStat); 149 spu_idisable (); 150 151 do 152 { 153 /* We atomically read the current memory into a buffer 154 modify the dirty bytes in the buffer, and write it 155 back. If writeback fails, loop and try again. */ 156 157 mfc_getllar (buf_ptr, tag, 0, 0); 158 mfc_read_atomic_status (); 159 160 /* The method we're using to write 16 dirty bytes into 161 the buffer at a time uses fsmb which in turn uses 162 the least significant 16 bits of word 0, so we 163 load the bits and rotate so that the first bit of 164 the bitmap is in the first bit that fsmb will use. */ 165 166 bits = (qword) entry->dirty_bits[way]; 167 bits = si_rotqbyi (bits, -2); 168 169 /* Si_fsmb creates the mask of dirty bytes. 170 Use selb to nab the appropriate bits. */ 171 buf_ptr[0] = si_selb (buf_ptr[0], line[0], si_fsmb (bits)); 172 173 /* Rotate to next 16 byte section of cache. */ 174 bits = si_rotqbyi (bits, 2); 175 176 buf_ptr[1] = si_selb (buf_ptr[1], line[1], si_fsmb (bits)); 177 bits = si_rotqbyi (bits, 2); 178 buf_ptr[2] = si_selb (buf_ptr[2], line[2], si_fsmb (bits)); 179 bits = si_rotqbyi (bits, 2); 180 buf_ptr[3] = si_selb (buf_ptr[3], line[3], si_fsmb (bits)); 181 bits = si_rotqbyi (bits, 2); 182 buf_ptr[4] = si_selb (buf_ptr[4], line[4], si_fsmb (bits)); 183 bits = si_rotqbyi (bits, 2); 184 buf_ptr[5] = si_selb (buf_ptr[5], line[5], si_fsmb (bits)); 185 bits = si_rotqbyi (bits, 2); 186 buf_ptr[6] = si_selb (buf_ptr[6], line[6], si_fsmb (bits)); 187 bits = si_rotqbyi (bits, 2); 188 buf_ptr[7] = si_selb (buf_ptr[7], line[7], si_fsmb (bits)); 189 bits = si_rotqbyi (bits, 2); 190 191 mfc_putllc (buf_ptr, tag, 0, 0); 192 } 193 while (mfc_read_atomic_status ()); 194 195 /* Leave critical section. */ 196 if (__builtin_expect (mach_stat & 1, 0)) 197 spu_ienable (); 198#endif 199 } 200 201 /* In any case, marking the lo tag with 1 which denotes empty. */ 202 SET_EMPTY (entry, way); 203 entry->dirty_bits[way] = (vector unsigned short) si_from_uint (0); 204} 205 206void 207__cache_evict (__ea void *ea) 208{ 209 addr tag = (addr) ea & ~TAG_MASK; 210 struct __cache_tag_array *entry = GET_ENTRY (ea); 211 int i = 0; 212 213 /* Cycles through all the possible ways an address could be at 214 and evicts the way if found. */ 215 216 for (i = 0; i < WAYS; i++) 217 if (CHECK_TAG (entry, i, tag)) 218 __cache_evict_entry (entry, i); 219} 220 221static void * 222__cache_fill (int way, addr tag) 223{ 224 unsigned int oldmask, mach_stat; 225 char *line = ((void *) 0); 226 227 /* Reserve our DMA tag. */ 228 if (dma_tag == 32) 229 dma_tag = mfc_tag_reserve (); 230 231 /* Enter critical section. */ 232 mach_stat = spu_readch (SPU_RdMachStat); 233 spu_idisable (); 234 235 /* Issue DMA request. */ 236 line = GET_CACHE_LINE (tag, way); 237 mfc_get (line, tag, LINE_SIZE, dma_tag, 0, 0); 238 239 /* Wait for DMA completion. */ 240 oldmask = mfc_read_tag_mask (); 241 mfc_write_tag_mask (1 << dma_tag); 242 mfc_read_tag_status_all (); 243 mfc_write_tag_mask (oldmask); 244 245 /* Leave critical section. */ 246 if (__builtin_expect (mach_stat & 1, 0)) 247 spu_ienable (); 248 249 return (void *) line; 250} 251 252static void 253__cache_miss (__ea void *ea, struct __cache_tag_array *entry, int way) 254{ 255 256 addr tag = (addr) ea & ~TAG_MASK; 257 unsigned int lru = 0; 258 int i = 0; 259 int idx = 0; 260 261 /* If way > 4, then there are no empty slots, so we must evict 262 the least recently used entry. */ 263 if (way >= 4) 264 { 265 for (i = 0; i < WAYS; i++) 266 { 267 if (GET_LRU (entry, i) > lru) 268 { 269 lru = GET_LRU (entry, i); 270 idx = i; 271 } 272 } 273 __cache_evict_entry (entry, idx); 274 way = idx; 275 } 276 277 /* Set the empty entry's tag and fill it's cache line. */ 278 279 SET_TAG (entry, way, tag); 280 entry->reserved[way] = 0; 281 282 /* Check if the address is just an effective address within the 283 SPU's local store. */ 284 285 /* Because the LS is not 256k aligned, we can't do a nice and mask 286 here to compare, so we must check the whole range. */ 287 288 if ((addr) ea >= (addr) __ea_local_store 289 && (addr) ea < (addr) (__ea_local_store + 0x40000)) 290 { 291 SET_IS_LS (entry, way); 292 entry->base[way] = 293 (void *) ((unsigned int) ((addr) ea - 294 (addr) __ea_local_store) & ~0x7f); 295 } 296 else 297 { 298 entry->base[way] = __cache_fill (way, tag); 299 } 300} 301 302void * 303__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty) 304{ 305#ifdef __EA64__ 306 unsigned int tag_hi; 307 qword etag_hi; 308#endif 309 unsigned int tag_lo; 310 struct __cache_tag_array *entry; 311 312 qword etag_lo; 313 qword equal; 314 qword bit_mask; 315 qword way; 316 317 /* This first chunk, we merely fill the pointer and tag. */ 318 319 entry = GET_ENTRY (ea); 320 321#ifndef __EA64__ 322 tag_lo = 323 si_to_uint (si_andc 324 (si_shufb 325 (si_from_uint ((addr) ea), si_from_uint (0), 326 si_from_uint (0x00010203)), si_from_uint (TAG_MASK))); 327#else 328 tag_lo = 329 si_to_uint (si_andc 330 (si_shufb 331 (si_from_ullong ((addr) ea), si_from_uint (0), 332 si_from_uint (0x04050607)), si_from_uint (TAG_MASK))); 333 334 tag_hi = 335 si_to_uint (si_shufb 336 (si_from_ullong ((addr) ea), si_from_uint (0), 337 si_from_uint (0x00010203))); 338#endif 339 340 /* Increment LRU in reserved bytes. */ 341 si_stqd (si_ai (si_lqd (si_from_ptr (entry), 48), 1), 342 si_from_ptr (entry), 48); 343 344missreturn: 345 /* Check if the entry's lo_tag is equal to the address' lo_tag. */ 346 etag_lo = si_lqd (si_from_ptr (entry), 0); 347 equal = si_ceq (etag_lo, si_from_uint (tag_lo)); 348#ifdef __EA64__ 349 /* And the high tag too. */ 350 etag_hi = si_lqd (si_from_ptr (entry), 16); 351 equal = si_and (equal, (si_ceq (etag_hi, si_from_uint (tag_hi)))); 352#endif 353 354 if ((si_to_uint (si_orx (equal)) == 0)) 355 goto misshandler; 356 357 if (n_bytes_dirty) 358 { 359 /* way = 0x40,0x50,0x60,0x70 for each way, which is also the 360 offset of the appropriate dirty bits. */ 361 way = si_shli (si_clz (si_gbb (equal)), 2); 362 363 /* To create the bit_mask, we set it to all 1s (uint -1), then we 364 shift it over (128 - n_bytes_dirty) times. */ 365 366 bit_mask = si_from_uint (-1); 367 368 bit_mask = 369 si_shlqby (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) / 8)); 370 371 bit_mask = 372 si_shlqbi (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) % 8)); 373 374 /* Rotate it around to the correct offset. */ 375 bit_mask = 376 si_rotqby (bit_mask, 377 si_from_uint (-1 * ((addr) ea & TAG_MASK) / 8)); 378 379 bit_mask = 380 si_rotqbi (bit_mask, 381 si_from_uint (-1 * ((addr) ea & TAG_MASK) % 8)); 382 383 /* Update the dirty bits. */ 384 si_stqx (si_or (si_lqx (si_from_ptr (entry), way), bit_mask), 385 si_from_ptr (entry), way); 386 }; 387 388 /* We've definitely found the right entry, set LRU (reserved) to 0 389 maintaining the LS flag (MSB). */ 390 391 si_stqd (si_andc 392 (si_lqd (si_from_ptr (entry), 48), 393 si_and (equal, si_from_uint (~(LS_FLAG)))), 394 si_from_ptr (entry), 48); 395 396 return (void *) 397 si_to_uint (si_a 398 (si_orx 399 (si_and (si_lqd (si_from_ptr (entry), 32), equal)), 400 si_from_uint (((unsigned int) (addr) ea) & TAG_MASK))); 401 402misshandler: 403 equal = si_ceqi (etag_lo, 1); 404 __cache_miss (ea, entry, (si_to_uint (si_clz (si_gbb (equal))) - 16) >> 2); 405 goto missreturn; 406} 407 408void * 409__cache_fetch (__ea void *ea) 410{ 411 return __cache_fetch_dirty (ea, 0); 412} 413 414void 415__cache_touch (__ea void *ea __attribute__ ((unused))) 416{ 417 /* NO-OP for now. */ 418} 419 420void __cache_flush (void) __attribute__ ((destructor)); 421void 422__cache_flush (void) 423{ 424 struct __cache_tag_array *entry = __cache_tag_array; 425 unsigned int i; 426 int j; 427 428 /* Cycle through each cache entry and evict all used ways. */ 429 430 for (i = 0; i < CACHE_LINES / WAYS; i++) 431 { 432 for (j = 0; j < WAYS; j++) 433 if (!CHECK_EMPTY (entry, j)) 434 __cache_evict_entry (entry, j); 435 436 entry++; 437 } 438} 439