/*	$NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/mips/mips/cache_mipsNN.c 331722 2018-03-29 02:50:57Z eadler $");

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>

#include <machine/cache.h>
#include <machine/cache_r4k.h>
#include <machine/cpuinfo.h>

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)

#define	round_line32(x)		(((x) + 31) & ~31)
#define	trunc_line32(x)		((x) & ~31)

#define	round_line64(x)		(((x) + 63) & ~63)
#define	trunc_line64(x)		((x) & ~63)

#define	round_line128(x)	(((x) + 127) & ~127)
#define	trunc_line128(x)	((x) & ~127)

#if defined(CPU_NLM)
static __inline void
xlp_sync(void)
{
        __asm __volatile (
            ".set push		\n"
            ".set noreorder	\n"
            ".set mips64	\n"
            "dla $8, 1f		\n"
            "/* jr.hb $8 */	\n"
            ".word 0x1000408	\n"
            "nop		\n"
            "1: nop		\n"
            ".set pop		\n"
            : : : "$8");
}
#endif

#if defined(SB1250_PASS1)
#define	SYNC	__asm volatile("sync; sync")
#elif defined(CPU_NLM)
#define	SYNC	xlp_sync()
#else
#define	SYNC	__asm volatile("sync")
#endif

#if defined(CPU_CNMIPS)
#define	SYNCI	mips_sync_icache();
#elif defined(CPU_NLM)
#define	SYNCI	xlp_sync()
#else
#define	SYNCI
#endif

/*
 * Exported variables for consumers like bus_dma code.
 */
int mips_picache_linesize;
int mips_pdcache_linesize;

static int picache_size;
static int picache_stride;
static int picache_loopcount;
static int picache_way_mask;
static int pdcache_size;
static int pdcache_stride;
static int pdcache_loopcount;
static int pdcache_way_mask;
static int sdcache_size;
static int sdcache_stride;
static int sdcache_loopcount;
static int sdcache_way_mask;
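/*
 * Example of the line macros above (illustrative arithmetic only): with
 * 32-byte lines, trunc_line32(0x1005) is 0x1000 and round_line32(0x1005)
 * is 0x1020, so the half-open range [trunc, round) covers every cache
 * line the caller's range touches.  Already-aligned addresses pass
 * through both macros unchanged.
 */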
void
mipsNN_cache_init(struct mips_cpuinfo *cpuinfo)
{
        int flush_multiple_lines_per_way;

        flush_multiple_lines_per_way =
            cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize > PAGE_SIZE;
        if (cpuinfo->icache_virtual) {
                /*
                 * With a virtual Icache we don't need to flush
                 * multiples of the page size with index ops; we just
                 * need to flush one page's worth.
                 */
                flush_multiple_lines_per_way = 0;
        }

        if (flush_multiple_lines_per_way) {
                picache_stride = PAGE_SIZE;
                picache_loopcount = (cpuinfo->l1.ic_nsets *
                    cpuinfo->l1.ic_linesize / PAGE_SIZE) *
                    cpuinfo->l1.ic_nways;
        } else {
                picache_stride = cpuinfo->l1.ic_nsets *
                    cpuinfo->l1.ic_linesize;
                picache_loopcount = cpuinfo->l1.ic_nways;
        }

        if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
                pdcache_stride = cpuinfo->l1.dc_nsets *
                    cpuinfo->l1.dc_linesize;
                pdcache_loopcount = cpuinfo->l1.dc_nways;
        } else {
                pdcache_stride = PAGE_SIZE;
                pdcache_loopcount = (cpuinfo->l1.dc_nsets *
                    cpuinfo->l1.dc_linesize / PAGE_SIZE) *
                    cpuinfo->l1.dc_nways;
        }

        mips_picache_linesize = cpuinfo->l1.ic_linesize;
        mips_pdcache_linesize = cpuinfo->l1.dc_linesize;

        picache_size = cpuinfo->l1.ic_size;
        picache_way_mask = cpuinfo->l1.ic_nways - 1;
        pdcache_size = cpuinfo->l1.dc_size;
        pdcache_way_mask = cpuinfo->l1.dc_nways - 1;

        sdcache_stride = cpuinfo->l2.dc_nsets * cpuinfo->l2.dc_linesize;
        sdcache_loopcount = cpuinfo->l2.dc_nways;
        sdcache_size = cpuinfo->l2.dc_size;
        sdcache_way_mask = cpuinfo->l2.dc_nways - 1;

#define	CACHE_DEBUG
#ifdef CACHE_DEBUG
        printf("Cache info:\n");
        if (cpuinfo->icache_virtual)
                printf("  icache is virtual\n");
        printf("  picache_stride    = %d\n", picache_stride);
        printf("  picache_loopcount = %d\n", picache_loopcount);
        printf("  pdcache_stride    = %d\n", pdcache_stride);
        printf("  pdcache_loopcount = %d\n", pdcache_loopcount);
#endif
}
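/*
 * Worked example for the stride/loopcount computation above, using a
 * hypothetical geometry (not any particular CPU): a 4-way I-cache with
 * 128 sets of 32-byte lines spans 128 * 32 = 4KB per way.  With 4KB
 * pages that does not exceed PAGE_SIZE, so picache_stride is the way
 * size (4KB) and picache_loopcount is 4, one pass per way.  If a way
 * spanned 8KB instead, the stride would drop to PAGE_SIZE and the loop
 * count would grow to (8KB / 4KB) * 4 = 8, one pass per way per
 * possible page-sized alias.
 */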
void
mipsNN_icache_sync_all_16(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + picache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        mips_intern_dcache_wbinv_all();

        while (va < eva) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += (32 * 16);
        }

        SYNC;
}

void
mipsNN_icache_sync_all_32(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + picache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        mips_intern_dcache_wbinv_all();

        while (va < eva) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += (32 * 32);
        }

        SYNC;
}

void
mipsNN_icache_sync_all_64(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + picache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        mips_intern_dcache_wbinv_all();

        while (va < eva) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += (32 * 64);
        }

        SYNC;
}

void
mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        mips_intern_dcache_wb_range(va, (eva - va));

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += 16;
        }

        SYNC;
}

void
mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        mips_intern_dcache_wb_range(va, (eva - va));

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += 32;
        }

        SYNC;
}

void
mipsNN_icache_sync_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line64(va + size);
        va = trunc_line64(va);

        mips_intern_dcache_wb_range(va, (eva - va));

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += 64;
        }

        SYNC;
}

void
mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

        eva = round_line16(va + size);
        va = trunc_line16(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = picache_stride;
        loopcount = picache_loopcount;

        mips_intern_dcache_wbinv_range_index(va, (eva - va));

        while ((eva - va) >= (8 * 16)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_16(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 8 * 16;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 16;
        }
}
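/*
 * Note on the index-op loops above: only the page-offset bits of the
 * caller's address can be trusted to match the cache index, so the
 * inner for-loop replays each index op at `loopcount' successive
 * `stride'-sized aliases.  That walk covers every way, and every value
 * of any index bits that sit above the page offset, of the primary
 * cache.
 */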
void
mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

        eva = round_line32(va + size);
        va = trunc_line32(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = picache_stride;
        loopcount = picache_loopcount;

        mips_intern_dcache_wbinv_range_index(va, (eva - va));

        while ((eva - va) >= (8 * 32)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_32(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 8 * 32;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 32;
        }
}

void
mipsNN_icache_sync_range_index_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

        eva = round_line64(va + size);
        va = trunc_line64(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = picache_stride;
        loopcount = picache_loopcount;

        mips_intern_dcache_wbinv_range_index(va, (eva - va));

        while ((eva - va) >= (8 * 64)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_64(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 8 * 64;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 64;
        }
}

void
mipsNN_pdcache_wbinv_all_16(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + pdcache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        while (va < eva) {
                cache_r4k_op_32lines_16(va,
                    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 16);
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_all_32(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + pdcache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        while (va < eva) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 32);
        }

        SYNC;
}
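/*
 * Quick reference for the R4K-style cache ops used throughout this
 * file: Hit ops (CACHEOP_R4K_HIT_*) only touch a line if the given
 * address actually hits in the cache, while Index ops
 * (CACHEOP_R4K_INDEX_*) act on whatever line occupies that index,
 * valid or not.  WB writes a dirty line back to memory, INV discards
 * the line, and WB_INV does both.
 */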
void
mipsNN_pdcache_wbinv_all_64(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + pdcache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        while (va < eva) {
                cache_r4k_op_32lines_64(va,
                    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 64);
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va,
                    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += 16;
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += 32;
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line64(va + size);
        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va,
                    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += 64;
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

        eva = round_line16(va + size);
        va = trunc_line16(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = pdcache_stride;
        loopcount = pdcache_loopcount;

        while ((eva - va) >= (8 * 16)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_16(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 8 * 16;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 16;
        }
}
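/*
 * Example of the unrolled/tail split used by the range routines
 * (illustrative arithmetic): a 1000-byte request on 16-byte lines
 * expands to at most 1024 bytes, i.e. 64 lines, after rounding.  The
 * first while-loop retires 32 lines (512 bytes) per iteration; the
 * second finishes whatever remainder is left, one line at a time.
 */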
void
mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

        eva = round_line32(va + size);
        va = trunc_line32(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = pdcache_stride;
        loopcount = pdcache_loopcount;

        while ((eva - va) >= (8 * 32)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_32(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 8 * 32;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 32;
        }
}

void
mipsNN_pdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

        eva = round_line64(va + size);
        va = trunc_line64(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = pdcache_stride;
        loopcount = pdcache_loopcount;

        while ((eva - va) >= (8 * 64)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_64(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 8 * 64;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 64;
        }
}

void
mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += 16;
        }

        SYNC;
}

void
mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += 32;
        }

        SYNC;
}

void
mipsNN_pdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line64(va + size);
        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += 64;
        }

        SYNC;
}

void
mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += 16;
        }

        SYNC;
}
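#if 0
/*
 * Usage sketch (illustrative only, not compiled): callers normally
 * reach the routines above through the wrappers in <machine/cache.h>
 * rather than by calling a sized variant directly.  The classic DMA
 * pattern, assuming a hypothetical buffer `buf' of `len' bytes:
 */
        /* CPU wrote the buffer, device will read it: push dirty lines. */
        mips_dcache_wb_range((vm_offset_t)buf, len);
        /* ... device performs the transfer ... */
        /* Device wrote the buffer, CPU will read it: drop stale lines. */
        mips_dcache_inv_range((vm_offset_t)buf, len);
#endif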
void
mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += 32;
        }

        SYNC;
}

void
mipsNN_pdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line64(va + size);
        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += 64;
        }

        SYNC;
}
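/*
 * The 128-byte-line variants come in two flavors.  On cnMIPS (Octeon)
 * the L1 data cache is kept coherent by hardware, so the data-cache
 * routines below reduce to at most a SYNC and the icache sync leans on
 * SYNCI; every other CPU falls through to the generic loop-based
 * implementations in the #else branch.
 */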
#ifdef CPU_CNMIPS

void
mipsNN_icache_sync_all_128(void)
{
        SYNCI
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
        SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
        SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
        SYNC;
}

#else

void
mipsNN_icache_sync_all_128(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + picache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        mips_intern_dcache_wbinv_all();

        while (va < eva) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += (32 * 128);
        }

        SYNC;
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line128(va + size);
        va = trunc_line128(va);

        mips_intern_dcache_wb_range(va, (eva - va));

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += 128;
        }

        SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

        eva = round_line128(va + size);
        va = trunc_line128(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = picache_stride;
        loopcount = picache_loopcount;

        mips_intern_dcache_wbinv_range_index(va, (eva - va));

        while ((eva - va) >= (32 * 128)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_32lines_128(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 32 * 128;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 128;
        }
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + pdcache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        while (va < eva) {
                cache_r4k_op_32lines_128(va,
                    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 128);
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line128(va + size);
        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va,
                    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += 128;
        }

        SYNC;
}
void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

        eva = round_line128(va + size);
        va = trunc_line128(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = pdcache_stride;
        loopcount = pdcache_loopcount;

        while ((eva - va) >= (32 * 128)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_32lines_128(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 32 * 128;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 128;
        }
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line128(va + size);
        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += 128;
        }

        SYNC;
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line128(va + size);
        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += 128;
        }

        SYNC;
}

#endif

void
mipsNN_sdcache_wbinv_all_32(void)
{
        vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
        vm_offset_t eva = va + sdcache_size;

        while (va < eva) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 32);
        }
}

void
mipsNN_sdcache_wbinv_all_64(void)
{
        vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
        vm_offset_t eva = va + sdcache_size;

        while (va < eva) {
                cache_r4k_op_32lines_64(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 64);
        }
}

void
mipsNN_sdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line32(va + size);

        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += 32;
        }
}

void
mipsNN_sdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line64(va + size);

        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va,
                    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += 64;
        }
}
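/*
 * Note on the secondary-cache index routines below: the address is
 * folded into [0, sdcache_size) with (sdcache_size - 1) and no per-way
 * passes are made; sdcache_stride and sdcache_loopcount are computed
 * in mipsNN_cache_init() but never consulted here.  That treatment is
 * only exact for a cache whose index selects the way as well as the
 * set (e.g. a direct-mapped L2).
 */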
void
mipsNN_sdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

        eva = round_line32(va + size);
        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += 32;
        }
}

void
mipsNN_sdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

        eva = round_line64(va + size);
        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += 64;
        }
}

void
mipsNN_sdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line32(va + size);

        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += 32;
        }
}

void
mipsNN_sdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line64(va + size);

        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += 64;
        }
}

void
mipsNN_sdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line32(va + size);

        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += 32;
        }
}

void
mipsNN_sdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line64(va + size);

        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += 64;
        }
}
void
mipsNN_sdcache_wbinv_all_128(void)
{
        vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
        vm_offset_t eva = va + sdcache_size;

        while (va < eva) {
                cache_r4k_op_32lines_128(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 128);
        }
}

void
mipsNN_sdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line128(va + size);

        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va,
                    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += 128;
        }
}

void
mipsNN_sdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

        eva = round_line128(va + size);
        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += 128;
        }
}

void
mipsNN_sdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line128(va + size);

        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += 128;
        }
}

void
mipsNN_sdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line128(va + size);

        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += 128;
        }
}