/*	$NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $	*/

/*
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/mips/mips/cache_mipsNN.c 330897 2018-03-14 03:19:51Z eadler $");

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>

#include <machine/cache.h>
#include <machine/cache_r4k.h>
#include <machine/cpuinfo.h>

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)

#define	round_line32(x)		(((x) + 31) & ~31)
#define	trunc_line32(x)		((x) & ~31)

#define	round_line64(x)		(((x) + 63) & ~63)
#define	trunc_line64(x)		((x) & ~63)

#define	round_line128(x)	(((x) + 127) & ~127)
#define	trunc_line128(x)	((x) & ~127)
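
/*
 * Example of the mask arithmetic above (illustrative values): with a
 * 32-byte line, round_line32(0x1005) == 0x1020 ((0x1005 + 31) & ~31)
 * and trunc_line32(0x1005) == 0x1000, so a [va, va + size) range is
 * widened to whole cache lines before any line ops are issued.
 */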

#if defined(CPU_NLM)
static __inline void
xlp_sync(void)
{
        __asm __volatile (
            ".set push              \n"
            ".set noreorder         \n"
            ".set mips64            \n"
            "dla    $8, 1f          \n"
            "/* jr.hb $8 */         \n"
            ".word 0x1000408        \n"
            "nop                    \n"
            "1: nop                 \n"
            ".set pop               \n"
            : : : "$8");
}
#endif

#if defined(SB1250_PASS1)
#define	SYNC	__asm volatile("sync; sync")
#elif defined(CPU_NLM)
#define	SYNC	xlp_sync()
#else
#define	SYNC	__asm volatile("sync")
#endif

#if defined(CPU_CNMIPS)
#define	SYNCI	mips_sync_icache();
#elif defined(CPU_NLM)
#define	SYNCI	xlp_sync()
#else
#define	SYNCI
#endif

/*
 * Exported variables for consumers like bus_dma code
 */
int mips_picache_linesize;
int mips_pdcache_linesize;

static int picache_size;
static int picache_stride;
static int picache_loopcount;
static int picache_way_mask;
static int pdcache_size;
static int pdcache_stride;
static int pdcache_loopcount;
static int pdcache_way_mask;
static int sdcache_size;
static int sdcache_stride;
static int sdcache_loopcount;
static int sdcache_way_mask;

void
mipsNN_cache_init(struct mips_cpuinfo *cpuinfo)
{
        int flush_multiple_lines_per_way;

        flush_multiple_lines_per_way =
            cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize > PAGE_SIZE;
        if (cpuinfo->icache_virtual) {
                /*
                 * With a virtual Icache we don't need to flush
                 * multiples of the page size with index ops; we just
                 * need to flush one page's worth.
                 */
                flush_multiple_lines_per_way = 0;
        }

        if (flush_multiple_lines_per_way) {
                picache_stride = PAGE_SIZE;
                picache_loopcount = (cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize / PAGE_SIZE) *
                    cpuinfo->l1.ic_nways;
        } else {
                picache_stride = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize;
                picache_loopcount = cpuinfo->l1.ic_nways;
        }

        if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
                pdcache_stride = cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize;
                pdcache_loopcount = cpuinfo->l1.dc_nways;
        } else {
                pdcache_stride = PAGE_SIZE;
                pdcache_loopcount = (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize / PAGE_SIZE) *
                    cpuinfo->l1.dc_nways;
        }

        mips_picache_linesize = cpuinfo->l1.ic_linesize;
        mips_pdcache_linesize = cpuinfo->l1.dc_linesize;

        picache_size = cpuinfo->l1.ic_size;
        picache_way_mask = cpuinfo->l1.ic_nways - 1;
        pdcache_size = cpuinfo->l1.dc_size;
        pdcache_way_mask = cpuinfo->l1.dc_nways - 1;

        sdcache_stride = cpuinfo->l2.dc_nsets * cpuinfo->l2.dc_linesize;
        sdcache_loopcount = cpuinfo->l2.dc_nways;
        sdcache_size = cpuinfo->l2.dc_size;
        sdcache_way_mask = cpuinfo->l2.dc_nways - 1;

#define	CACHE_DEBUG
#ifdef CACHE_DEBUG
        printf("Cache info:\n");
        if (cpuinfo->icache_virtual)
                printf("  icache is virtual\n");
        printf("  picache_stride    = %d\n", picache_stride);
        printf("  picache_loopcount = %d\n", picache_loopcount);
        printf("  pdcache_stride    = %d\n", pdcache_stride);
        printf("  pdcache_loopcount = %d\n", pdcache_loopcount);
#endif
}
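
/*
 * Illustrative numbers (hypothetical configuration): a 4-way, 16 KB
 * physical icache with 32-byte lines has nsets * linesize = 4 KB per
 * way.  With 4 KB pages that is not larger than PAGE_SIZE, so the
 * index loops use picache_stride = 4096 and picache_loopcount = 4:
 * one pass per way, each pass covering one way's worth of indices.
 */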

void
mipsNN_icache_sync_all_16(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + picache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        mips_intern_dcache_wbinv_all();

        while (va < eva) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += (32 * 16);
        }

        SYNC;
}

void
mipsNN_icache_sync_all_32(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + picache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        mips_intern_dcache_wbinv_all();

        while (va < eva) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += (32 * 32);
        }

        SYNC;
}

void
mipsNN_icache_sync_all_64(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + picache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        mips_intern_dcache_wbinv_all();

        while (va < eva) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += (32 * 64);
        }

        SYNC;
}
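
/*
 * cache_r4k_op_32lines_NN() (defined in cache_r4k.h) issues 32 "cache"
 * instructions per call, one per NN-byte line, which is why the loops
 * above advance by (32 * NN) bytes: the branch overhead is paid once
 * per 32 lines instead of once per line.
 */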

void
mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        mips_intern_dcache_wb_range(va, (eva - va));

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += 16;
        }

        SYNC;
}

void
mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        mips_intern_dcache_wb_range(va, (eva - va));

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += 32;
        }

        SYNC;
}

void
mipsNN_icache_sync_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line64(va + size);
        va = trunc_line64(va);

        mips_intern_dcache_wb_range(va, (eva - va));

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += 64;
        }

        SYNC;
}
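
/*
 * The range functions above use "hit" cacheops, which act on a line
 * only if it currently holds the given address, so a mapped range can
 * be flushed directly.  The *_index variants below use index cacheops,
 * which select a line purely by its cache index, and therefore must
 * visit the same index in every way; see the comments in each function.
 */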

void
mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

        eva = round_line16(va + size);
        va = trunc_line16(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = picache_stride;
        loopcount = picache_loopcount;

        mips_intern_dcache_wbinv_range_index(va, (eva - va));

        while ((eva - va) >= (8 * 16)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_16(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 8 * 16;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 16;
        }
}

void
mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

        eva = round_line32(va + size);
        va = trunc_line32(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = picache_stride;
        loopcount = picache_loopcount;

        mips_intern_dcache_wbinv_range_index(va, (eva - va));

        while ((eva - va) >= (8 * 32)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_32(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 8 * 32;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 32;
        }
}

void
mipsNN_icache_sync_range_index_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

        eva = round_line64(va + size);
        va = trunc_line64(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = picache_stride;
        loopcount = picache_loopcount;

        mips_intern_dcache_wbinv_range_index(va, (eva - va));

        while ((eva - va) >= (8 * 64)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_64(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 8 * 64;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 64;
        }
}
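
/*
 * Sketch of the index traversal above, using the hypothetical
 * configuration from the earlier example (stride 4096, loopcount 4):
 * for each block of lines starting at va, the inner loop touches
 * tmpva, tmpva + 4096, tmpva + 8192 and tmpva + 12288, hitting the
 * same index in each of the four ways before va advances.
 */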

void
mipsNN_pdcache_wbinv_all_16(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + pdcache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        while (va < eva) {
                cache_r4k_op_32lines_16(va,
                    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 16);
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_all_32(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + pdcache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        while (va < eva) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 32);
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_all_64(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + pdcache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        while (va < eva) {
                cache_r4k_op_32lines_64(va,
                    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 64);
        }

        SYNC;
}
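
/*
 * Unlike the icache sync_all routines, no preliminary dcache pass is
 * needed above: CACHEOP_R4K_INDEX_WB_INV on the primary dcache both
 * writes back and invalidates each line in a single operation.
 */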

void
mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va,
                    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += 16;
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += 32;
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line64(va + size);
        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va,
                    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += 64;
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

        eva = round_line16(va + size);
        va = trunc_line16(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = pdcache_stride;
        loopcount = pdcache_loopcount;

        while ((eva - va) >= (8 * 16)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_16(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 8 * 16;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 16;
        }
}

void
mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

        eva = round_line32(va + size);
        va = trunc_line32(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = pdcache_stride;
        loopcount = pdcache_loopcount;

        while ((eva - va) >= (8 * 32)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_32(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 8 * 32;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 32;
        }
}

void
mipsNN_pdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

        eva = round_line64(va + size);
        va = trunc_line64(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = pdcache_stride;
        loopcount = pdcache_loopcount;

        while ((eva - va) >= (8 * 64)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_64(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 8 * 64;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 64;
        }
}

void
mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += 16;
        }

        SYNC;
}

void
mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += 32;
        }

        SYNC;
}

void
mipsNN_pdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line64(va + size);
        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += 64;
        }

        SYNC;
}

void
mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += 16;
        }

        SYNC;
}

void
mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += 32;
        }

        SYNC;
}

void
mipsNN_pdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line64(va + size);
        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += 64;
        }

        SYNC;
}
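
/*
 * Illustrative mapping of the three range flavours to DMA sync
 * directions (hypothetical caller; the real dispatch lives in the
 * MIPS bus_dma code via the machine/cache.h wrappers, not here):
 *
 *	mips_dcache_wb_range(va, len)     - push dirty lines to memory
 *	                                    before a device reads them
 *	mips_dcache_inv_range(va, len)    - discard stale lines before
 *	                                    the CPU reads device output
 *	mips_dcache_wbinv_range(va, len)  - both, when the buffer may be
 *	                                    written from either side
 */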

#ifdef CPU_CNMIPS

void
mipsNN_icache_sync_all_128(void)
{
        SYNCI
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
        SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
        SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
        SYNC;
}
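
/*
 * The empty and SYNC-only stubs above are assumed to rely on the
 * cnMIPS (Cavium Octeon) property that the L1 data cache is
 * write-through and coherent with respect to DMA: there is nothing
 * to write back, so the pdcache routines only need to order stores,
 * and icache synchronization is delegated to mips_sync_icache()
 * through the SYNCI macro at the top of this file.
 */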

#else

void
mipsNN_icache_sync_all_128(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + picache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        mips_intern_dcache_wbinv_all();

        while (va < eva) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += (32 * 128);
        }

        SYNC;
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line128(va + size);
        va = trunc_line128(va);

        mips_intern_dcache_wb_range(va, (eva - va));

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += 128;
        }

        SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

        eva = round_line128(va + size);
        va = trunc_line128(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = picache_stride;
        loopcount = picache_loopcount;

        mips_intern_dcache_wbinv_range_index(va, (eva - va));

        while ((eva - va) >= (32 * 128)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_32lines_128(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 32 * 128;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 128;
        }
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + pdcache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        while (va < eva) {
                cache_r4k_op_32lines_128(va,
                    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 128);
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line128(va + size);
        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va,
                    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += 128;
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

        eva = round_line128(va + size);
        va = trunc_line128(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = pdcache_stride;
        loopcount = pdcache_loopcount;

        while ((eva - va) >= (32 * 128)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_32lines_128(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 32 * 128;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 128;
        }
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line128(va + size);
        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += 128;
        }

        SYNC;
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line128(va + size);
        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += 128;
        }

        SYNC;
}

#endif
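
/*
 * Secondary (L2) cache routines.  These mirror the primary-dcache
 * routines above with CACHE_R4K_SD ops, but with two differences
 * visible in the code: the index variants mask the address with
 * (sdcache_size - 1) instead of using the way-mask plus
 * stride/loopcount walk, and none of the routines end with SYNC.
 */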

void
mipsNN_sdcache_wbinv_all_32(void)
{
        vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
        vm_offset_t eva = va + sdcache_size;

        while (va < eva) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 32);
        }
}

void
mipsNN_sdcache_wbinv_all_64(void)
{
        vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
        vm_offset_t eva = va + sdcache_size;

        while (va < eva) {
                cache_r4k_op_32lines_64(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 64);
        }
}

void
mipsNN_sdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line32(va + size);

        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += 32;
        }
}

void
mipsNN_sdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line64(va + size);

        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va,
                    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += 64;
        }
}

void
mipsNN_sdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

        eva = round_line32(va + size);
        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += 32;
        }
}

void
mipsNN_sdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

        eva = round_line64(va + size);
        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += 64;
        }
}

void
mipsNN_sdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line32(va + size);

        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += 32;
        }
}

void
mipsNN_sdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line64(va + size);

        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += 64;
        }
}

void
mipsNN_sdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line32(va + size);

        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += 32;
        }
}

void
mipsNN_sdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line64(va + size);

        va = trunc_line64(va);

        while ((eva - va) >= (32 * 64)) {
                cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += (32 * 64);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += 64;
        }
}

void
mipsNN_sdcache_wbinv_all_128(void)
{
        vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
        vm_offset_t eva = va + sdcache_size;

        while (va < eva) {
                cache_r4k_op_32lines_128(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 128);
        }
}

void
mipsNN_sdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line128(va + size);

        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va,
                    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
                va += 128;
        }
}

void
mipsNN_sdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        /*
         * Since we're doing Index ops, we expect to not be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

        eva = round_line128(va + size);
        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va,
                    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
                va += 128;
        }
}

void
mipsNN_sdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line128(va + size);

        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
                va += 128;
        }
}

void
mipsNN_sdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva = round_line128(va + size);

        va = trunc_line128(va);

        while ((eva - va) >= (32 * 128)) {
                cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += (32 * 128);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
                va += 128;
        }
}