/*	$NetBSD: cache.c,v 1.96 2007/03/04 06:00:45 christos Exp $ */

/*
 * Copyright (c) 1996
 *	The President and Fellows of Harvard College. All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Aaron Brown and
 *	Harvard University.
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)cache.c	8.2 (Berkeley) 10/30/93
 *
 */

/*
 * Cache routines.
 *
 * TODO:
 *	- rework range flush
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache.c,v 1.96 2007/03/04 06:00:45 christos Exp $");

#include "opt_multiprocessor.h"
#include "opt_sparc_arch.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <uvm/uvm_extern.h>

#include <machine/ctlreg.h>
#include <machine/pte.h>

#include <sparc/sparc/asm.h>
#include <sparc/sparc/cache.h>
#include <sparc/sparc/cpuvar.h>

struct evcnt vcache_flush_pg =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","pg");
EVCNT_ATTACH_STATIC(vcache_flush_pg);
struct evcnt vcache_flush_seg =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","seg");
EVCNT_ATTACH_STATIC(vcache_flush_seg);
struct evcnt vcache_flush_reg =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","reg");
EVCNT_ATTACH_STATIC(vcache_flush_reg);
struct evcnt vcache_flush_ctx =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","ctx");
EVCNT_ATTACH_STATIC(vcache_flush_ctx);
struct evcnt vcache_flush_range =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","rng");
EVCNT_ATTACH_STATIC(vcache_flush_range);

int cache_alias_dist;		/* Cache anti-aliasing constants */
int cache_alias_bits;
u_long dvma_cachealign;

/*
 * Enable the cache.
 * We need to clear out the valid bits first.
 */
void
sun4_cache_enable(void)
{
	u_int i, lim, ls, ts;

	cache_alias_bits = CPU_ISSUN4
				? CACHE_ALIAS_BITS_SUN4
				: CACHE_ALIAS_BITS_SUN4C;
	cache_alias_dist = CPU_ISSUN4
				? CACHE_ALIAS_DIST_SUN4
				: CACHE_ALIAS_DIST_SUN4C;

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;

	for (i = AC_CACHETAGS, lim = i + ts; i < lim; i += ls)
		sta(i, ASI_CONTROL, 0);

	stba(AC_SYSENABLE, ASI_CONTROL,
	     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_CACHE);
	CACHEINFO.c_enabled = 1;

#ifdef notyet
	if (cpuinfo.flags & SUN4_IOCACHE) {
		stba(AC_SYSENABLE, ASI_CONTROL,
		     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_IOCACHE);
		printf("iocache enabled\n");
	}
#endif
}

/*
 * XXX Hammer is a bit too big, here; SUN4D systems only have Viking.
 */
#if defined(SUN4M) || defined(SUN4D)
void
ms1_cache_enable(void)
{
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* We "flash-clear" the I/D caches. */
	if ((pcr & MS1_PCR_ICE) == 0)
		sta(0, ASI_ICACHECLR, 0);
	if ((pcr & MS1_PCR_DCE) == 0)
		sta(0, ASI_DCACHECLR, 0);

	/* Turn on caches */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | MS1_PCR_DCE | MS1_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/*
	 * When zeroing or copying pages, there might still be entries in
	 * the cache, since we don't flush pages from the cache when
	 * unmapping them (`vactype' is VAC_NONE).  Fortunately, the
	 * MS1 cache is write-through and not write-allocate, so we can
	 * use cacheable access while not displacing cache lines.
	 */
	cpuinfo.flags |= CPUFLG_CACHE_MANDATORY;
}
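
/*
 * Illustrative sketch (not part of the original file): the anti-aliasing
 * constants computed by the enable routines are consumed by the pmap layer
 * to detect illegal virtual-cache aliases.  Two virtual mappings of the
 * same physical page are only safe to cache if they agree in the bits
 * covered by `cache_alias_bits'.  A BADALIAS-style check, with the
 * hypothetical mapping addresses `va1' and `va2':
 */
#if 0
	if (((int)va1 ^ (int)va2) & cache_alias_bits) {
		/*
		 * Illegal alias: one mapping must be flushed, or both
		 * must be mapped uncacheable.
		 */
	}
#endif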

void
viking_cache_enable(void)
{
	u_int pcr;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	if ((pcr & VIKING_PCR_ICE) == 0) {
		/* I-cache not on; "flash-clear" it now. */
		sta(0x80000000, ASI_ICACHECLR, 0);	/* Unlock */
		sta(0, ASI_ICACHECLR, 0);		/* clear */
	}
	if ((pcr & VIKING_PCR_DCE) == 0) {
		/* D-cache not on: "flash-clear" it. */
		sta(0x80000000, ASI_DCACHECLR, 0);
		sta(0, ASI_DCACHECLR, 0);
	}

	/* Turn on caches via MMU */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | VIKING_PCR_DCE | VIKING_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/* Now turn on MultiCache if it exists */
	if (cpuinfo.mxcc && CACHEINFO.ec_totalsize > 0) {
		/* Set external cache enable bit in MXCC control register */
		stda(MXCC_CTRLREG, ASI_CONTROL,
		     ldda(MXCC_CTRLREG, ASI_CONTROL) | MXCC_CTRLREG_CE);
		cpuinfo.flags |= CPUFLG_CACHEPAGETABLES; /* Ok to cache PTEs */
		CACHEINFO.ec_enabled = 1;
	}
}

void
hypersparc_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr, v;
	int alias_dist;

	/*
	 * Setup the anti-aliasing constants and DVMA alignment constraint.
	 */
	alias_dist = CACHEINFO.c_totalsize;
	if (alias_dist > cache_alias_dist) {
		cache_alias_dist = alias_dist;
		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
		dvma_cachealign = cache_alias_dist;
	}

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;
	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	if ((pcr & HYPERSPARC_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr &= ~(HYPERSPARC_PCR_CE | HYPERSPARC_PCR_CM);
	hypersparc_cache_flush_all();

	/* Enable write-back cache */
	pcr |= HYPERSPARC_PCR_CE;
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= HYPERSPARC_PCR_CM;

	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;

	/* XXX: should add support */
	if (CACHEINFO.c_hwflush)
		panic("cache_enable: can't handle 4M with hw-flush cache");

	/*
	 * Enable instruction cache and, on single-processor machines,
	 * disable `Unimplemented Flush Traps'.
	 */
	v = HYPERSPARC_ICCR_ICE | (sparc_ncpus <= 1 ? HYPERSPARC_ICCR_FTD : 0);
	wrasr(v, HYPERSPARC_ASRNUM_ICCR);
}


void
swift_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & SWIFT_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & SWIFT_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (SWIFT_PCR_ICE | SWIFT_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;
}

void
cypress_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr;
	int alias_dist;

	alias_dist = CACHEINFO.c_totalsize;
	if (alias_dist > cache_alias_dist) {
		cache_alias_dist = alias_dist;
		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
		dvma_cachealign = alias_dist;
	}

	pcr = lda(SRMMU_PCR, ASI_SRMMU);
	pcr &= ~CYPRESS_PCR_CM;

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;
	if ((pcr & CYPRESS_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= CYPRESS_PCR_CE;
	/* If put in write-back mode, turn it on */
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= CYPRESS_PCR_CM;
	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;
}

void
turbosparc_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr, pcf;
	/* External cache sizes in KB; see the TurboSPARC manual */
	static const int ts_ecache_table[8] = {0,256,512,1024,512,1024,1024,0};

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & TURBOSPARC_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & TURBOSPARC_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (TURBOSPARC_PCR_ICE | TURBOSPARC_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);

	pcf = lda(SRMMU_PCFG, ASI_SRMMU);
	if (pcf & TURBOSPARC_PCFG_SE) {
		/*
		 * Record external cache info.  The TurboSPARC's second-
		 * level cache is physically addressed/tagged and is
		 * not exposed by the PROM.
		 */
		CACHEINFO.ec_totalsize = 1024 *
			ts_ecache_table[(pcf & TURBOSPARC_PCFG_SCC)];
		CACHEINFO.ec_linesize = 32;
	}
	if (pcf & TURBOSPARC_PCFG_SNP)
		printf(": DVMA coherent ");

	CACHEINFO.c_enabled = 1;
}
#endif /* SUN4M || SUN4D */


/*
 * Note: on the sun4 and sun4c, the cache flush functions ignore the `ctx'
 * parameter.  This can be done since the pmap operations that need
 * to flush cache lines will already have switched to the proper
 * context to manipulate the MMU.  Hence we can avoid the overhead
 * of saving and restoring the context here.
 */
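
/*
 * Illustrative sketch (not from the original file) of the caller pattern
 * the note above relies on.  `pm' is a hypothetical pmap, and getcontext()/
 * setcontext() stand for the usual sun4/sun4c context accessors:
 */
#if 0
	octx = getcontext();
	setcontext(pm->pm_ctxnum);	/* switch MMU to the target context */
	sun4_vcache_flush_segment(vr, vs, pm->pm_ctxnum); /* `ctx' unused */
	setcontext(octx);
#endif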

/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space (or, for hardware flush, once to each page in the
 * hardware flush space, for all cache pages).
 */
void
sun4_vcache_flush_context(int ctx)
{
	char *p;
	int i, ls;

	vcache_flush_ctx.ev_count++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	if (CACHEINFO.c_hwflush) {
		ls = PAGE_SIZE;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHCTX, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_nlines;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHCTX, 0);
	}
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 *
 * This function is only called on sun4's with 3-level MMUs; there's
 * no hw-flush space.
 */
void
sun4_vcache_flush_region(int vreg, int ctx)
{
	int i, ls;
	char *p;

	vcache_flush_reg.ev_count++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHREG, 0);
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 *
 * Again, for hardware, we just write each page (in hw-flush space).
 */
void
sun4_vcache_flush_segment(int vreg, int vseg, int ctx)
{
	int i, ls;
	char *p;

	vcache_flush_seg.ev_count++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	if (CACHEINFO.c_hwflush) {
		ls = PAGE_SIZE;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHSEG, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_nlines;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHSEG, 0);
	}
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
sun4_vcache_flush_page(int va, int ctx)
{
	int i, ls;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;
	ls = CACHEINFO.c_linesize;
	i = PAGE_SIZE >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHPG, 0);
}
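
/*
 * Worked example for the loop above (hypothetical geometry, not from the
 * original file): with 4096-byte pages and 16-byte lines (c_l2linesize
 * == 4), i = PAGE_SIZE >> 4 = 256, so one ASI_FLUSHPG store is issued per
 * line, 256 stores in all, to flush the page.
 */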

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * This version uses hardware-assisted flush operation and just needs
 * one write into ASI_HWFLUSHPG space to flush all cache lines.
 */
void
sun4_vcache_flush_page_hw(int va, int ctx)
{
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;
	sta(p, ASI_HWFLUSHPG, 0);
}

/*
 * Flush a range of virtual addresses (in the current context).
 * The first byte is at (base&~PGOFSET) and the last one is just
 * before byte (base+len).
 *
 * We choose the best of (context,segment,page) here.
 */

#define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / PAGE_SIZE)

void
sun4_cache_flush(void *base, u_int len)
{
	int i, ls, baseoff;
	char *p;

	if (CACHEINFO.c_vactype == VAC_NONE)
		return;

	/*
	 * Figure out how much must be flushed.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region.  If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context.  This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 * (XXX the magic number 16 is now wrong, must review policy)
	 */
	baseoff = (int)base & PGOFSET;
	i = (baseoff + len + PGOFSET) >> PGSHIFT;

	vcache_flush_range.ev_count++;

	if (__predict_true(i < CACHE_FLUSH_MAGIC)) {
		/* cache_flush_page, for i pages */
		p = (char *)((int)base & ~baseoff);
		if (CACHEINFO.c_hwflush) {
			for (; --i >= 0; p += PAGE_SIZE)
				sta(p, ASI_HWFLUSHPG, 0);
		} else {
			ls = CACHEINFO.c_linesize;
			i <<= PGSHIFT - CACHEINFO.c_l2linesize;
			for (; --i >= 0; p += ls)
				sta(p, ASI_FLUSHPG, 0);
		}
		return;
	}

	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (__predict_true(i == 1)) {
		sun4_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), 0);
		return;
	}

	if (HASSUN4_MMU3L) {
		baseoff = (u_int)base & RGOFSET;
		i = (baseoff + len + RGOFSET) >> RGSHIFT;
		if (i == 1)
			sun4_vcache_flush_region(VA_VREG(base), 0);
		else
			sun4_vcache_flush_context(0);
	} else
		sun4_vcache_flush_context(0);
}
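
/*
 * Worked example for the policy above (hypothetical numbers, not from the
 * original file): with 4 KB pages, flushing len = 65536 from a page-aligned
 * base gives i = (0 + 65536 + PGOFSET) >> PGSHIFT = 16 pages.  On a 64 KB
 * cache, CACHE_FLUSH_MAGIC = 65536 / 4096 = 16, so `i < CACHE_FLUSH_MAGIC'
 * fails and the code falls through to the segment test; if the range fits
 * in one segment, a single segment flush covers it in c_nlines iterations.
 */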

#if defined(SUN4M) || defined(SUN4D)
#define trapoff()	do { setpsr(getpsr() & ~PSR_ET); } while(0)
#define trapon()	do { setpsr(getpsr() | PSR_ET); } while(0)

/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space.
 */
void
srmmu_vcache_flush_context(int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_ctx.ev_count++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFC, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 */
void
srmmu_vcache_flush_region(int vreg, int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_reg.ev_count++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFR, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 *
 * Again, for hardware, we just write each page (in hw-flush space).
 */
void
srmmu_vcache_flush_segment(int vreg, int vseg, int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_seg.ev_count++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFS, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
srmmu_vcache_flush_page(int va, int ctx)
{
	int i, ls, octx;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;

	/*
	 * XXX - if called early during bootstrap, we don't have the cache
	 * info yet.  Make up a cache line size (double-word aligned).
	 */
	if ((ls = CACHEINFO.c_linesize) == 0)
		ls = 8;
	i = PAGE_SIZE;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; i > 0; p += ls, i -= ls)
		sta(p, ASI_IDCACHELFP, 0);
#if defined(MULTIPROCESSOR)
	/*
	 * The page flush operation will have caused an MMU table walk
	 * on Hypersparc because the cache is physically tagged.  Since the
	 * pmap functions will not always cross flush it in the MP case
	 * (because the context may not be active on this CPU), we flush
	 * the TLB entry now.
	 */
	/*if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) -- more work than it's worth */
	sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);

#endif
	setcontext4m(octx);
	trapon();
}

/*
 * Flush entire cache.
 */
void
srmmu_cache_flush_all(void)
{

	srmmu_vcache_flush_context(0);
}

void
srmmu_vcache_flush_range(int va, int len, int ctx)
{
	int i, ls, offset;
	char *p;
	int octx;

	/*
	 * XXX - if called early during bootstrap, we don't have the cache
	 * info yet.  Make up a cache line size (double-word aligned).
	 */
	if ((ls = CACHEINFO.c_linesize) == 0)
		ls = 8;

	vcache_flush_range.ev_count++;

	/* Compute # of cache lines covered by this range */
	offset = va & (ls - 1);
	i = len + offset;
	p = (char *)(va & ~(ls - 1));

	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; i > 0; p += ls, i -= ls)
		sta(p, ASI_IDCACHELFP, 0);

#if defined(MULTIPROCESSOR)
	if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) {
		/*
		 * See the Hypersparc comment in srmmu_vcache_flush_page().
		 */
		offset = va & PGOFSET;
		i = (offset + len + PGOFSET) >> PGSHIFT;

		va = va & ~PGOFSET;
		for (; --i >= 0; va += PAGE_SIZE)
			sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);
	}
#endif
	setcontext4m(octx);
	trapon();
	return;
}

/*
 * Flush a range of virtual addresses (in the current context).
 *
 * We choose the best of (context,segment,page) here.
 */

void
srmmu_cache_flush(void *base, u_int len)
{
	int ctx = getcontext4m();
	int i, baseoff;


	/*
	 * Figure out the most efficient way to flush.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages, we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region.  If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context.  This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 */

	if (__predict_true(len < CACHEINFO.c_totalsize)) {
#if defined(MULTIPROCESSOR)
		FXCALL3(cpuinfo.sp_vcache_flush_range,
			cpuinfo.ft_vcache_flush_range,
			(int)base, len, ctx, CPUSET_ALL);
#else
		cpuinfo.sp_vcache_flush_range((int)base, len, ctx);
#endif
		return;
	}

	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (__predict_true(i == 1)) {
#if defined(MULTIPROCESSOR)
		FXCALL3(cpuinfo.sp_vcache_flush_segment,
			cpuinfo.ft_vcache_flush_segment,
			VA_VREG(base), VA_VSEG(base), ctx, CPUSET_ALL);
#else
		srmmu_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), ctx);
#endif
		return;
	}

	baseoff = (u_int)base & RGOFSET;
	i = (baseoff + len + RGOFSET) >> RGSHIFT;
	while (i--) {
#if defined(MULTIPROCESSOR)
		FXCALL2(cpuinfo.sp_vcache_flush_region,
			cpuinfo.ft_vcache_flush_region,
			VA_VREG(base), ctx, CPUSET_ALL);
#else
		srmmu_vcache_flush_region(VA_VREG(base), ctx);
#endif
		base = ((char *)base + NBPRG);
	}
}
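
/*
 * Illustrative usage sketch (an assumption, not from this file): DMA and
 * copy paths flush a buffer through this routine, typically reached
 * indirectly via a per-CPU function pointer set up at attach time.
 * `buf' and `buflen' are hypothetical:
 */
#if 0
	srmmu_cache_flush(buf, buflen);
#endif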

int ms1_cacheflush_magic = 0;
#define MS1_CACHEFLUSH_MAGIC	ms1_cacheflush_magic

void
ms1_cache_flush(void *base, u_int len)
{

	/*
	 * Although physically tagged, we still need to flush the
	 * data cache after (if we have a write-through cache) or before
	 * (in case of write-back caches) DMA operations.
	 */

#if MS1_CACHEFLUSH_MAGIC
	if (len <= MS1_CACHEFLUSH_MAGIC) {
		/*
		 * If the range to be flushed is sufficiently small,
		 * invalidate the covered cache lines by hand.
		 *
		 * The MicroSPARC I has a direct-mapped virtually addressed
		 * physically tagged data cache which is organised as
		 * 128 lines of 16 bytes.  Virtual address bits [4-10]
		 * select the cache line.  The cache tags are accessed
		 * through the standard DCACHE control space using the
		 * same address bits as those used to select the cache
		 * line in the virtual address.
		 *
		 * Note: we don't bother to compare the actual tags
		 * since that would require looking up physical addresses.
		 *
		 * The format of the tags we read from ASI_DCACHE control
		 * space is:
		 *
		 *	31      27 26            11 10         1 0
		 *	+--------+----------------+------------+-+
		 *	|  xxx   |   PA[26-11]    |    xxx     |V|
		 *	+--------+----------------+------------+-+
		 *
		 *	PA: bits 11-26 of the physical address
		 *	V:  line valid bit
		 */
		int tagaddr = ((u_int)base & 0x7f0);

		len = roundup(len, 16);
		while (len != 0) {
			int tag = lda(tagaddr, ASI_DCACHETAG);
			if ((tag & 1) == 1) {
				/* Mark this cache line invalid */
				sta(tagaddr, ASI_DCACHETAG, 0);
			}
			len -= 16;
			tagaddr = (tagaddr + 16) & 0x7f0;
		}
	} else
#endif
		/* Flush entire data cache */
		sta(0, ASI_DCACHECLR, 0);
}


/*
 * Flush entire cache.
 */
void
ms1_cache_flush_all(void)
{

	/* Flash-clear both caches */
	sta(0, ASI_ICACHECLR, 0);
	sta(0, ASI_DCACHECLR, 0);
}

void
hypersparc_cache_flush_all(void)
{

	srmmu_vcache_flush_context(getcontext4m());
	/* Flush instruction cache */
	hypersparc_pure_vcache_flush();
}

void
cypress_cache_flush_all(void)
{
	extern char kernel_text[];

	char *p;
	int i, ls;

	/* Fill the cache with known read-only content */
	p = (char *)kernel_text;
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	for (; --i >= 0; p += ls)
		(*(volatile char *)p);
}


/* The Viking's caches are physically tagged, so no virtual flush is needed. */
void
viking_cache_flush(void *base, u_int len)
{
}

void
viking_pcache_flush_page(paddr_t pa, int invalidate_only)
{
	int set, i;

	/*
	 * The Viking's on-chip data cache is 4-way set associative,
	 * consisting of 128 sets, each holding 4 lines of 32 bytes.
	 * Note that one 4096-byte page exactly covers all 128 sets
	 * in the cache.
	 */
	if (invalidate_only) {
		u_int pa_tag = (pa >> 12);
		u_int tagaddr;
		uint64_t tag;

		/*
		 * Loop over all sets and invalidate all entries tagged
		 * with the given physical address by resetting the cache
		 * tag in ASI_DCACHETAG control space.
		 *
		 * The address format for accessing a tag is:
		 *
		 *	31  30    27  26                11       5 4  3 2   0
		 *	+------+-----+------+-------//--------+--------+----+-----+
		 *	| type | xxx | line |       xxx       |  set   | xx |  0  |
		 *	+------+-----+------+-------//--------+--------+----+-----+
		 *
		 *	set:  the cache set tag to be read (0-127)
		 *	line: the line within the set (0-3)
		 *	type: 1: read set tag; 2: read physical tag
		 *
		 * The (type 2) tag read from this address is a 64-bit word
		 * formatted as follows:
		 *
		 *		  5	    4	      4
		 *	63	  6	    8	      0	       23	       0
		 *	+-------+-+-------+-+-------+-+-----------+----------------+
		 *	|  xxx  |V|  xxx  |D|  xxx  |S|    xxx    |   PA[35-12]    |
		 *	+-------+-+-------+-+-------+-+-----------+----------------+
		 *
		 *	PA: bits 12-35 of the physical address
		 *	S:  line shared bit
		 *	D:  line dirty bit
		 *	V:  line valid bit
		 */

#define VIKING_DCACHETAG_S	0x0000010000000000ULL	/* line shared bit */
#define VIKING_DCACHETAG_D	0x0001000000000000ULL	/* line dirty bit */
#define VIKING_DCACHETAG_V	0x0100000000000000ULL	/* line valid bit */
#define VIKING_DCACHETAG_PAMASK	0x0000000000ffffffULL	/* PA tag field */

		for (set = 0; set < 128; set++) {
			/* Set set number and access type */
			tagaddr = (set << 5) | (2 << 30);

			/* Examine the tag for each line in the set */
			for (i = 0 ; i < 4; i++) {
				tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG);
				/*
				 * If this is a valid tag and the PA field
				 * matches, clear the tag.
				 */
				if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag &&
				    (tag & VIKING_DCACHETAG_V) != 0)
					stda(tagaddr | (i << 26),
					     ASI_DCACHETAG, 0);
			}
		}

	} else {
		extern char kernel_text[];

		/*
		 * Force the cache to validate its backing memory
		 * by displacing all cache lines with known read-only
		 * content from the start of kernel text.
		 *
		 * Note that this thrashes the entire cache.  However,
		 * we currently only need to call upon this code
		 * once at boot time.
		 */
		for (set = 0; set < 128; set++) {
			int *v = (int *)(kernel_text + (set << 5));

			/*
			 * We need to read (2*associativity-1) different
			 * locations to be sure to displace the entire set.
			 */
			i = 2 * 4 - 1;
			while (i--) {
				(*(volatile int *)v);
				v += 4096;
			}
		}
	}
}
#endif /* SUN4M || SUN4D */


#if defined(MULTIPROCESSOR)
/*
 * Cache flushing on multi-processor systems involves sending
 * inter-processor messages to flush the cache on each module.
 *
 * The current context of the originating processor is passed in the
 * message.  This assumes the allocation of CPU contexts is a global
 * operation (remember that the actual context tables for the CPUs
 * are distinct).
 */
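
/*
 * Illustrative sketch (hypothetical, not from this file): on MP kernels
 * these broadcasting wrappers are what ends up behind the per-CPU flush
 * hooks, so a caller flushing a page effectively does:
 */
#if 0
	if (sparc_ncpus > 1)
		smp_vcache_flush_page(va, ctx);		/* cross-call all CPUs */
	else
		cpuinfo.sp_vcache_flush_page(va, ctx);	/* this CPU only */
#endif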

void
smp_vcache_flush_page(int va, int ctx)
{

	FXCALL2(cpuinfo.sp_vcache_flush_page, cpuinfo.ft_vcache_flush_page,
		va, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_segment(int vr, int vs, int ctx)
{

	FXCALL3(cpuinfo.sp_vcache_flush_segment,
		cpuinfo.ft_vcache_flush_segment,
		vr, vs, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_region(int vr, int ctx)
{

	FXCALL2(cpuinfo.sp_vcache_flush_region, cpuinfo.ft_vcache_flush_region,
		vr, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_context(int ctx)
{

	FXCALL1(cpuinfo.sp_vcache_flush_context,
		cpuinfo.ft_vcache_flush_context,
		ctx, CPUSET_ALL);
}
#endif /* MULTIPROCESSOR */