/*	$NetBSD: cache.c,v 1.96 2007/03/04 06:00:45 christos Exp $ */

/*
 * Copyright (c) 1996
 *	The President and Fellows of Harvard College. All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Aaron Brown and
 *	Harvard University.
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)cache.c	8.2 (Berkeley) 10/30/93
 *
 */

/*
 * Cache routines.
 *
 * TODO:
 *	- rework range flush
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cache.c,v 1.96 2007/03/04 06:00:45 christos Exp $");

#include "opt_multiprocessor.h"
#include "opt_sparc_arch.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <uvm/uvm_extern.h>

#include <machine/ctlreg.h>
#include <machine/pte.h>

#include <sparc/sparc/asm.h>
#include <sparc/sparc/cache.h>
#include <sparc/sparc/cpuvar.h>

struct evcnt vcache_flush_pg =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","pg");
EVCNT_ATTACH_STATIC(vcache_flush_pg);
struct evcnt vcache_flush_seg =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","seg");
EVCNT_ATTACH_STATIC(vcache_flush_seg);
struct evcnt vcache_flush_reg =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","reg");
EVCNT_ATTACH_STATIC(vcache_flush_reg);
struct evcnt vcache_flush_ctx =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","ctx");
EVCNT_ATTACH_STATIC(vcache_flush_ctx);
struct evcnt vcache_flush_range =
	EVCNT_INITIALIZER(EVCNT_TYPE_INTR,0,"vcfl","rng");
EVCNT_ATTACH_STATIC(vcache_flush_range);

int cache_alias_dist;		/* Cache anti-aliasing constants */
int cache_alias_bits;
u_long dvma_cachealign;
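/*
 * Editorial note (illustrative, not derived from this file): with a
 * virtually indexed cache, two mappings of the same physical page may
 * occupy different cache lines unless they agree in the index bits
 * recorded in cache_alias_bits.  A pmap-style "bad alias" test could be
 * sketched as the hypothetical macro
 *
 *	#define BADALIAS(va1, va2) \
 *		(((int)(va1) ^ (int)(va2)) & cache_alias_bits)
 *
 * and dvma_cachealign is similarly used to align DVMA mappings so they
 * do not alias their CPU-side mappings.
 */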

/*
 * Enable the cache.
 * We need to clear out the valid bits first.
 */
void
sun4_cache_enable(void)
{
	u_int i, lim, ls, ts;

	cache_alias_bits = CPU_ISSUN4
				? CACHE_ALIAS_BITS_SUN4
				: CACHE_ALIAS_BITS_SUN4C;
	cache_alias_dist = CPU_ISSUN4
				? CACHE_ALIAS_DIST_SUN4
				: CACHE_ALIAS_DIST_SUN4C;

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;

	for (i = AC_CACHETAGS, lim = i + ts; i < lim; i += ls)
		sta(i, ASI_CONTROL, 0);

	stba(AC_SYSENABLE, ASI_CONTROL,
	     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_CACHE);
	CACHEINFO.c_enabled = 1;

#ifdef notyet
	if (cpuinfo.flags & SUN4_IOCACHE) {
		stba(AC_SYSENABLE, ASI_CONTROL,
		     lduba(AC_SYSENABLE, ASI_CONTROL) | SYSEN_IOCACHE);
		printf("iocache enabled\n");
	}
#endif
}

/*
 * XXX Hammer is a bit too big, here; SUN4D systems only have Viking.
 */
#if defined(SUN4M) || defined(SUN4D)
void
ms1_cache_enable(void)
{
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* We "flash-clear" the I/D caches. */
	if ((pcr & MS1_PCR_ICE) == 0)
		sta(0, ASI_ICACHECLR, 0);
	if ((pcr & MS1_PCR_DCE) == 0)
		sta(0, ASI_DCACHECLR, 0);

	/* Turn on caches */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | MS1_PCR_DCE | MS1_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/*
	 * When zeroing or copying pages, there might still be entries in
	 * the cache, since we don't flush pages from the cache when
	 * unmapping them (`vactype' is VAC_NONE).  Fortunately, the
	 * MS1 cache is write-through and not write-allocate, so we can
	 * use cacheable access while not displacing cache lines.
	 */
	cpuinfo.flags |= CPUFLG_CACHE_MANDATORY;
}

void
viking_cache_enable(void)
{
	u_int pcr;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	if ((pcr & VIKING_PCR_ICE) == 0) {
		/* I-cache not on; "flash-clear" it now. */
		sta(0x80000000, ASI_ICACHECLR, 0);	/* Unlock */
		sta(0, ASI_ICACHECLR, 0);		/* clear */
	}
	if ((pcr & VIKING_PCR_DCE) == 0) {
		/* D-cache not on: "flash-clear" it. */
		sta(0x80000000, ASI_DCACHECLR, 0);
		sta(0, ASI_DCACHECLR, 0);
	}

	/* Turn on caches via MMU */
	sta(SRMMU_PCR, ASI_SRMMU, pcr | VIKING_PCR_DCE | VIKING_PCR_ICE);

	CACHEINFO.c_enabled = CACHEINFO.dc_enabled = 1;

	/* Now turn on MultiCache if it exists */
	if (cpuinfo.mxcc && CACHEINFO.ec_totalsize > 0) {
		/* Set external cache enable bit in MXCC control register */
		stda(MXCC_CTRLREG, ASI_CONTROL,
		     ldda(MXCC_CTRLREG, ASI_CONTROL) | MXCC_CTRLREG_CE);
		cpuinfo.flags |= CPUFLG_CACHEPAGETABLES; /* Ok to cache PTEs */
		CACHEINFO.ec_enabled = 1;
	}
}

void
hypersparc_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr, v;
	int alias_dist;

	/*
	 * Setup the anti-aliasing constants and DVMA alignment constraint.
	 */
	alias_dist = CACHEINFO.c_totalsize;
	if (alias_dist > cache_alias_dist) {
		cache_alias_dist = alias_dist;
		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
		dvma_cachealign = cache_alias_dist;
	}

	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;
	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	if ((pcr & HYPERSPARC_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr &= ~(HYPERSPARC_PCR_CE | HYPERSPARC_PCR_CM);
	hypersparc_cache_flush_all();

	/* Enable write-back cache */
	pcr |= HYPERSPARC_PCR_CE;
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= HYPERSPARC_PCR_CM;

	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;

	/* XXX: should add support */
	if (CACHEINFO.c_hwflush)
		panic("cache_enable: can't handle 4M with hw-flush cache");

	/*
	 * Enable instruction cache and, on single-processor machines,
	 * disable `Unimplemented Flush Traps'.
	 */
	v = HYPERSPARC_ICCR_ICE | (sparc_ncpus <= 1 ? HYPERSPARC_ICCR_FTD : 0);
	wrasr(v, HYPERSPARC_ASRNUM_ICCR);
}


void
swift_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr;

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & SWIFT_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & SWIFT_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (SWIFT_PCR_ICE | SWIFT_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;
}

void
cypress_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr;
	int alias_dist;

	alias_dist = CACHEINFO.c_totalsize;
	if (alias_dist > cache_alias_dist) {
		cache_alias_dist = alias_dist;
		cache_alias_bits = (alias_dist - 1) & ~PGOFSET;
		dvma_cachealign = alias_dist;
	}

	pcr = lda(SRMMU_PCR, ASI_SRMMU);
	pcr &= ~CYPRESS_PCR_CM;

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.c_linesize;
	ts = CACHEINFO.c_totalsize;
	if ((pcr & CYPRESS_PCR_CE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= CYPRESS_PCR_CE;
	/* If put in write-back mode, turn it on */
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)
		pcr |= CYPRESS_PCR_CM;
	sta(SRMMU_PCR, ASI_SRMMU, pcr);
	CACHEINFO.c_enabled = 1;
}

void
turbosparc_cache_enable(void)
{
	int i, ls, ts;
	u_int pcr, pcf;
	/* External cache sizes in KB; see Turbo sparc manual */
	static const int ts_ecache_table[8] = {0,256,512,1024,512,1024,1024,0};

	cache_alias_dist = max(
		CACHEINFO.ic_totalsize / CACHEINFO.ic_associativity,
		CACHEINFO.dc_totalsize / CACHEINFO.dc_associativity);
	cache_alias_bits = (cache_alias_dist - 1) & ~PGOFSET;

	pcr = lda(SRMMU_PCR, ASI_SRMMU);

	/* Now reset cache tag memory if cache not yet enabled */
	ls = CACHEINFO.ic_linesize;
	ts = CACHEINFO.ic_totalsize;
	if ((pcr & TURBOSPARC_PCR_ICE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_ICACHETAG, 0);

	ls = CACHEINFO.dc_linesize;
	ts = CACHEINFO.dc_totalsize;
	if ((pcr & TURBOSPARC_PCR_DCE) == 0)
		for (i = 0; i < ts; i += ls)
			sta(i, ASI_DCACHETAG, 0);

	pcr |= (TURBOSPARC_PCR_ICE | TURBOSPARC_PCR_DCE);
	sta(SRMMU_PCR, ASI_SRMMU, pcr);

	pcf = lda(SRMMU_PCFG, ASI_SRMMU);
	if (pcf & TURBOSPARC_PCFG_SE) {
		/*
		 * Record external cache info. The Turbosparc's second-
		 * level cache is physically addressed/tagged and is
		 * not exposed by the PROM.
		 */
		CACHEINFO.ec_totalsize = 1024 *
			ts_ecache_table[(pcf & TURBOSPARC_PCFG_SCC)];
		CACHEINFO.ec_linesize = 32;
	}
	if (pcf & TURBOSPARC_PCFG_SNP)
		printf(": DVMA coherent ");

	CACHEINFO.c_enabled = 1;
}
#endif /* SUN4M || SUN4D */


/*
 * Note: on the sun4 and sun4c, the cache flush functions ignore the
 * `ctx' parameter. This can be done since the pmap operations that
 * need to flush cache lines will already have switched to the proper
 * context to manipulate the MMU. Hence we can avoid the overhead of
 * saving and restoring the context here.
 */

/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space (or, for hardware flush, once to each page in the
 * hardware flush space, for all cache pages).
 */
void
sun4_vcache_flush_context(int ctx)
{
	char *p;
	int i, ls;

	vcache_flush_ctx.ev_count++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	if (CACHEINFO.c_hwflush) {
		ls = PAGE_SIZE;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHCTX, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_nlines;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHCTX, 0);
	}
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 *
 * This function is only called on sun4's with 3-level MMUs; there's
 * no hw-flush space.
 */
void
sun4_vcache_flush_region(int vreg, int ctx)
{
	int i, ls;
	char *p;

	vcache_flush_reg.ev_count++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHREG, 0);
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 *
 * Again, for hardware, we just write each page (in hw-flush space).
 */
void
sun4_vcache_flush_segment(int vreg, int vseg, int ctx)
{
	int i, ls;
	char *p;

	vcache_flush_seg.ev_count++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	if (CACHEINFO.c_hwflush) {
		ls = PAGE_SIZE;
		i = CACHEINFO.c_totalsize >> PGSHIFT;
		for (; --i >= 0; p += ls)
			sta(p, ASI_HWFLUSHSEG, 0);
	} else {
		ls = CACHEINFO.c_linesize;
		i = CACHEINFO.c_nlines;
		for (; --i >= 0; p += ls)
			sta(p, ASI_FLUSHSEG, 0);
	}
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
sun4_vcache_flush_page(int va, int ctx)
{
	int i, ls;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;
	ls = CACHEINFO.c_linesize;
	i = PAGE_SIZE >> CACHEINFO.c_l2linesize;
	for (; --i >= 0; p += ls)
		sta(p, ASI_FLUSHPG, 0);
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * This version uses a hardware-assisted flush operation and needs just
 * one write into the ASI_HWFLUSHPG space to flush all cache lines.
 */
void
sun4_vcache_flush_page_hw(int va, int ctx)
{
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;
	sta(p, ASI_HWFLUSHPG, 0);
}

/*
 * Flush a range of virtual addresses (in the current context).
 * The first byte is at (base&~PGOFSET) and the last one is just
 * before byte (base+len).
 *
 * We choose the best of (context,segment,page) here.
 */

#define CACHE_FLUSH_MAGIC	(CACHEINFO.c_totalsize / PAGE_SIZE)
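
/*
 * Worked example (editorial; the sizes are assumptions): with a
 * 65536-byte sun4c cache and 4096-byte pages, CACHE_FLUSH_MAGIC is 16.
 * Flushing 16 pages line by line costs about as many ASI stores as
 * flushing one whole segment, which is the break-even point used by
 * sun4_cache_flush() below.
 */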

void
sun4_cache_flush(void *base, u_int len)
{
	int i, ls, baseoff;
	char *p;

	if (CACHEINFO.c_vactype == VAC_NONE)
		return;

	/*
	 * Figure out how much must be flushed.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages,  we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region. If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context. This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 * (XXX the magic number 16 is now wrong, must review policy)
	 */
	baseoff = (int)base & PGOFSET;
	i = (baseoff + len + PGOFSET) >> PGSHIFT;

	vcache_flush_range.ev_count++;

	if (__predict_true(i < CACHE_FLUSH_MAGIC)) {
		/* cache_flush_page, for i pages */
		p = (char *)((int)base & ~baseoff);
		if (CACHEINFO.c_hwflush) {
			for (; --i >= 0; p += PAGE_SIZE)
				sta(p, ASI_HWFLUSHPG, 0);
		} else {
			ls = CACHEINFO.c_linesize;
			i <<= PGSHIFT - CACHEINFO.c_l2linesize;
			for (; --i >= 0; p += ls)
				sta(p, ASI_FLUSHPG, 0);
		}
		return;
	}

	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (__predict_true(i == 1)) {
		sun4_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), 0);
		return;
	}

	if (HASSUN4_MMU3L) {
		baseoff = (u_int)base & RGOFSET;
		i = (baseoff + len + RGOFSET) >> RGSHIFT;
		if (i == 1)
			sun4_vcache_flush_region(VA_VREG(base), 0);
		else
			sun4_vcache_flush_context(0);
	} else
		sun4_vcache_flush_context(0);
}


#if defined(SUN4M) || defined(SUN4D)
#define trapoff()	do { setpsr(getpsr() & ~PSR_ET); } while(0)
#define trapon()	do { setpsr(getpsr() | PSR_ET); } while(0)
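
/*
 * Editorial note: the SRMMU flush routines below briefly install the
 * target context in the MMU context register.  Traps are disabled
 * (PSR_ET cleared) around that window so that no interrupt or fault
 * handler can run while the borrowed context is current.
 */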
/*
 * Flush the current context from the cache.
 *
 * This is done by writing to each cache line in the `flush context'
 * address space.
 */
void
srmmu_vcache_flush_context(int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_ctx.ev_count++;
	p = (char *)0;	/* addresses 0..cacheinfo.c_totalsize will do fine */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFC, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual region from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual region number, and
 * we use the `flush region' space.
 */
void
srmmu_vcache_flush_region(int vreg, int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_reg.ev_count++;
	p = (char *)VRTOVA(vreg);	/* reg..reg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFR, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual segment from the cache.
 *
 * This is also done by writing to each cache line, except that
 * now the addresses must include the virtual segment number, and
 * we use the `flush segment' space.
 *
 * Again, for hardware, we just write each page (in hw-flush space).
 */
void
srmmu_vcache_flush_segment(int vreg, int vseg, int ctx)
{
	int i, ls, octx;
	char *p;

	vcache_flush_seg.ev_count++;
	p = (char *)VSTOVA(vreg, vseg);	/* seg..seg+sz rather than 0..sz */
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; --i >= 0; p += ls)
		sta(p, ASI_IDCACHELFS, 0);
	setcontext4m(octx);
	trapon();
}

/*
 * Flush the given virtual page from the cache.
 * (va is the actual address, and must be aligned on a page boundary.)
 * Again we write to each cache line.
 */
void
srmmu_vcache_flush_page(int va, int ctx)
{
	int i, ls, octx;
	char *p;

#ifdef DEBUG
	if (va & PGOFSET)
		panic("cache_flush_page: asked to flush misaligned va 0x%x",va);
#endif

	vcache_flush_pg.ev_count++;
	p = (char *)va;

	/*
	 * XXX - if called early during bootstrap, we don't have the cache
	 *	 info yet. Make up a cache line size (double-word aligned)
	 */
	if ((ls = CACHEINFO.c_linesize) == 0)
		ls = 8;
	i = PAGE_SIZE;
	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; i > 0; p += ls, i -= ls)
		sta(p, ASI_IDCACHELFP, 0);
#if defined(MULTIPROCESSOR)
	/*
	 * The page flush operation will have caused an MMU table walk
	 * on HyperSPARC because the cache is physically tagged. Since
	 * the pmap functions will not always cross-flush it in the MP
	 * case (because it may not be active on this CPU), we flush
	 * the TLB entry now.
	 */
	/*if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) -- more work than it's worth */
		sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);

#endif
	setcontext4m(octx);
	trapon();
}

/*
 * Flush entire cache.
 */
void
srmmu_cache_flush_all(void)
{

	srmmu_vcache_flush_context(0);
}

void
srmmu_vcache_flush_range(int va, int len, int ctx)
{
	int i, ls, offset;
	char *p;
	int octx;

	/*
	 * XXX - if called early during bootstrap, we don't have the cache
	 *	 info yet. Make up a cache line size (double-word aligned)
	 */
	if ((ls = CACHEINFO.c_linesize) == 0)
		ls = 8;

	vcache_flush_range.ev_count++;

	/* Compute # of cache lines covered by this range */
	offset = va & (ls - 1);
	i = len + offset;
	p = (char *)(va & ~(ls - 1));

	octx = getcontext4m();
	trapoff();
	setcontext4m(ctx);
	for (; i > 0; p += ls, i -= ls)
		sta(p, ASI_IDCACHELFP, 0);

#if defined(MULTIPROCESSOR)
	if (cpuinfo.cpu_type == CPUTYP_HS_MBUS) {
		/*
		 * See hypersparc comment in srmmu_vcache_flush_page().
		 */
		offset = va & PGOFSET;
		i = (offset + len + PGOFSET) >> PGSHIFT;

		va = va & ~PGOFSET;
		for (; --i >= 0; va += PAGE_SIZE)
			sta(va | ASI_SRMMUFP_L3, ASI_SRMMUFP, 0);
	}
#endif
	setcontext4m(octx);
	trapon();
	return;
}

/*
 * Flush a range of virtual addresses (in the current context).
 *
 * We choose the best of (context,segment,page) here.
 */

void
srmmu_cache_flush(void *base, u_int len)
{
	int ctx = getcontext4m();
	int i, baseoff;


	/*
	 * Figure out the most efficient way to flush.
	 *
	 * If we need to do CACHE_FLUSH_MAGIC pages,  we can do a segment
	 * in the same number of loop iterations.  We can also do the whole
	 * region. If we need to do between 2 and NSEGRG, do the region.
	 * If we need to do two or more regions, just go ahead and do the
	 * whole context. This might not be ideal (e.g., fsck likes to do
	 * 65536-byte reads, which might not necessarily be aligned).
	 *
	 * We could try to be sneaky here and use the direct mapping
	 * to avoid flushing things `below' the start and `above' the
	 * ending address (rather than rounding to whole pages and
	 * segments), but I did not want to debug that now and it is
	 * not clear it would help much.
	 *
	 */

	if (__predict_true(len < CACHEINFO.c_totalsize)) {
#if defined(MULTIPROCESSOR)
		FXCALL3(cpuinfo.sp_vcache_flush_range,
			cpuinfo.ft_vcache_flush_range,
			(int)base, len, ctx, CPUSET_ALL);
#else
		cpuinfo.sp_vcache_flush_range((int)base, len, ctx);
#endif
		return;
	}

	baseoff = (u_int)base & SGOFSET;
	i = (baseoff + len + SGOFSET) >> SGSHIFT;
	if (__predict_true(i == 1)) {
#if defined(MULTIPROCESSOR)
		FXCALL3(cpuinfo.sp_vcache_flush_segment,
			cpuinfo.ft_vcache_flush_segment,
			VA_VREG(base), VA_VSEG(base), ctx, CPUSET_ALL);
#else
		srmmu_vcache_flush_segment(VA_VREG(base), VA_VSEG(base), ctx);
#endif
		return;
	}

	baseoff = (u_int)base & RGOFSET;
	i = (baseoff + len + RGOFSET) >> RGSHIFT;
	while (i--) {
#if defined(MULTIPROCESSOR)
		FXCALL2(cpuinfo.sp_vcache_flush_region,
		       cpuinfo.ft_vcache_flush_region,
		       VA_VREG(base), ctx, CPUSET_ALL);
#else
		srmmu_vcache_flush_region(VA_VREG(base), ctx);
#endif
		base = ((char *)base + NBPRG);
	}
}

int ms1_cacheflush_magic = 0;
#define MS1_CACHEFLUSH_MAGIC	ms1_cacheflush_magic

void
ms1_cache_flush(void *base, u_int len)
{

	/*
	 * Although physically tagged, we still need to flush the
	 * data cache after (if we have a write-through cache) or before
	 * (in case of write-back caches) DMA operations.
	 */

#if MS1_CACHEFLUSH_MAGIC
	if (len <= MS1_CACHEFLUSH_MAGIC) {
		/*
		 * If the range to be flushed is sufficiently small
		 * invalidate the covered cache lines by hand.
		 *
		 * The MicroSPARC I has a direct-mapped virtually addressed
		 * physically tagged data cache which is organised as
		 * 128 lines of 16 bytes. Virtual address bits [4-10]
		 * select the cache line. The cache tags are accessed
		 * through the standard DCACHE control space using the
		 * same address bits as those used to select the cache
		 * line in the virtual address.
		 *
		 * Note: we don't bother to compare the actual tags
		 * since that would require looking up physical addresses.
		 *
		 * The format of the tags we read from ASI_DCACHE control
		 * space is:
		 *
		 * 31     27 26            11 10         1 0
		 * +--------+----------------+------------+-+
		 * |  xxx   |    PA[26-11]   |    xxx     |V|
		 * +--------+----------------+------------+-+
		 *
		 * PA: bits 11-26 of the physical address
		 * V:  line valid bit
		 */
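		/*
		 * Editorial example (values are illustrative only): for
		 * base = 0x40a678, bits [4-10] give 0x670, so the loop
		 * below starts at tag address 0x670 and steps through
		 * consecutive 16-byte lines, wrapping modulo the
		 * 128-line (0x800-byte) cache.
		 */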
		int tagaddr = ((u_int)base & 0x7f0);

		len = roundup(len, 16);
		while (len != 0) {
			int tag = lda(tagaddr, ASI_DCACHETAG);
			if ((tag & 1) == 1) {
				/* Mark this cache line invalid */
				sta(tagaddr, ASI_DCACHETAG, 0);
			}
			len -= 16;
			tagaddr = (tagaddr + 16) & 0x7f0;
		}
	} else
#endif
		/* Flush entire data cache */
		sta(0, ASI_DCACHECLR, 0);
}


/*
 * Flush entire cache.
 */
void
ms1_cache_flush_all(void)
{

	/* Flash-clear both caches */
	sta(0, ASI_ICACHECLR, 0);
	sta(0, ASI_DCACHECLR, 0);
}

void
hypersparc_cache_flush_all(void)
{

	srmmu_vcache_flush_context(getcontext4m());
	/* Flush instruction cache */
	hypersparc_pure_vcache_flush();
}

void
cypress_cache_flush_all(void)
{
	extern char kernel_text[];

	char *p;
	int i, ls;

	/* Fill the cache with known read-only content */
	p = (char *)kernel_text;
	ls = CACHEINFO.c_linesize;
	i = CACHEINFO.c_nlines;
	for (; --i >= 0; p += ls)
		(*(volatile char *)p);
}


void
viking_cache_flush(void *base, u_int len)
{
}

void
viking_pcache_flush_page(paddr_t pa, int invalidate_only)
{
	int set, i;

	/*
	 * The viking's on-chip data cache is 4-way set associative,
	 * consisting of 128 sets, each holding 4 lines of 32 bytes.
	 * Note that one 4096 byte page exactly covers all 128 sets
	 * in the cache.
	 */
	if (invalidate_only) {
		u_int pa_tag = (pa >> 12);
		u_int tagaddr;
		uint64_t tag;

		/*
		 * Loop over all sets and invalidate all entries tagged
		 * with the given physical address by resetting the cache
		 * tag in ASI_DCACHETAG control space.
		 *
		 * The address format for accessing a tag is:
		 *
		 * 31   30      27   26                  11      5 4  3 2    0
		 * +------+-----+------+-------//--------+--------+----+-----+
		 * | type | xxx | line |       xxx       |  set   | xx | 0   |
		 * +------+-----+------+-------//--------+--------+----+-----+
		 *
		 * set:  the cache set tag to be read (0-127)
		 * line: the line within the set (0-3)
		 * type: 1: read set tag; 2: read physical tag
		 *
		 * The (type 2) tag read from this address is a 64-bit word
		 * formatted as follows:
		 *
		 *          5         4         4
		 * 63       6         8         0            23               0
		 * +-------+-+-------+-+-------+-+-----------+----------------+
		 * |  xxx  |V|  xxx  |D|  xxx  |S|    xxx    |    PA[35-12]   |
		 * +-------+-+-------+-+-------+-+-----------+----------------+
		 *
		 * PA: bits 12-35 of the physical address
		 * S:  line shared bit
		 * D:  line dirty bit
		 * V:  line valid bit
		 */

#define VIKING_DCACHETAG_S	0x0000010000000000ULL	/* line shared bit */
#define VIKING_DCACHETAG_D	0x0001000000000000ULL	/* line dirty bit */
#define VIKING_DCACHETAG_V	0x0100000000000000ULL	/* line valid bit */
#define VIKING_DCACHETAG_PAMASK	0x0000000000ffffffULL	/* PA tag field */
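
		/*
		 * Editorial example (illustrative only): to read the
		 * physical tag of line 3 in set 5, the loop below forms
		 * the address (2 << 30) | (3 << 26) | (5 << 5), i.e.
		 * type 2, line 3, set 5, and loads it as a 64-bit word
		 * with ldda() from ASI_DCACHETAG space.
		 */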

		for (set = 0; set < 128; set++) {
			/* Set set number and access type */
			tagaddr = (set << 5) | (2 << 30);

			/* Examine the tag for each line in the set */
			for (i = 0 ; i < 4; i++) {
				tag = ldda(tagaddr | (i << 26), ASI_DCACHETAG);
				/*
				 * If this is a valid tag and the PA field
				 * matches clear the tag.
				 */
				if ((tag & VIKING_DCACHETAG_PAMASK) == pa_tag &&
				    (tag & VIKING_DCACHETAG_V) != 0)
					stda(tagaddr | (i << 26),
					     ASI_DCACHETAG, 0);
			}
		}

	} else {
		extern char kernel_text[];

		/*
		 * Force the cache to validate its backing memory
		 * by displacing all cache lines with known read-only
		 * content from the start of kernel text.
		 *
		 * Note that this thrashes the entire cache. However,
		 * we currently only need to call upon this code
		 * once at boot time.
		 */
		for (set = 0; set < 128; set++) {
			int *v = (int *)(kernel_text + (set << 5));

			/*
			 * We need to read (2*associativity-1) different
			 * locations to be sure to displace the entire set.
			 */
			i = 2 * 4 - 1;
			while (i--) {
				(*(volatile int *)v);
				v += 4096;
			}
		}
	}
}
#endif /* SUN4M || SUN4D */


#if defined(MULTIPROCESSOR)
/*
 * Cache flushing on multi-processor systems involves sending
 * inter-processor messages to flush the cache on each module.
 *
 * The current context of the originating processor is passed in the
 * message. This assumes the allocation of CPU contexts is a global
 * operation (remember that the actual context tables for the CPUs
 * are distinct).
 */

void
smp_vcache_flush_page(int va, int ctx)
{

	FXCALL2(cpuinfo.sp_vcache_flush_page, cpuinfo.ft_vcache_flush_page,
		va, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_segment(int vr, int vs, int ctx)
{

	FXCALL3(cpuinfo.sp_vcache_flush_segment, cpuinfo.ft_vcache_flush_segment,
		vr, vs, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_region(int vr, int ctx)
{

	FXCALL2(cpuinfo.sp_vcache_flush_region, cpuinfo.ft_vcache_flush_region,
		vr, ctx, CPUSET_ALL);
}

void
smp_vcache_flush_context(int ctx)
{

	FXCALL1(cpuinfo.sp_vcache_flush_context, cpuinfo.ft_vcache_flush_context,
		ctx, CPUSET_ALL);
}
#endif /* MULTIPROCESSOR */
