/*	$NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/mips/mips/cache_mipsNN.c 331722 2018-03-29 02:50:57Z eadler $");

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>

#include <machine/cache.h>
#include <machine/cache_r4k.h>
#include <machine/cpuinfo.h>

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)

#define	round_line32(x)		(((x) + 31) & ~31)
#define	trunc_line32(x)		((x) & ~31)

#define	round_line64(x)		(((x) + 63) & ~63)
#define	trunc_line64(x)		((x) & ~63)

#define	round_line128(x)	(((x) + 127) & ~127)
#define	trunc_line128(x)	((x) & ~127)

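/*
 * For example, with 32-byte lines:
 *
 *	round_line32(0x80001005) == 0x80001020
 *	trunc_line32(0x80001005) == 0x80001000
 *
 * so a [va, va + size) range is always widened outward to whole cache lines.
 */
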
#if defined(CPU_NLM)
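/*
 * XLP wants an instruction-hazard barrier after cache maintenance.  The
 * "jr.hb" (jump register with hazard barrier) below is hand-encoded with
 * .word, presumably so the file still assembles with toolchains that do
 * not accept the mnemonic.
 */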
static __inline void
xlp_sync(void)
{
        __asm __volatile (
	    ".set push              \n"
	    ".set noreorder         \n"
	    ".set mips64            \n"
	    "dla    $8, 1f          \n"
	    "/* jr.hb $8 */         \n"
	    ".word 0x1000408        \n"
	    "nop                    \n"
	 "1: nop                    \n"
	    ".set pop               \n"
	    : : : "$8");
}
#endif

#if defined(SB1250_PASS1)
#define	SYNC	__asm volatile("sync; sync")
#elif defined(CPU_NLM)
#define SYNC	xlp_sync()
#else
#define	SYNC	__asm volatile("sync")
#endif

#if defined(CPU_CNMIPS)
#define SYNCI  mips_sync_icache();
#elif defined(CPU_NLM)
#define SYNCI	xlp_sync()
#else
#define SYNCI
#endif
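
/*
 * SYNC orders the cache operations above against later memory accesses.
 * SB1250 pass 1 issues it twice, apparently as an erratum workaround, and
 * XLP substitutes the jr.hb sequence.  SYNCI is only non-empty where the
 * whole icache is synchronized in one shot (CPU_CNMIPS) or where XLP again
 * needs the hazard barrier.
 */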

/*
 * Exported variables for consumers like bus_dma code
 */
int mips_picache_linesize;
int mips_pdcache_linesize;
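
/*
 * A typical consumer rounds DMA buffer sizes and addresses to the line
 * size exported here, e.g. something along the lines of
 *
 *	size = roundup2(size, mips_pdcache_linesize);
 *
 * so that a partial-line invalidate can never touch an adjacent buffer.
 */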

static int picache_size;
static int picache_stride;
static int picache_loopcount;
static int picache_way_mask;
static int pdcache_size;
static int pdcache_stride;
static int pdcache_loopcount;
static int pdcache_way_mask;
static int sdcache_size;
static int sdcache_stride;
static int sdcache_loopcount;
static int sdcache_way_mask;

void
mipsNN_cache_init(struct mips_cpuinfo * cpuinfo)
{
	int flush_multiple_lines_per_way;

	flush_multiple_lines_per_way = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize * cpuinfo->l1.ic_linesize > PAGE_SIZE;
	if (cpuinfo->icache_virtual) {
		/*
		 * With a virtual Icache we don't need to flush
		 * multiples of the page size with index ops; we just
		 * need to flush one page's worth.
		 */
		flush_multiple_lines_per_way = 0;
	}

	if (flush_multiple_lines_per_way) {
		picache_stride = PAGE_SIZE;
		picache_loopcount = (cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize / PAGE_SIZE) *
		    cpuinfo->l1.ic_nways;
	} else {
		picache_stride = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize;
		picache_loopcount = cpuinfo->l1.ic_nways;
	}

	if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
		pdcache_stride = cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize;
		pdcache_loopcount = cpuinfo->l1.dc_nways;
	} else {
		pdcache_stride = PAGE_SIZE;
		pdcache_loopcount = (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize / PAGE_SIZE) *
		    cpuinfo->l1.dc_nways;
	}
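
	/*
	 * Example: a 32 KB, 4-way D-cache with 32-byte lines has 8 KB per
	 * way, which is larger than a 4 KB page, so the code above picks
	 * pdcache_stride = 4096 and pdcache_loopcount = 2 * 4 = 8.
	 */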

	mips_picache_linesize = cpuinfo->l1.ic_linesize;
	mips_pdcache_linesize = cpuinfo->l1.dc_linesize;

	picache_size = cpuinfo->l1.ic_size;
	picache_way_mask = cpuinfo->l1.ic_nways - 1;
	pdcache_size = cpuinfo->l1.dc_size;
	pdcache_way_mask = cpuinfo->l1.dc_nways - 1;

	sdcache_stride = cpuinfo->l2.dc_nsets * cpuinfo->l2.dc_linesize;
	sdcache_loopcount = cpuinfo->l2.dc_nways;
	sdcache_size = cpuinfo->l2.dc_size;
	sdcache_way_mask = cpuinfo->l2.dc_nways - 1;

#define CACHE_DEBUG
#ifdef CACHE_DEBUG
	printf("Cache info:\n");
	if (cpuinfo->icache_virtual)
		printf("  icache is virtual\n");
	printf("  picache_stride    = %d\n", picache_stride);
	printf("  picache_loopcount = %d\n", picache_loopcount);
	printf("  pdcache_stride    = %d\n", pdcache_stride);
	printf("  pdcache_loopcount = %d\n", pdcache_loopcount);
#endif
}

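/*
 * The per-line-size routines below all follow the same pattern: the
 * cache_r4k_op_32lines_* macros from <machine/cache_r4k.h> operate on 32
 * consecutive lines per invocation (covering 32 * linesize bytes), and a
 * trailing loop of cache_op_r4k_line() handles whatever remainder is left.
 */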
void
mipsNN_icache_sync_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 16);
	}

	SYNC;
}

void
mipsNN_icache_sync_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_icache_sync_all_64(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 64);
	}

	SYNC;
}

void
mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 16;
	}
}

void
mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
	}
}

void
mipsNN_icache_sync_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line64(va + size);
	va = trunc_line64(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 64)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_64(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 64;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 64;
	}
}

void
mipsNN_pdcache_wbinv_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_all_64(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
	}
}

void
mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}

void
mipsNN_pdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line64(va + size);
	va = trunc_line64(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 64)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_64(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 64;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 64;
	}
}

void
mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 64;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 64;
	}

	SYNC;
}

#ifdef CPU_CNMIPS
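
/*
 * cnMIPS (Octeon) is presumed here to have a coherent, write-through L1
 * D-cache, which is why the D-cache routines below are no-ops or bare
 * SYNCs; I-cache synchronization is done wholesale through
 * mips_sync_icache() (see SYNCI above) rather than per line.
 */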

void
mipsNN_icache_sync_all_128(void)
{
        SYNCI
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
}


void
mipsNN_pdcache_wbinv_all_128(void)
{
}


void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

#else

void
mipsNN_icache_sync_all_128(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 128);
	}

	SYNC;
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line128(va + size);
	va = trunc_line128(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (32 * 128)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_32lines_128(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32 * 128;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 128;
	}
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}

	SYNC;
}


void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line128(va + size);
	va = trunc_line128(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (32 * 128)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_32lines_128(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32 * 128;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 128;
	}
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 128;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 128;
	}

	SYNC;
}

#endif

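/*
 * The secondary-cache (L2) routines below mirror the primary D-cache ones
 * but target CACHE_R4K_SD; their index variants simply mask the address
 * with (sdcache_size - 1) to form a KSEG0 address inside the cache, with
 * no separate per-way stride/loopcount handling.
 */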
void
mipsNN_sdcache_wbinv_all_32(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}
}

void
mipsNN_sdcache_wbinv_all_64(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}
}

void
mipsNN_sdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_wbinv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_wbinv_range_index_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line64(va + size);
	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 32;
	}
}

void
mipsNN_sdcache_inv_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 64;
	}
}

void
mipsNN_sdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line32(va + size);

	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 32;
	}
}

void
mipsNN_sdcache_wb_range_64(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line64(va + size);

	va = trunc_line64(va);

	while ((eva - va) >= (32 * 64)) {
		cache_r4k_op_32lines_64(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 64);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 64;
	}
}

void
mipsNN_sdcache_wbinv_all_128(void)
{
	vm_offset_t va = MIPS_PHYS_TO_KSEG0(0);
	vm_offset_t eva = va + sdcache_size;

	while (va < eva) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}
}

void
mipsNN_sdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & (sdcache_size - 1));

	eva = round_line128(va + size);
	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va,
		    CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
		va += 128;
	}
}

void
mipsNN_sdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva = round_line128(va + size);

	va = trunc_line128(va);

	while ((eva - va) >= (32 * 128)) {
		cache_r4k_op_32lines_128(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += (32 * 128);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB);
		va += 128;
	}
}
