/*	$OpenBSD: m8820x_machdep.c,v 1.62 2017/05/29 14:19:50 mpi Exp $	*/
/*
 * Copyright (c) 2004, 2007, 2010, 2011, 2013, Miodrag Vallat.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 * Copyright (c) 2001 Steve Murphree, Jr.
 * Copyright (c) 1996 Nivas Madhur
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Nivas Madhur.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
/*
 * Mach Operating System
 * Copyright (c) 1993-1991 Carnegie Mellon University
 * Copyright (c) 1991 OMRON Corporation
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON AND OMRON ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON AND OMRON DISCLAIM ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <uvm/uvm_extern.h>

#include <machine/asm_macro.h>
#include <machine/cmmu.h>
#include <machine/cpu.h>
#include <machine/m8820x.h>
#include <machine/psl.h>

extern	void m8820x_zeropage(vaddr_t);
extern	void m8820x_copypage(vaddr_t, vaddr_t);

cpuid_t	m8820x_init(void);
void	m8820x_batc_setup(cpuid_t, apr_t);
void	m8820x_cpu_configuration_print(int);
void	m8820x_shutdown(void);
apr_t	m8820x_apr_cmode(void);
apr_t	m8820x_pte_cmode(void);
void	m8820x_set_sapr(apr_t);
void	m8820x_set_uapr(apr_t);
void	m8820x_tlbis(cpuid_t, vaddr_t, pt_entry_t);
void	m8820x_tlbiu(cpuid_t, vaddr_t, pt_entry_t);
void	m8820x_tlbia(cpuid_t);
void	m8820x_cache_wbinv(cpuid_t, paddr_t, psize_t);
void	m8820x_dcache_wb(cpuid_t, paddr_t, psize_t);
void	m8820x_icache_inv(cpuid_t, paddr_t, psize_t);
void	m8820x_dma_cachectl(paddr_t, psize_t, int);
void	m8820x_dma_cachectl_local(paddr_t, psize_t, int);
void	m8820x_initialize_cpu(cpuid_t);

const struct cmmu_p cmmu8820x = {
	m8820x_init,
	m8820x_batc_setup,
	m8820x_setup_board_config,
	m8820x_cpu_configuration_print,
	m8820x_shutdown,
	m8820x_cpu_number,
	m8820x_apr_cmode,
	m8820x_pte_cmode,
	m8820x_set_sapr,
	m8820x_set_uapr,
	m8820x_tlbis,
	m8820x_tlbiu,
	m8820x_tlbia,
	m8820x_cache_wbinv,
	m8820x_dcache_wb,
	m8820x_icache_inv,
	m8820x_dma_cachectl,
#ifdef MULTIPROCESSOR
	m8820x_dma_cachectl_local,
	m8820x_initialize_cpu,
#endif
};

/*
 * Systems with more than 2 CMMUs per CPU use split schemes, which sometimes
 * are programmable (well, no more than having a few hardwired choices).
 *
 * The following schemes are available on MVME188 boards:
 * - split on A12 address bit (A14 for 88204)
 * - split on supervisor/user access
 * - split on SRAM/non-SRAM addresses, with either supervisor-only or all
 *   access to SRAM.
 *
 * MVME188 configuration 6, with 4 CMMUs per CPU, also forces a split on
 * A14 address bit (A16 for 88204).
 *
 * Under OpenBSD, we will only split on A12 and A14 address bits, since we
 * do not want to waste CMMU resources on the SRAM, and user/supervisor
 * splits seem less efficient.
 *
 * The really nasty part of this choice is in the exception handling code,
 * when it needs to get error information from up to 4 CMMUs. See eh.S for
 * the gory details.
 */

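/*
 * max_cmmus is the total number of CMMUs on the board; cmmu_shift is
 * log2 of the number of CMMUs per CPU, so that the CMMUs of CPU n are
 * the (1 << cmmu_shift) m8820x_cmmu[] entries starting at index
 * (n << cmmu_shift).
 */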
struct m8820x_cmmu m8820x_cmmu[MAX_CMMUS]
    __attribute__ ((__section__(".rodata")));
u_int max_cmmus
    __attribute__ ((__section__(".rodata")));
u_int cmmu_shift
    __attribute__ ((__section__(".rodata")));

/* local prototypes */
void	m8820x_cmmu_configuration_print(int, int);
void	m8820x_cmmu_set_reg(int, u_int, int);
void	m8820x_cmmu_set_reg_if_mode(int, u_int, int, int);
void	m8820x_cmmu_set_cmd(u_int, int, vaddr_t);
void	m8820x_cmmu_set_cmd_if_addr(u_int, int, vaddr_t);
void	m8820x_cmmu_set_cmd_if_mode(u_int, int, vaddr_t, int);
void	m8820x_cmmu_wait(int);
void	m8820x_cmmu_wb_locked(int, paddr_t, psize_t);
void	m8820x_cmmu_wbinv_locked(int, paddr_t, psize_t);
void	m8820x_cmmu_inv_locked(int, paddr_t, psize_t);
#if defined(__luna88k__) && !defined(MULTIPROCESSOR)
void	m8820x_enable_other_cmmu_cache(void);
#endif

static inline
void	m8820x_dbatc_set(cpuid_t, uint, batc_t);
static inline
void	m8820x_ibatc_set(cpuid_t, uint, batc_t);

/* Flags passed to m8820x_cmmu_set_*() */
#define MODE_VAL		0x01
#define ADDR_VAL		0x02

/*
 * Helper functions to poke values into the appropriate CMMU registers.
 */

void
m8820x_cmmu_set_reg(int reg, u_int val, int cpu)
{
	struct m8820x_cmmu *cmmu;
	int mmu, cnt;

	mmu = cpu << cmmu_shift;
	cmmu = m8820x_cmmu + mmu;

	/*
	 * We scan all CMMUs to find the matching ones and store the
	 * values there.
	 */
	for (cnt = 1 << cmmu_shift; cnt != 0; cnt--, mmu++, cmmu++) {
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
		if (cmmu->cmmu_regs == NULL)
			continue;
#endif
		cmmu->cmmu_regs[reg] = val;
	}
}

void
m8820x_cmmu_set_reg_if_mode(int reg, u_int val, int cpu, int mode)
{
	struct m8820x_cmmu *cmmu;
	int mmu, cnt;

	mmu = cpu << cmmu_shift;
	cmmu = m8820x_cmmu + mmu;

	/*
	 * We scan all CMMUs to find the matching ones and store the
	 * values there.
	 */
	for (cnt = 1 << cmmu_shift; cnt != 0; cnt--, mmu++, cmmu++) {
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
		if (cmmu->cmmu_regs == NULL)
			continue;
#endif
		if (CMMU_MODE(mmu) != mode)
			continue;
		cmmu->cmmu_regs[reg] = val;
	}
}

void
m8820x_cmmu_set_cmd(u_int cmd, int cpu, vaddr_t addr)
{
	struct m8820x_cmmu *cmmu;
	int mmu, cnt;

	mmu = cpu << cmmu_shift;
	cmmu = m8820x_cmmu + mmu;

	/*
	 * We scan all CMMUs to find the matching ones and store the
	 * values there.
	 */
	for (cnt = 1 << cmmu_shift; cnt != 0; cnt--, mmu++, cmmu++) {
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
		if (cmmu->cmmu_regs == NULL)
			continue;
#endif
		cmmu->cmmu_regs[CMMU_SAR] = addr;
		cmmu->cmmu_regs[CMMU_SCR] = cmd;
	}
}

void
m8820x_cmmu_set_cmd_if_mode(u_int cmd, int cpu, vaddr_t addr, int mode)
{
	struct m8820x_cmmu *cmmu;
	int mmu, cnt;

	mmu = cpu << cmmu_shift;
	cmmu = m8820x_cmmu + mmu;

	/*
	 * We scan all CMMUs to find the matching ones and store the
	 * values there.
	 */
	for (cnt = 1 << cmmu_shift; cnt != 0; cnt--, mmu++, cmmu++) {
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
		if (cmmu->cmmu_regs == NULL)
			continue;
#endif
		if (CMMU_MODE(mmu) != mode)
			continue;
		cmmu->cmmu_regs[CMMU_SAR] = addr;
		cmmu->cmmu_regs[CMMU_SCR] = cmd;
	}
}

#ifdef M88200_HAS_SPLIT_ADDRESS
void
m8820x_cmmu_set_cmd_if_addr(u_int cmd, int cpu, vaddr_t addr)
{
	struct m8820x_cmmu *cmmu;
	int mmu, cnt;

	mmu = cpu << cmmu_shift;
	cmmu = m8820x_cmmu + mmu;

	/*
	 * We scan all CMMUs to find the matching ones and store the
	 * values there.
	 */
	for (cnt = 1 << cmmu_shift; cnt != 0; cnt--, mmu++, cmmu++) {
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
		if (cmmu->cmmu_regs == NULL)
			continue;
#endif
		if (cmmu->cmmu_addr_mask != 0) {
			if ((addr & cmmu->cmmu_addr_mask) != cmmu->cmmu_addr)
				continue;
		}
		cmmu->cmmu_regs[CMMU_SAR] = addr;
		cmmu->cmmu_regs[CMMU_SCR] = cmd;
	}
}
#else
#define	m8820x_cmmu_set_cmd_if_addr	m8820x_cmmu_set_cmd
#endif

/*
 * Force a read from the CMMU status register, thereby forcing execution to
 * stop until all pending CMMU operations are finished.
 * This is used by the various cache invalidation functions.
 */
void
m8820x_cmmu_wait(int cpu)
{
	struct m8820x_cmmu *cmmu;
	int mmu, cnt;

	mmu = cpu << cmmu_shift;
	cmmu = m8820x_cmmu + mmu;

	/*
	 * We scan all related CMMUs and read their status register.
	 */
	for (cnt = 1 << cmmu_shift; cnt != 0; cnt--, mmu++, cmmu++) {
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
		if (cmmu->cmmu_regs == NULL)
			continue;
#endif
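		/* A pending bus error means the flush did not complete. */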
#ifdef DEBUG
		if (cmmu->cmmu_regs[CMMU_SSR] & CMMU_SSR_BE) {
			panic("cache flush failed!");
		}
#else
		(void)cmmu->cmmu_regs[CMMU_SSR];
#endif
	}
}

/*
 * BATC routines
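 *
 * Each BATC entry maps one BATC_BLKBYTES-aligned block.
 * m8820x_batc_setup() below fills the global_ibatc[] and global_dbatc[]
 * arrays with 1:1 mappings of the kernel text and data, then copies
 * them into the instruction and data CMMUs of the given CPU.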
 */

static inline
void
m8820x_dbatc_set(cpuid_t cpu, uint batcno, batc_t batc)
{
	m8820x_cmmu_set_reg_if_mode(CMMU_BWP(batcno), batc, cpu, DATA_CMMU);
}

static inline
void
m8820x_ibatc_set(cpuid_t cpu, uint batcno, batc_t batc)
{
	m8820x_cmmu_set_reg_if_mode(CMMU_BWP(batcno), batc, cpu, INST_CMMU);
}

void
m8820x_batc_setup(cpuid_t cpu, apr_t cmode)
{
	paddr_t s_text, e_text, s_data, e_data, e_rodata;
	uint batcno;
	batc_t batc, proto;
	extern caddr_t kernelstart;
	extern caddr_t etext;
	extern caddr_t erodata;
	extern caddr_t end;

	proto = BATC_SO | BATC_V;
	if (cmode & CACHE_WT)
		proto |= BATC_WT;
	if (cmode & CACHE_INH)
		proto |= BATC_INH;

	s_text = round_batc((paddr_t)&kernelstart);
	s_data = e_text = round_batc((paddr_t)&etext);
	e_rodata = round_batc((paddr_t)&erodata);
#if 0 /* not until pmap makes sure kvm starts on a BATC boundary */
	e_data = round_batc((paddr_t)&end);
#else
	e_data = trunc_batc((paddr_t)&end);
#endif

	/* map s_text..e_text with IBATC */
	batcno = 0;
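	/*
	 * A BATC entry holds the block number twice, in the virtual
	 * (BATC_VSHIFT) and physical (BATC_PSHIFT) fields, since the
	 * kernel is mapped 1:1; the low bits carry the protection and
	 * cacheability flags gathered in proto.
	 */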
	while (s_text != e_text) {
		batc = (s_text >> BATC_BLKSHIFT) << BATC_VSHIFT;
		batc |= (s_text >> BATC_BLKSHIFT) << BATC_PSHIFT;
		batc |= proto;
#ifdef DEBUG
		printf("cpu%d ibat%d %p(%08x)\n", cpu, batcno, s_text, batc);
#endif
		global_ibatc[batcno] = batc;
		s_text += BATC_BLKBYTES;
		if (++batcno == BATC_MAX)
			break;
	}

	/* map e_text..e_data with DBATC */
	if (cmode & CACHE_GLOBAL)
		proto |= BATC_GLOBAL;
	batcno = 0;
	while (s_data != e_data) {
		batc = (s_data >> BATC_BLKSHIFT) << BATC_VSHIFT;
		batc |= (s_data >> BATC_BLKSHIFT) << BATC_PSHIFT;
		batc |= proto;
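		/* Write-protect the blocks up to the end of rodata. */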
		if (s_data < e_rodata)
			batc |= BATC_PROT;
#if defined(MULTIPROCESSOR)	/* XXX */
		else
			break;
#endif
#ifdef DEBUG
		printf("cpu%d dbat%d %p(%08x)\n", cpu, batcno, s_data, batc);
#endif
		global_dbatc[batcno] = batc;
		s_data += BATC_BLKBYTES;
		if (++batcno == BATC_MAX)
			break;
	}

	for (batcno = 0; batcno < BATC_MAX; batcno++) {
		m8820x_dbatc_set(cpu, batcno, global_dbatc[batcno]);
		m8820x_ibatc_set(cpu, batcno, global_ibatc[batcno]);
	}
}

/*
 * Should only be called after the calling CPU knows its CPU
 * number and main/secondary status. Should be called first
 * by the main processor, before the others are started.
 */
void
m8820x_cpu_configuration_print(int main)
{
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
	struct m8820x_cmmu *cmmu;
#endif
	int pid = get_cpu_pid();
	int proctype = (pid & PID_ARN) >> ARN_SHIFT;
	int procvers = (pid & PID_VN) >> VN_SHIFT;
	int kind, nmmu, mmuno, cnt, cpu = cpu_number();

	printf("cpu%d: ", cpu);
	switch (proctype) {
	default:
		printf("unknown model arch 0x%x rev 0x%x\n",
		    proctype, procvers);
		break;
	case ARN_88100:
		printf("M88100 rev 0x%x", procvers);
#ifdef MULTIPROCESSOR
		if (main == 0)
			printf(", secondary");
#endif
		nmmu = 1 << cmmu_shift;
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
		cmmu = m8820x_cmmu + (cpu << cmmu_shift);
		for (cnt = 1 << cmmu_shift; cnt != 0; cnt--, cmmu++)
			if (cmmu->cmmu_regs == NULL)
				nmmu--;
#endif
		printf(", %d CMMU\n", nmmu);

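		/*
		 * CMMU entries for a CPU alternate between instruction
		 * and data units in m8820x_cmmu[]; walk every other
		 * entry, printing the instruction CMMUs first, then the
		 * data CMMUs.
		 */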
		for (kind = INST_CMMU; kind <= DATA_CMMU; kind++) {
			mmuno = (cpu << cmmu_shift) + kind;
			for (cnt = 1 << cmmu_shift; cnt != 0;
			    cnt -= 2, mmuno += 2)
				m8820x_cmmu_configuration_print(cpu, mmuno);
		}
		break;
	}

#ifndef ERRATA__XXX_USR
	{
		static int errata_warn = 0;

		if (proctype == ARN_88100 && procvers <= 10) {
			if (!errata_warn++)
				printf("WARNING: M88100 bug workaround code "
				    "not enabled.\nPlease recompile the kernel "
				    "with option ERRATA__XXX_USR !\n");
		}
	}
#endif
}

void
m8820x_cmmu_configuration_print(int cpu, int mmuno)
{
	struct m8820x_cmmu *cmmu;
	int mmuid, cssp;
	u_int line;
	uint32_t linestatus;
#ifdef M88200_HAS_SPLIT_ADDRESS
	int aline, abit, amask;
#endif

	cmmu = m8820x_cmmu + mmuno;
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
	if (cmmu->cmmu_regs == NULL)
		return;
#endif

	mmuid = CMMU_TYPE(cmmu->cmmu_idr);

	printf("cpu%d: ", cpu);
	switch (mmuid) {
	case M88200_ID:
		printf("M88200 (16K)");
		break;
	case M88204_ID:
		printf("M88204 (64K)");
		break;
	default:
		printf("unknown CMMU id 0x%x", mmuid);
		break;
	}
	printf(" rev 0x%x,", CMMU_VERSION(cmmu->cmmu_idr));
#ifdef M88200_HAS_SPLIT_ADDRESS
	/*
	 * Print address lines
	 */
	amask = cmmu->cmmu_addr_mask;
	if (amask != 0) {
		aline = 0;
		while (amask != 0) {
			abit = ff1(amask);
			if ((cmmu->cmmu_addr & (1 << abit)) != 0)
				printf("%cA%02d",
				    aline++ != 0 ? '/' : ' ', abit);
			else
				printf("%cA%02d*",
				    aline++ != 0 ? '/' : ' ', abit);
			amask ^= 1 << abit;
		}
	} else if (cmmu_shift != 1) {
		/* unknown split scheme */
		printf(" split");
	} else
#endif
		printf(" full");
	printf(" %ccache\n", CMMU_MODE(mmuno) == INST_CMMU ? 'I' : 'D');

	/*
	 * Report disabled cache lines.
	 */
	for (cssp = mmuid == M88204_ID ? 3 : 0; cssp >= 0; cssp--)
		for (line = 0; line <= 255; line++) {
			cmmu->cmmu_regs[CMMU_SAR] =
			    line << MC88200_CACHE_SHIFT;
			linestatus = cmmu->cmmu_regs[CMMU_CSSP(cssp)];
			if (linestatus & (CMMU_CSSP_D3 | CMMU_CSSP_D2 |
			    CMMU_CSSP_D1 | CMMU_CSSP_D0)) {
				printf("cpu%d: cache line 0x%03x disabled\n",
				    cpu, (cssp << 8) | line);
			}
		}
}

/*
 * CMMU initialization routine
 */
cpuid_t
m8820x_init()
{
	cpuid_t cpu;

	cpu = m8820x_cpu_number();
	m8820x_initialize_cpu(cpu);
#if defined(__luna88k__) && !defined(MULTIPROCESSOR)
	m8820x_enable_other_cmmu_cache();
#endif
	return (cpu);
}

/*
 * Initialize the set of CMMUs tied to a particular CPU.
 */
void
m8820x_initialize_cpu(cpuid_t cpu)
{
	struct cpu_info *ci;
	struct m8820x_cmmu *cmmu;
	int mmuid, cssp;
	u_int line, cnt;
	uint32_t linestatus;
	apr_t apr;

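	/*
	 * Start with translation disabled (APR_V clear) and caches
	 * inhibited.
	 */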
	apr = ((0x00000 << PG_BITS) | CACHE_GLOBAL | CACHE_INH) & ~APR_V;

	cmmu = m8820x_cmmu + (cpu << cmmu_shift);

	/*
	 * Set up CMMU pointers for faster exception processing.
	 * This relies on the board-dependent code laying out each CPU's
	 * CMMUs with instruction and data units interleaved, the
	 * instruction CMMU first in each pair.
	 */
	ci = &m88k_cpus[cpu];
	switch (cmmu_shift) {
	default:
		/* exception code may not use ci_pfsr fields, compute anyway */
		/* FALLTHROUGH */
	case 2:
		ci->ci_pfsr_d1 = (u_int)cmmu[3].cmmu_regs + CMMU_PFSR * 4;
		ci->ci_pfsr_i1 = (u_int)cmmu[2].cmmu_regs + CMMU_PFSR * 4;
		/* FALLTHROUGH */
	case 1:
		ci->ci_pfsr_d0 = (u_int)cmmu[1].cmmu_regs + CMMU_PFSR * 4;
		ci->ci_pfsr_i0 = (u_int)cmmu[0].cmmu_regs + CMMU_PFSR * 4;
		break;
	}

	for (cnt = 1 << cmmu_shift; cnt != 0; cnt--, cmmu++) {
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
		if (cmmu->cmmu_regs == NULL)
			continue;
#endif
		cmmu->cmmu_idr = cmmu->cmmu_regs[CMMU_IDR];
		mmuid = CMMU_TYPE(cmmu->cmmu_idr);

		/*
		 * Reset cache, but keep disabled lines disabled.
		 *
		 * Note that the early Luna88k PROM apparently forgets to
		 * initialize the last line (#255) correctly, and the
		 * CMMU initializes with whatever its state upon powerup
		 * happens to be.
		 *
		 * It is then unlikely that these particular cache lines
		 * have been exercised by the self-tests; better to disable
		 * the whole line.
		 */
		for (cssp = mmuid == M88204_ID ? 3 : 0; cssp >= 0; cssp--)
			for (line = 0; line <= 255; line++) {
				cmmu->cmmu_regs[CMMU_SAR] =
				    line << MC88200_CACHE_SHIFT;
				linestatus = cmmu->cmmu_regs[CMMU_CSSP(cssp)];
				if (linestatus & (CMMU_CSSP_D3 | CMMU_CSSP_D2 |
				    CMMU_CSSP_D1 | CMMU_CSSP_D0))
					linestatus =
					    CMMU_CSSP_D3 | CMMU_CSSP_D2 |
					    CMMU_CSSP_D1 | CMMU_CSSP_D0;
				else
					linestatus = 0;
				cmmu->cmmu_regs[CMMU_CSSP(cssp)] = linestatus |
				    CMMU_CSSP_L5 | CMMU_CSSP_L4 |
				    CMMU_CSSP_L3 | CMMU_CSSP_L2 |
				    CMMU_CSSP_L1 | CMMU_CSSP_L0 |
				    CMMU_CSSP_VV(3, CMMU_VV_INVALID) |
				    CMMU_CSSP_VV(2, CMMU_VV_INVALID) |
				    CMMU_CSSP_VV(1, CMMU_VV_INVALID) |
				    CMMU_CSSP_VV(0, CMMU_VV_INVALID);
			}

		/*
		 * Set the SCTR, SAPR, and UAPR to some known state.
		 * Snooping is always enabled, so that we do not need to
		 * writeback userland code pages when they first get filled
		 * as data pages.
		 */
		cmmu->cmmu_regs[CMMU_SCTR] = CMMU_SCTR_SE;

		cmmu->cmmu_regs[CMMU_SAPR] = cmmu->cmmu_regs[CMMU_UAPR] = apr;

		cmmu->cmmu_regs[CMMU_BWP0] = cmmu->cmmu_regs[CMMU_BWP1] =
		cmmu->cmmu_regs[CMMU_BWP2] = cmmu->cmmu_regs[CMMU_BWP3] =
		cmmu->cmmu_regs[CMMU_BWP4] = cmmu->cmmu_regs[CMMU_BWP5] =
		cmmu->cmmu_regs[CMMU_BWP6] = cmmu->cmmu_regs[CMMU_BWP7] = 0;
		cmmu->cmmu_regs[CMMU_SCR] = CMMU_FLUSH_CACHE_INV_ALL;
		(void)cmmu->cmmu_regs[CMMU_SSR];
		cmmu->cmmu_regs[CMMU_SCR] = CMMU_FLUSH_SUPER_ALL;
		cmmu->cmmu_regs[CMMU_SCR] = CMMU_FLUSH_USER_ALL;
	}

	/*
	 * Enable instruction cache.
	 */
	apr &= ~CACHE_INH;
	m8820x_cmmu_set_reg_if_mode(CMMU_SAPR, apr, cpu, INST_CMMU);

	/*
	 * Data cache will be enabled at pmap_bootstrap_cpu() time,
	 * because the PROM won't likely expect its work area in memory
	 * to be cached. On at least aviion, starting secondary processors
	 * returns an error code although the processor has correctly spun
	 * up, if the PROM work area is cached.
	 */
#ifdef dont_do_this_at_home
	apr |= CACHE_WT;
	m8820x_cmmu_set_reg_if_mode(CMMU_SAPR, apr, cpu, DATA_CMMU);
#endif

	ci->ci_zeropage = m8820x_zeropage;
	ci->ci_copypage = m8820x_copypage;
}

/*
 * Just before poweroff or reset....
 */
void
m8820x_shutdown()
{
	u_int cmmu_num;
	struct m8820x_cmmu *cmmu;

	CMMU_LOCK;

	cmmu = m8820x_cmmu;
	for (cmmu_num = 0; cmmu_num < max_cmmus; cmmu_num++, cmmu++) {
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
		if (cmmu->cmmu_regs == NULL)
			continue;
#endif
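		/* Force cache-inhibited, translation-disabled accesses. */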
		cmmu->cmmu_regs[CMMU_SAPR] = cmmu->cmmu_regs[CMMU_UAPR] =
		    ((0x00000 << PG_BITS) | CACHE_INH) &
		    ~(CACHE_WT | CACHE_GLOBAL | APR_V);
	}

	CMMU_UNLOCK;
}

/*
 * Older systems do not xmem correctly on writeback cache lines, causing
 * the remainder of the cache line to be corrupted.
 * This behaviour has been observed on a system with 88100 rev 8 and
 * 88200 rev 5; it is unknown whether the culprit is the 88100 or the 88200;
 * however we can rely upon 88100 rev 10 onwards and 88200 rev 7 onwards
 * (as well as all 88204 revs) to be safe.
 */
apr_t
m8820x_apr_cmode()
{
	u_int cmmu_num;
	struct m8820x_cmmu *cmmu;

	cmmu = m8820x_cmmu;
	for (cmmu_num = max_cmmus; cmmu_num != 0; cmmu_num--, cmmu++) {
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
		if (cmmu->cmmu_regs == NULL)
			continue;
#endif
		/*
		 * XXX 88200 v6 could not be tested. Do 88200 ever have
		 * XXX even version numbers anyway?
		 */
		if (CMMU_TYPE(cmmu->cmmu_idr) == M88200_ID &&
		    CMMU_VERSION(cmmu->cmmu_idr) <= 6)
			return CACHE_WT;
	}
	/*
	 * XXX 88100 v9 could not be tested. Might be unaffected, but
	 * XXX better be safe than sorry.
	 */
	if (((get_cpu_pid() & PID_VN) >> VN_SHIFT) <= 9)
		return CACHE_WT;

	return CACHE_DFL;
}

/*
 * Older systems require page tables to be cache inhibited (write-through
 * won't even cut it).
 * We can rely upon 88200 rev 9 onwards to be safe (as well as all 88204
 * revs).
 */
apr_t
m8820x_pte_cmode()
{
	u_int cmmu_num;
	struct m8820x_cmmu *cmmu;

	cmmu = m8820x_cmmu;
	for (cmmu_num = max_cmmus; cmmu_num != 0; cmmu_num--, cmmu++) {
#ifdef M88200_HAS_ASYMMETRICAL_ASSOCIATION
		if (cmmu->cmmu_regs == NULL)
			continue;
#endif
		/*
		 * XXX 88200 v8 could not be tested. Do 88200 ever have
		 * XXX even version numbers anyway?
		 */
		if (CMMU_TYPE(cmmu->cmmu_idr) == M88200_ID &&
		    CMMU_VERSION(cmmu->cmmu_idr) <= 8)
			return CACHE_INH;
	}

	return CACHE_WT;
}

void
m8820x_set_sapr(apr_t ap)
{
	int cpu = cpu_number();

	CMMU_LOCK;

	m8820x_cmmu_set_reg(CMMU_SAPR, ap, cpu);

	CMMU_UNLOCK;
}

void
m8820x_set_uapr(apr_t ap)
{
	u_int32_t psr;
	int cpu = cpu_number();

	psr = get_psr();
	set_psr(psr | PSR_IND);
	CMMU_LOCK;

	m8820x_cmmu_set_reg(CMMU_UAPR, ap, cpu);

	CMMU_UNLOCK;
	set_psr(psr);
}

/*
 * Functions that invalidate TLB entries.
 */

void
m8820x_tlbis(cpuid_t cpu, vaddr_t va, pt_entry_t pte)
{
	u_int32_t psr;

	psr = get_psr();
	set_psr(psr | PSR_IND);
	CMMU_LOCK;
	m8820x_cmmu_set_cmd_if_addr(CMMU_FLUSH_SUPER_PAGE, cpu, va);
	CMMU_UNLOCK;
	set_psr(psr);
}

void
m8820x_tlbiu(cpuid_t cpu, vaddr_t va, pt_entry_t pte)
{
	u_int32_t psr;

	psr = get_psr();
	set_psr(psr | PSR_IND);
	CMMU_LOCK;
	m8820x_cmmu_set_cmd_if_addr(CMMU_FLUSH_USER_PAGE, cpu, va);
	CMMU_UNLOCK;
	set_psr(psr);
}

void
m8820x_tlbia(cpuid_t cpu)
{
	u_int32_t psr;

	psr = get_psr();
	set_psr(psr | PSR_IND);
	CMMU_LOCK;
	m8820x_cmmu_set_reg(CMMU_SCR, CMMU_FLUSH_USER_ALL, cpu);
	CMMU_UNLOCK;
	set_psr(psr);
}

/*
 * Functions that invalidate caches.
 *
 * Cache operations require physical addresses.
 *
 * We don't writeback instruction caches prior to invalidate because they
 * are never modified.
 *
 * Note that on systems with more than two CMMUs per CPU, we cannot benefit
 * from the address split - the split is done on virtual (not yet translated)
 * addresses, but caches are physically indexed.
 */

#define	trunc_cache_line(a)	((a) & ~(MC88200_CACHE_LINE - 1))
#define	round_cache_line(a)	trunc_cache_line((a) + MC88200_CACHE_LINE - 1)

/*
 * invalidate I$, writeback and invalidate D$
 */
void
m8820x_cache_wbinv(cpuid_t cpu, paddr_t pa, psize_t size)
{
	u_int32_t psr;
	psize_t count;

	size = round_cache_line(pa + size) - trunc_cache_line(pa);
	pa = trunc_cache_line(pa);

	psr = get_psr();
	set_psr(psr | PSR_IND);
	CMMU_LOCK;

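	/*
	 * Flush a page at a time whenever the remaining area is page
	 * aligned and spans at least a page, a single cache line
	 * otherwise.
	 */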
	while (size != 0) {
		if ((pa & PAGE_MASK) == 0 && size >= PAGE_SIZE) {
			m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_CBI_PAGE, cpu, pa);
			count = PAGE_SIZE;
		} else {
			m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_CBI_LINE, cpu, pa);
			count = MC88200_CACHE_LINE;
		}
		pa += count;
		size -= count;
		m8820x_cmmu_wait(cpu);
	}

	CMMU_UNLOCK;
	set_psr(psr);
}

/*
 * writeback D$
 */
void
m8820x_dcache_wb(cpuid_t cpu, paddr_t pa, psize_t size)
{
	u_int32_t psr;
	psize_t count;

	size = round_cache_line(pa + size) - trunc_cache_line(pa);
	pa = trunc_cache_line(pa);

	psr = get_psr();
	set_psr(psr | PSR_IND);
	CMMU_LOCK;

	while (size != 0) {
		if ((pa & PAGE_MASK) == 0 && size >= PAGE_SIZE) {
			m8820x_cmmu_set_cmd_if_mode(CMMU_FLUSH_CACHE_CB_PAGE,
			    cpu, pa, DATA_CMMU);
			count = PAGE_SIZE;
		} else {
			m8820x_cmmu_set_cmd_if_mode(CMMU_FLUSH_CACHE_CB_LINE,
			    cpu, pa, DATA_CMMU);
			count = MC88200_CACHE_LINE;
		}
		pa += count;
		size -= count;
		m8820x_cmmu_wait(cpu);
	}

	CMMU_UNLOCK;
	set_psr(psr);
}

/*
 * invalidate I$
 */
void
m8820x_icache_inv(cpuid_t cpu, paddr_t pa, psize_t size)
{
	u_int32_t psr;
	psize_t count;

	size = round_cache_line(pa + size) - trunc_cache_line(pa);
	pa = trunc_cache_line(pa);

	psr = get_psr();
	set_psr(psr | PSR_IND);
	CMMU_LOCK;

	while (size != 0) {
		if ((pa & PAGE_MASK) == 0 && size >= PAGE_SIZE) {
			m8820x_cmmu_set_cmd_if_mode(CMMU_FLUSH_CACHE_INV_PAGE,
			    cpu, pa, INST_CMMU);
			count = PAGE_SIZE;
		} else {
			m8820x_cmmu_set_cmd_if_mode(CMMU_FLUSH_CACHE_INV_LINE,
			    cpu, pa, INST_CMMU);
			count = MC88200_CACHE_LINE;
		}
		pa += count;
		size -= count;
		m8820x_cmmu_wait(cpu);
	}

	CMMU_UNLOCK;
	set_psr(psr);
}

/*
 * writeback D$
 */
void
m8820x_cmmu_wb_locked(int cpu, paddr_t pa, psize_t size)
{
	if (size <= MC88200_CACHE_LINE) {
		m8820x_cmmu_set_cmd_if_mode(CMMU_FLUSH_CACHE_CB_LINE,
		    cpu, pa, DATA_CMMU);
	} else {
		m8820x_cmmu_set_cmd_if_mode(CMMU_FLUSH_CACHE_CB_PAGE,
		    cpu, pa, DATA_CMMU);
	}
}

/*
 * invalidate I$, writeback and invalidate D$
 */
void
m8820x_cmmu_wbinv_locked(int cpu, paddr_t pa, psize_t size)
{
	if (size <= MC88200_CACHE_LINE)
		m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_CBI_LINE, cpu, pa);
	else
		m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_CBI_PAGE, cpu, pa);
}

/*
 * invalidate I$ and D$
 */
void
m8820x_cmmu_inv_locked(int cpu, paddr_t pa, psize_t size)
{
	if (size <= MC88200_CACHE_LINE)
		m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_INV_LINE, cpu, pa);
	else
		m8820x_cmmu_set_cmd(CMMU_FLUSH_CACHE_INV_PAGE, cpu, pa);
}

/*
 * High level cache handling functions (used by bus_dma).
 *
 * On multiprocessor systems, since the CMMUs snoop each other, they
 * all have a coherent view of the data. Thus, we only need to writeback
 * on a single CMMU. However, invalidations need to be done on all CMMUs.
 */

void
m8820x_dma_cachectl(paddr_t _pa, psize_t _size, int op)
{
	u_int32_t psr;
	int cpu;
#ifdef MULTIPROCESSOR
	struct cpu_info *ci = curcpu();
#endif
	paddr_t pa;
	psize_t size, count;
	void (*flusher)(int, paddr_t, psize_t);
	uint8_t lines[2 * MC88200_CACHE_LINE];
	paddr_t pa1, pa2;
	psize_t sz1, sz2;

	pa = trunc_cache_line(_pa);
	size = round_cache_line(_pa + _size) - pa;
	sz1 = sz2 = 0;

	switch (op) {
	case DMA_CACHE_SYNC:
		flusher = m8820x_cmmu_wb_locked;
		break;
	case DMA_CACHE_SYNC_INVAL:
		flusher = m8820x_cmmu_wbinv_locked;
		break;
	default:
	case DMA_CACHE_INV:
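		/*
		 * Compute the leading (pa1, sz1) and trailing (pa2, sz2)
		 * partial line spans which lie outside the area proper;
		 * their contents will need to be preserved across the
		 * invalidation.
		 */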
		pa1 = pa;
		sz1 = _pa - pa1;
		pa2 = _pa + _size;
		sz2 = pa + size - pa2;
		flusher = m8820x_cmmu_inv_locked;
		break;
	}

#ifndef MULTIPROCESSOR
	cpu = cpu_number();
#endif

	psr = get_psr();
	set_psr(psr | PSR_IND);
	CMMU_LOCK;

	/*
	 * Preserve the data from incomplete cache lines about to be
	 * invalidated, if necessary.
	 */
	if (sz1 != 0)
		bcopy((void *)pa1, lines, sz1);
	if (sz2 != 0)
		bcopy((void *)pa2, lines + MC88200_CACHE_LINE, sz2);

	while (size != 0) {
		count = (pa & PAGE_MASK) == 0 && size >= PAGE_SIZE ?
		    PAGE_SIZE : MC88200_CACHE_LINE;

#ifdef MULTIPROCESSOR
		/*
		 * Theoretically, it should be possible to issue the writeback
		 * operation only on the CMMU which has the affected cache
		 * lines in its memory; snooping would force the other CMMUs
		 * to invalidate their own copy of the line, if any.
		 *
		 * Unfortunately, there is no cheap way to figure out
		 * which CMMU has the lines (and has them as dirty).
		 */
		for (cpu = 0; cpu < MAX_CPUS; cpu++) {
			if (!ISSET(m88k_cpus[cpu].ci_flags, CIF_ALIVE))
				continue;
			(*flusher)(cpu, pa, count);
		}
		for (cpu = 0; cpu < MAX_CPUS; cpu++) {
			if (!ISSET(m88k_cpus[cpu].ci_flags, CIF_ALIVE))
				continue;
			m8820x_cmmu_wait(cpu);
		}
#else	/* MULTIPROCESSOR */
		(*flusher)(cpu, pa, count);
		m8820x_cmmu_wait(cpu);
#endif	/* MULTIPROCESSOR */

		pa += count;
		size -= count;
	}

	/*
	 * Restore data from incomplete cache lines having been invalidated,
	 * if necessary, write them back, and invalidate them again.
	 * (Note that these lines have been invalidated from all processors
	 *  in the loop above, so there is no need to remote invalidate them
	 *  again.)
	 */
	if (sz1 != 0)
		bcopy(lines, (void *)pa1, sz1);
	if (sz2 != 0)
		bcopy(lines + MC88200_CACHE_LINE, (void *)pa2, sz2);
	if (sz1 != 0) {
#ifdef MULTIPROCESSOR
		m8820x_cmmu_wbinv_locked(ci->ci_cpuid, pa1, MC88200_CACHE_LINE);
		m8820x_cmmu_wait(ci->ci_cpuid);
#else
		m8820x_cmmu_wbinv_locked(cpu, pa1, MC88200_CACHE_LINE);
		m8820x_cmmu_wait(cpu);
#endif
	}
	if (sz2 != 0) {
		pa2 = trunc_cache_line(pa2);
#ifdef MULTIPROCESSOR
		m8820x_cmmu_wbinv_locked(ci->ci_cpuid, pa2, MC88200_CACHE_LINE);
		m8820x_cmmu_wait(ci->ci_cpuid);
#else
		m8820x_cmmu_wbinv_locked(cpu, pa2, MC88200_CACHE_LINE);
		m8820x_cmmu_wait(cpu);
#endif
	}

	CMMU_UNLOCK;
	set_psr(psr);
}

#ifdef MULTIPROCESSOR
void
m8820x_dma_cachectl_local(paddr_t pa, psize_t size, int op)
{
	/* This function is not used on 88100 systems */
}
#endif

#if defined(__luna88k__) && !defined(MULTIPROCESSOR)
/*
 * On luna88k, secondary processors are not disabled while the kernel
 * is initializing.  On a non-MULTIPROCESSOR kernel, they spin in an
 * infinite loop in locore.S:secondary_init.  After initializing the
 * CMMUs tied to the currently-running processor, we turn on the
 * instruction cache of the other processors to make them happier.
 */
void
m8820x_enable_other_cmmu_cache()
{
	int cpu, master_cpu = cpu_number();

	for (cpu = 0; cpu < ncpusfound; cpu++) {
		if (cpu == master_cpu)
			continue;
		/* Enable the other processor's instruction cache */
		m8820x_cmmu_set_reg_if_mode(CMMU_SAPR, CACHE_GLOBAL,
		    cpu, INST_CMMU);
	}
}
#endif