1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (C) 2020 Justin Hibbits
5 * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
6 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
21 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
 * Some hw specific parts of this pmap were derived from or influenced
 * by NetBSD's ibm4xx pmap module. More generic code is shared with
31 * a few other pmap modules from the FreeBSD tree.
32 */
33
34 /*
35  * VM layout notes:
36  *
37  * Kernel and user threads run within one common virtual address space
38  * defined by AS=0.
39  *
40  * 32-bit pmap:
41  * Virtual address space layout:
42  * -----------------------------
43  * 0x0000_0000 - 0x7fff_ffff	: user process
44  * 0x8000_0000 - 0xbfff_ffff	: pmap_mapdev()-ed area (PCI/PCIE etc.)
45  * 0xc000_0000 - 0xffff_efff	: KVA
46  */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD$");
50
51#include "opt_ddb.h"
52#include "opt_kstack_pages.h"
53
54#include <sys/param.h>
55#include <sys/conf.h>
56#include <sys/malloc.h>
57#include <sys/ktr.h>
58#include <sys/proc.h>
59#include <sys/user.h>
60#include <sys/queue.h>
61#include <sys/systm.h>
62#include <sys/kernel.h>
63#include <sys/kerneldump.h>
64#include <sys/linker.h>
65#include <sys/msgbuf.h>
66#include <sys/lock.h>
67#include <sys/mutex.h>
68#include <sys/rwlock.h>
69#include <sys/sched.h>
70#include <sys/smp.h>
71#include <sys/vmmeter.h>
72
73#include <vm/vm.h>
74#include <vm/vm_page.h>
75#include <vm/vm_kern.h>
76#include <vm/vm_pageout.h>
77#include <vm/vm_extern.h>
78#include <vm/vm_object.h>
79#include <vm/vm_param.h>
80#include <vm/vm_map.h>
81#include <vm/vm_pager.h>
82#include <vm/vm_phys.h>
83#include <vm/vm_pagequeue.h>
84#include <vm/uma.h>
85
86#include <machine/_inttypes.h>
87#include <machine/cpu.h>
88#include <machine/pcb.h>
89#include <machine/platform.h>
90
91#include <machine/tlb.h>
92#include <machine/spr.h>
93#include <machine/md_var.h>
94#include <machine/mmuvar.h>
95#include <machine/pmap.h>
96#include <machine/pte.h>
97
98#include <ddb/ddb.h>
99
100#define	PRI0ptrX	"08x"
101
102/* Reserved KVA space and mutex for mmu_booke_zero_page. */
103static vm_offset_t zero_page_va;
104static struct mtx zero_page_mutex;
105
106/* Reserved KVA space and mutex for mmu_booke_copy_page. */
107static vm_offset_t copy_page_src_va;
108static vm_offset_t copy_page_dst_va;
109static struct mtx copy_page_mutex;
110
111static vm_offset_t kernel_ptbl_root;
112static unsigned int kernel_ptbls;	/* Number of KVA ptbls. */
113
114/**************************************************************************/
115/* PMAP */
116/**************************************************************************/
117
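/*
 * Base of the pmap_mapdev() region: the first page above the user address
 * space (see the VM layout notes at the top of this file).
 */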
118#define	VM_MAPDEV_BASE	((vm_offset_t)VM_MAXUSER_ADDRESS + PAGE_SIZE)
119
120static void tid_flush(tlbtid_t tid);
121static unsigned long ilog2(unsigned long);
122
123/**************************************************************************/
124/* Page table management */
125/**************************************************************************/
126
127#define PMAP_ROOT_SIZE	(sizeof(pte_t**) * PDIR_NENTRIES)
128static void ptbl_init(void);
129static struct ptbl_buf *ptbl_buf_alloc(void);
130static void ptbl_buf_free(struct ptbl_buf *);
131static void ptbl_free_pmap_ptbl(pmap_t, pte_t *);
132
133static pte_t *ptbl_alloc(pmap_t, unsigned int, boolean_t);
134static void ptbl_free(pmap_t, unsigned int);
135static void ptbl_hold(pmap_t, unsigned int);
136static int ptbl_unhold(pmap_t, unsigned int);
137
138static vm_paddr_t pte_vatopa(pmap_t, vm_offset_t);
139static int pte_enter(pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t);
140static int pte_remove(pmap_t, vm_offset_t, uint8_t);
141static pte_t *pte_find(pmap_t, vm_offset_t);
142
143struct ptbl_buf {
144	TAILQ_ENTRY(ptbl_buf) link;	/* list link */
145	vm_offset_t kva;		/* va of mapping */
146};
147
148/* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */
149#define PTBL_BUFS		(128 * 16)
150
151/* ptbl free list and a lock used for access synchronization. */
152static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist;
153static struct mtx ptbl_buf_freelist_lock;
154
/* Base address of kva space allocated for ptbl bufs. */
156static vm_offset_t ptbl_buf_pool_vabase;
157
158/* Pointer to ptbl_buf structures. */
159static struct ptbl_buf *ptbl_bufs;
160
161/**************************************************************************/
162/* Page table related */
163/**************************************************************************/
164
165/* Initialize pool of kva ptbl buffers. */
166static void
167ptbl_init(void)
168{
169	int i;
170
171	CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__,
172	    (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS);
173	CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)",
174	    __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE);
175
176	mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF);
177	TAILQ_INIT(&ptbl_buf_freelist);
178
179	for (i = 0; i < PTBL_BUFS; i++) {
180		ptbl_bufs[i].kva =
181		    ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE;
182		TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link);
183	}
184}
185
186/* Get a ptbl_buf from the freelist. */
187static struct ptbl_buf *
188ptbl_buf_alloc(void)
189{
190	struct ptbl_buf *buf;
191
192	mtx_lock(&ptbl_buf_freelist_lock);
193	buf = TAILQ_FIRST(&ptbl_buf_freelist);
194	if (buf != NULL)
195		TAILQ_REMOVE(&ptbl_buf_freelist, buf, link);
196	mtx_unlock(&ptbl_buf_freelist_lock);
197
198	CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf);
199
200	return (buf);
201}
202
/* Return ptbl buf to the free pool. */
204static void
205ptbl_buf_free(struct ptbl_buf *buf)
206{
207
208	CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf);
209
210	mtx_lock(&ptbl_buf_freelist_lock);
211	TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link);
212	mtx_unlock(&ptbl_buf_freelist_lock);
213}
214
/*
 * Search the pmap's list of allocated ptbl bufs for the buf that maps the
 * given ptbl and return it to the free pool.
 */
218static void
219ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl)
220{
221	struct ptbl_buf *pbuf;
222
223	CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl);
224
225	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
226
227	TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link)
228		if (pbuf->kva == (vm_offset_t)ptbl) {
229			/* Remove from pmap ptbl buf list. */
230			TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link);
231
232			/* Free corresponding ptbl buf. */
233			ptbl_buf_free(pbuf);
234			break;
235		}
236}
237
238/* Allocate page table. */
239static pte_t *
240ptbl_alloc(pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep)
241{
242	vm_page_t mtbl[PTBL_PAGES];
243	vm_page_t m;
244	struct ptbl_buf *pbuf;
245	unsigned int pidx;
246	pte_t *ptbl;
247	int i, j;
248
249	CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
250	    (pmap == kernel_pmap), pdir_idx);
251
252	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
253	    ("ptbl_alloc: invalid pdir_idx"));
254	KASSERT((pmap->pm_pdir[pdir_idx] == NULL),
	    ("ptbl_alloc: valid ptbl entry exists!"));
256
257	pbuf = ptbl_buf_alloc();
258	if (pbuf == NULL)
		panic("ptbl_alloc: couldn't alloc kernel virtual memory");
260
261	ptbl = (pte_t *)pbuf->kva;
262
263	CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl);
264
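	/*
	 * Allocate and wire a page for each page of the ptbl.  On failure
	 * either bail out (nosleep) or wait for free pages and retry.
	 */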
265	for (i = 0; i < PTBL_PAGES; i++) {
266		pidx = (PTBL_PAGES * pdir_idx) + i;
267		while ((m = vm_page_alloc(NULL, pidx,
268		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
269			if (nosleep) {
270				ptbl_free_pmap_ptbl(pmap, ptbl);
271				for (j = 0; j < i; j++)
272					vm_page_free(mtbl[j]);
273				vm_wire_sub(i);
274				return (NULL);
275			}
276			PMAP_UNLOCK(pmap);
277			rw_wunlock(&pvh_global_lock);
278			vm_wait(NULL);
279			rw_wlock(&pvh_global_lock);
280			PMAP_LOCK(pmap);
281		}
282		mtbl[i] = m;
283	}
284
285	/* Map allocated pages into kernel_pmap. */
286	mmu_booke_qenter((vm_offset_t)ptbl, mtbl, PTBL_PAGES);
287
288	/* Zero whole ptbl. */
289	bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE);
290
291	/* Add pbuf to the pmap ptbl bufs list. */
292	TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link);
293
294	return (ptbl);
295}
296
297/* Free ptbl pages and invalidate pdir entry. */
298static void
299ptbl_free(pmap_t pmap, unsigned int pdir_idx)
300{
301	pte_t *ptbl;
302	vm_paddr_t pa;
303	vm_offset_t va;
304	vm_page_t m;
305	int i;
306
307	CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
308	    (pmap == kernel_pmap), pdir_idx);
309
310	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
311	    ("ptbl_free: invalid pdir_idx"));
312
313	ptbl = pmap->pm_pdir[pdir_idx];
314
315	CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl);
316
317	KASSERT((ptbl != NULL), ("ptbl_free: null ptbl"));
318
319	/*
320	 * Invalidate the pdir entry as soon as possible, so that other CPUs
321	 * don't attempt to look up the page tables we are releasing.
322	 */
323	mtx_lock_spin(&tlbivax_mutex);
324	tlb_miss_lock();
325
326	pmap->pm_pdir[pdir_idx] = NULL;
327
328	tlb_miss_unlock();
329	mtx_unlock_spin(&tlbivax_mutex);
330
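	/* Unwire and free each ptbl page and remove its kernel mapping. */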
331	for (i = 0; i < PTBL_PAGES; i++) {
332		va = ((vm_offset_t)ptbl + (i * PAGE_SIZE));
333		pa = pte_vatopa(kernel_pmap, va);
334		m = PHYS_TO_VM_PAGE(pa);
335		vm_page_free_zero(m);
336		vm_wire_sub(1);
337		mmu_booke_kremove(va);
338	}
339
340	ptbl_free_pmap_ptbl(pmap, ptbl);
341}
342
343/*
344 * Decrement ptbl pages hold count and attempt to free ptbl pages.
345 * Called when removing pte entry from ptbl.
346 *
347 * Return 1 if ptbl pages were freed.
348 */
349static int
350ptbl_unhold(pmap_t pmap, unsigned int pdir_idx)
351{
352	pte_t *ptbl;
353	vm_paddr_t pa;
354	vm_page_t m;
355	int i;
356
357	CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
358	    (pmap == kernel_pmap), pdir_idx);
359
360	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
361	    ("ptbl_unhold: invalid pdir_idx"));
362	KASSERT((pmap != kernel_pmap),
363	    ("ptbl_unhold: unholding kernel ptbl!"));
364
365	ptbl = pmap->pm_pdir[pdir_idx];
366
367	//debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl);
368	KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS),
369	    ("ptbl_unhold: non kva ptbl"));
370
371	/* decrement hold count */
372	for (i = 0; i < PTBL_PAGES; i++) {
373		pa = pte_vatopa(kernel_pmap,
374		    (vm_offset_t)ptbl + (i * PAGE_SIZE));
375		m = PHYS_TO_VM_PAGE(pa);
376		m->ref_count--;
377	}
378
379	/*
	 * Free ptbl pages if there are no pte entries in this ptbl.
381	 * ref_count has the same value for all ptbl pages, so check the last
382	 * page.
383	 */
384	if (m->ref_count == 0) {
385		ptbl_free(pmap, pdir_idx);
386
387		//debugf("ptbl_unhold: e (freed ptbl)\n");
388		return (1);
389	}
390
391	return (0);
392}
393
394/*
395 * Increment hold count for ptbl pages. This routine is used when a new pte
396 * entry is being inserted into the ptbl.
397 */
398static void
399ptbl_hold(pmap_t pmap, unsigned int pdir_idx)
400{
401	vm_paddr_t pa;
402	pte_t *ptbl;
403	vm_page_t m;
404	int i;
405
406	CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap,
407	    pdir_idx);
408
409	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
410	    ("ptbl_hold: invalid pdir_idx"));
411	KASSERT((pmap != kernel_pmap),
412	    ("ptbl_hold: holding kernel ptbl!"));
413
414	ptbl = pmap->pm_pdir[pdir_idx];
415
416	KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl"));
417
418	for (i = 0; i < PTBL_PAGES; i++) {
419		pa = pte_vatopa(kernel_pmap,
420		    (vm_offset_t)ptbl + (i * PAGE_SIZE));
421		m = PHYS_TO_VM_PAGE(pa);
422		m->ref_count++;
423	}
424}
425
426/*
427 * Clean pte entry, try to free page table page if requested.
428 *
429 * Return 1 if ptbl pages were freed, otherwise return 0.
430 */
431static int
432pte_remove(pmap_t pmap, vm_offset_t va, uint8_t flags)
433{
434	unsigned int pdir_idx = PDIR_IDX(va);
435	unsigned int ptbl_idx = PTBL_IDX(va);
436	vm_page_t m;
437	pte_t *ptbl;
438	pte_t *pte;
439
440	//int su = (pmap == kernel_pmap);
441	//debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n",
442	//		su, (u_int32_t)pmap, va, flags);
443
444	ptbl = pmap->pm_pdir[pdir_idx];
445	KASSERT(ptbl, ("pte_remove: null ptbl"));
446
447	pte = &ptbl[ptbl_idx];
448
449	if (pte == NULL || !PTE_ISVALID(pte))
450		return (0);
451
452	if (PTE_ISWIRED(pte))
453		pmap->pm_stats.wired_count--;
454
455	/* Get vm_page_t for mapped pte. */
456	m = PHYS_TO_VM_PAGE(PTE_PA(pte));
457
458	/* Handle managed entry. */
459	if (PTE_ISMANAGED(pte)) {
460		if (PTE_ISMODIFIED(pte))
461			vm_page_dirty(m);
462
463		if (PTE_ISREFERENCED(pte))
464			vm_page_aflag_set(m, PGA_REFERENCED);
465
466		pv_remove(pmap, va, m);
467	} else if (pmap == kernel_pmap && m && m->md.pv_tracked) {
468		/*
469		 * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is
470		 * used.  This is needed by the NCSW support code for fast
471		 * VA<->PA translation.
472		 */
473		pv_remove(pmap, va, m);
474		if (TAILQ_EMPTY(&m->md.pv_list))
475			m->md.pv_tracked = false;
476	}
477
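	/* Invalidate the TLB entry for this VA and clear the PTE under the
	 * TLB miss lock. */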
478	mtx_lock_spin(&tlbivax_mutex);
479	tlb_miss_lock();
480
481	tlb0_flush_entry(va);
482	*pte = 0;
483
484	tlb_miss_unlock();
485	mtx_unlock_spin(&tlbivax_mutex);
486
487	pmap->pm_stats.resident_count--;
488
489	if (flags & PTBL_UNHOLD) {
490		//debugf("pte_remove: e (unhold)\n");
491		return (ptbl_unhold(pmap, pdir_idx));
492	}
493
494	//debugf("pte_remove: e\n");
495	return (0);
496}
497
498/*
499 * Insert PTE for a given page and virtual address.
500 */
501static int
502pte_enter(pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags,
503    boolean_t nosleep)
504{
505	unsigned int pdir_idx = PDIR_IDX(va);
506	unsigned int ptbl_idx = PTBL_IDX(va);
507	pte_t *ptbl, *pte, pte_tmp;
508
509	CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__,
510	    pmap == kernel_pmap, pmap, va);
511
512	/* Get the page table pointer. */
513	ptbl = pmap->pm_pdir[pdir_idx];
514
515	if (ptbl == NULL) {
516		/* Allocate page table pages. */
517		ptbl = ptbl_alloc(pmap, pdir_idx, nosleep);
518		if (ptbl == NULL) {
519			KASSERT(nosleep, ("nosleep and NULL ptbl"));
520			return (ENOMEM);
521		}
522		pmap->pm_pdir[pdir_idx] = ptbl;
523		pte = &ptbl[ptbl_idx];
524	} else {
525		/*
526		 * Check if there is valid mapping for requested
527		 * va, if there is, remove it.
528		 */
529		pte = &pmap->pm_pdir[pdir_idx][ptbl_idx];
530		if (PTE_ISVALID(pte)) {
531			pte_remove(pmap, va, PTBL_HOLD);
532		} else {
533			/*
534			 * pte is not used, increment hold count
535			 * for ptbl pages.
536			 */
537			if (pmap != kernel_pmap)
538				ptbl_hold(pmap, pdir_idx);
539		}
540	}
541
542	/*
543	 * Insert pv_entry into pv_list for mapped page if part of managed
544	 * memory.
545	 */
546	if ((m->oflags & VPO_UNMANAGED) == 0) {
547		flags |= PTE_MANAGED;
548
549		/* Create and insert pv entry. */
550		pv_insert(pmap, va, m);
551	}
552
553	pmap->pm_stats.resident_count++;
554
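	/* Build the new PTE value before publishing it under the TLB miss lock. */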
555	pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m));
556	pte_tmp |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */
557
558	mtx_lock_spin(&tlbivax_mutex);
559	tlb_miss_lock();
560
561	tlb0_flush_entry(va);
562	*pte = pte_tmp;
563
564	tlb_miss_unlock();
565	mtx_unlock_spin(&tlbivax_mutex);
566	return (0);
567}
568
569/* Return the pa for the given pmap/va. */
570static vm_paddr_t
571pte_vatopa(pmap_t pmap, vm_offset_t va)
572{
573	vm_paddr_t pa = 0;
574	pte_t *pte;
575
576	pte = pte_find(pmap, va);
577	if ((pte != NULL) && PTE_ISVALID(pte))
578		pa = (PTE_PA(pte) | (va & PTE_PA_MASK));
579	return (pa);
580}
581
582/* Get a pointer to a PTE in a page table. */
583static pte_t *
584pte_find(pmap_t pmap, vm_offset_t va)
585{
586	unsigned int pdir_idx = PDIR_IDX(va);
587	unsigned int ptbl_idx = PTBL_IDX(va);
588
589	KASSERT((pmap != NULL), ("pte_find: invalid pmap"));
590
591	if (pmap->pm_pdir[pdir_idx])
592		return (&(pmap->pm_pdir[pdir_idx][ptbl_idx]));
593
594	return (NULL);
595}
596
597/* Get a pointer to a PTE in a page table, or the next closest (greater) one. */
598static __inline pte_t *
599pte_find_next(pmap_t pmap, vm_offset_t *pva)
600{
601	vm_offset_t	va;
602	pte_t	      **pdir;
603	pte_t	       *pte;
604	unsigned long	i, j;
605
	KASSERT((pmap != NULL), ("pte_find_next: invalid pmap"));
607
608	va = *pva;
609	i = PDIR_IDX(va);
610	j = PTBL_IDX(va);
611	pdir = pmap->pm_pdir;
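	/* Scan forward from the given VA for the first valid PTE. */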
612	for (; i < PDIR_NENTRIES; i++, j = 0) {
613		if (pdir[i] == NULL)
614			continue;
615		for (; j < PTBL_NENTRIES; j++) {
616			pte = &pdir[i][j];
617			if (!PTE_ISVALID(pte))
618				continue;
619			*pva = PDIR_SIZE * i + PAGE_SIZE * j;
620			return (pte);
621		}
622	}
623	return (NULL);
624}
625
626/* Set up kernel page tables. */
627static void
628kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr)
629{
630	pte_t		*pte;
631	vm_offset_t	va;
632	vm_offset_t	pdir_start;
633	int		i;
634
635	kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE;
636	kernel_pmap->pm_pdir = (pte_t **)kernel_ptbl_root;
637
	pdir_start = kernel_ptbl_root + PDIR_NENTRIES * sizeof(pte_t *);
639
640	/* Initialize kernel pdir */
641	for (i = 0; i < kernel_ptbls; i++) {
642		kernel_pmap->pm_pdir[kptbl_min + i] =
643		    (pte_t *)(pdir_start + (i * PAGE_SIZE * PTBL_PAGES));
644	}
645
646	/*
	 * Fill in PTEs covering kernel code and data. They are not required
	 * for address translation, as this area is covered by static TLB1
	 * entries, but are needed for pte_vatopa() to work correctly with
	 * kernel area addresses.
651	 */
652	for (va = addr; va < data_end; va += PAGE_SIZE) {
653		pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]);
654		powerpc_sync();
655		*pte = PTE_RPN_FROM_PA(kernload + (va - kernstart));
656		*pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED |
657		    PTE_VALID | PTE_PS_4KB;
658	}
659}
660
661static vm_offset_t
662mmu_booke_alloc_kernel_pgtables(vm_offset_t data_end)
663{
664	/* Allocate space for ptbl_bufs. */
665	ptbl_bufs = (struct ptbl_buf *)data_end;
666	data_end += sizeof(struct ptbl_buf) * PTBL_BUFS;
667	debugf(" ptbl_bufs at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n",
668	    (uintptr_t)ptbl_bufs, data_end);
669
670	data_end = round_page(data_end);
671
672	kernel_ptbl_root = data_end;
673	data_end += PDIR_NENTRIES * sizeof(pte_t*);
674
675	/* Allocate PTE tables for kernel KVA. */
676	kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
677	    PDIR_SIZE);
678	data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE;
679	debugf(" kernel ptbls: %d\n", kernel_ptbls);
680	debugf(" kernel pdir at %#jx end = %#jx\n",
681	    (uintmax_t)kernel_ptbl_root, (uintmax_t)data_end);
682
683	return (data_end);
684}
685
686/*
687 * Initialize a preallocated and zeroed pmap structure,
688 * such as one in a vmspace structure.
689 */
690static int
691mmu_booke_pinit(pmap_t pmap)
692{
693	int i;
694
695	CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap,
696	    curthread->td_proc->p_pid, curthread->td_proc->p_comm);
697
698	KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap"));
699
700	for (i = 0; i < MAXCPU; i++)
701		pmap->pm_tid[i] = TID_NONE;
	CPU_ZERO(&pmap->pm_active);
703	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
704	pmap->pm_pdir = uma_zalloc(ptbl_root_zone, M_WAITOK);
705	bzero(pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES);
706	TAILQ_INIT(&pmap->pm_ptbl_list);
707
708	return (1);
709}
710
711/*
712 * Release any resources held by the given physical map.
713 * Called when a pmap initialized by mmu_booke_pinit is being released.
714 * Should only be called if the map contains no valid mappings.
715 */
716static void
717mmu_booke_release(pmap_t pmap)
718{
719
720	KASSERT(pmap->pm_stats.resident_count == 0,
721	    ("pmap_release: pmap resident count %ld != 0",
722	    pmap->pm_stats.resident_count));
723	uma_zfree(ptbl_root_zone, pmap->pm_pdir);
724}
725
726static void
727mmu_booke_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
728{
729	pte_t *pte;
730	vm_paddr_t pa = 0;
731	int sync_sz, valid;
732	pmap_t pmap;
733	vm_page_t m;
734	vm_offset_t addr;
735	int active;
736
737	rw_wlock(&pvh_global_lock);
738	pmap = PCPU_GET(curpmap);
739	active = (pm == kernel_pmap || pm == pmap) ? 1 : 0;
740	while (sz > 0) {
741		PMAP_LOCK(pm);
742		pte = pte_find(pm, va);
743		valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0;
744		if (valid)
745			pa = PTE_PA(pte);
746		PMAP_UNLOCK(pm);
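		/* Sync at most up to the end of the current page. */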
747		sync_sz = PAGE_SIZE - (va & PAGE_MASK);
748		sync_sz = min(sync_sz, sz);
749		if (valid) {
750			if (!active) {
751				/* Create a mapping in the active pmap. */
752				addr = 0;
753				m = PHYS_TO_VM_PAGE(pa);
754				PMAP_LOCK(pmap);
755				pte_enter(pmap, m, addr,
756				    PTE_SR | PTE_VALID, FALSE);
757				addr += (va & PAGE_MASK);
758				__syncicache((void *)addr, sync_sz);
759				pte_remove(pmap, addr, PTBL_UNHOLD);
760				PMAP_UNLOCK(pmap);
761			} else
762				__syncicache((void *)va, sync_sz);
763		}
764		va += sync_sz;
765		sz -= sync_sz;
766	}
767	rw_wunlock(&pvh_global_lock);
768}
769
770/*
771 * mmu_booke_zero_page_area zeros the specified hardware page by
772 * mapping it into virtual memory and using bzero to clear
773 * its contents.
774 *
775 * off and size must reside within a single page.
776 */
777static void
778mmu_booke_zero_page_area(vm_page_t m, int off, int size)
779{
780	vm_offset_t va;
781
782	/* XXX KASSERT off and size are within a single page? */
783
784	mtx_lock(&zero_page_mutex);
785	va = zero_page_va;
786
787	mmu_booke_kenter(va, VM_PAGE_TO_PHYS(m));
788	bzero((caddr_t)va + off, size);
789	mmu_booke_kremove(va);
790
791	mtx_unlock(&zero_page_mutex);
792}
793
794/*
795 * mmu_booke_zero_page zeros the specified hardware page.
796 */
797static void
798mmu_booke_zero_page(vm_page_t m)
799{
800	vm_offset_t off, va;
801
802	va = zero_page_va;
803	mtx_lock(&zero_page_mutex);
804
805	mmu_booke_kenter(va, VM_PAGE_TO_PHYS(m));
806
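	/* Zero the page one cache line at a time using dcbz. */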
807	for (off = 0; off < PAGE_SIZE; off += cacheline_size)
808		__asm __volatile("dcbz 0,%0" :: "r"(va + off));
809
810	mmu_booke_kremove(va);
811
812	mtx_unlock(&zero_page_mutex);
813}
814
815/*
816 * mmu_booke_copy_page copies the specified (machine independent) page by
 * mapping the page into virtual memory and using memcpy() to copy the page,
818 * one machine dependent page at a time.
819 */
820static void
821mmu_booke_copy_page(vm_page_t sm, vm_page_t dm)
822{
823	vm_offset_t sva, dva;
824
825	sva = copy_page_src_va;
826	dva = copy_page_dst_va;
827
828	mtx_lock(&copy_page_mutex);
829	mmu_booke_kenter(sva, VM_PAGE_TO_PHYS(sm));
830	mmu_booke_kenter(dva, VM_PAGE_TO_PHYS(dm));
831
832	memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE);
833
834	mmu_booke_kremove(dva);
835	mmu_booke_kremove(sva);
836	mtx_unlock(&copy_page_mutex);
837}
838
839static inline void
840mmu_booke_copy_pages(vm_page_t *ma, vm_offset_t a_offset,
841    vm_page_t *mb, vm_offset_t b_offset, int xfersize)
842{
843	void *a_cp, *b_cp;
844	vm_offset_t a_pg_offset, b_pg_offset;
845	int cnt;
846
847	mtx_lock(&copy_page_mutex);
848	while (xfersize > 0) {
849		a_pg_offset = a_offset & PAGE_MASK;
850		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
851		mmu_booke_kenter(copy_page_src_va,
852		    VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]));
853		a_cp = (char *)copy_page_src_va + a_pg_offset;
854		b_pg_offset = b_offset & PAGE_MASK;
855		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
856		mmu_booke_kenter(copy_page_dst_va,
857		    VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]));
858		b_cp = (char *)copy_page_dst_va + b_pg_offset;
859		bcopy(a_cp, b_cp, cnt);
860		mmu_booke_kremove(copy_page_dst_va);
861		mmu_booke_kremove(copy_page_src_va);
862		a_offset += cnt;
863		b_offset += cnt;
864		xfersize -= cnt;
865	}
866	mtx_unlock(&copy_page_mutex);
867}
868
869static vm_offset_t
870mmu_booke_quick_enter_page(vm_page_t m)
871{
872	vm_paddr_t paddr;
873	vm_offset_t qaddr;
874	uint32_t flags;
875	pte_t *pte;
876
877	paddr = VM_PAGE_TO_PHYS(m);
878
879	flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
880	flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT;
881	flags |= PTE_PS_4KB;
882
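	/*
	 * Each CPU has its own quick-map VA (qmap_addr); stay in a critical
	 * section so we remain on this CPU while the mapping is in use.
	 */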
883	critical_enter();
884	qaddr = PCPU_GET(qmap_addr);
885
886	pte = pte_find(kernel_pmap, qaddr);
887
888	KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy"));
889
890	/*
891	 * XXX: tlbivax is broadcast to other cores, but qaddr should
	 * not be present in other TLBs.  Is there a better instruction
893	 * sequence to use? Or just forget it & use mmu_booke_kenter()...
894	 */
895	__asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK));
896	__asm __volatile("isync; msync");
897
898	*pte = PTE_RPN_FROM_PA(paddr) | flags;
899
900	/* Flush the real memory from the instruction cache. */
901	if ((flags & (PTE_I | PTE_G)) == 0)
902		__syncicache((void *)qaddr, PAGE_SIZE);
903
904	return (qaddr);
905}
906
907static void
908mmu_booke_quick_remove_page(vm_offset_t addr)
909{
910	pte_t *pte;
911
912	pte = pte_find(kernel_pmap, addr);
913
914	KASSERT(PCPU_GET(qmap_addr) == addr,
915	    ("mmu_booke_quick_remove_page: invalid address"));
916	KASSERT(*pte != 0,
917	    ("mmu_booke_quick_remove_page: PTE not in use"));
918
919	*pte = 0;
920	critical_exit();
921}
922
923/**************************************************************************/
924/* TID handling */
925/**************************************************************************/
926
927/*
928 * Return the largest uint value log such that 2^log <= num.
929 */
930static unsigned long
931ilog2(unsigned long num)
932{
933	long lz;
934
935	__asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num));
936	return (31 - lz);
937}
938
939/*
940 * Invalidate all TLB0 entries which match the given TID. Note this is
941 * dedicated for cases when invalidations should NOT be propagated to other
942 * CPUs.
943 */
944static void
945tid_flush(tlbtid_t tid)
946{
947	register_t msr;
948	uint32_t mas0, mas1, mas2;
949	int entry, way;
950
951	/* Don't evict kernel translations */
952	if (tid == TID_KERNEL)
953		return;
954
955	msr = mfmsr();
956	__asm __volatile("wrteei 0");
957
958	/*
	 * Newer cores (e500mc and later) have tlbilx, which doesn't broadcast,
	 * so use it for PID invalidation.
961	 */
962	switch ((mfpvr() >> 16) & 0xffff) {
963	case FSL_E500mc:
964	case FSL_E5500:
965	case FSL_E6500:
966		mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT);
967		/* tlbilxpid */
968		__asm __volatile("isync; .long 0x7c200024; isync; msync");
969		__asm __volatile("wrtee %0" :: "r"(msr));
970		return;
971	}
972
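	/*
	 * Otherwise walk every TLB0 way and entry, read each one back and
	 * invalidate those tagged with the given TID.
	 */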
973	for (way = 0; way < TLB0_WAYS; way++)
974		for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) {
975			mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
976			mtspr(SPR_MAS0, mas0);
977
978			mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT;
979			mtspr(SPR_MAS2, mas2);
980
981			__asm __volatile("isync; tlbre");
982
983			mas1 = mfspr(SPR_MAS1);
984
985			if (!(mas1 & MAS1_VALID))
986				continue;
987			if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid)
988				continue;
989			mas1 &= ~MAS1_VALID;
990			mtspr(SPR_MAS1, mas1);
991			__asm __volatile("isync; tlbwe; isync; msync");
992		}
993	__asm __volatile("wrtee %0" :: "r"(msr));
994}
995