1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD$");
50
51#include "opt_pmap.h"
52
53#include <sys/param.h>
54#include <sys/efi.h>
55#include <sys/kernel.h>
56#include <sys/ktr.h>
57#include <sys/lock.h>
58#include <sys/mman.h>
59#include <sys/mutex.h>
60#include <sys/proc.h>
61#include <sys/rwlock.h>
62#include <sys/smp.h>
63#include <sys/sysctl.h>
64#include <sys/systm.h>
65
66#include <vm/vm.h>
67#include <vm/vm_param.h>
68#include <vm/vm_page.h>
69#include <vm/vm_map.h>
70#include <vm/vm_object.h>
71#include <vm/vm_pageout.h>
72#include <vm/uma.h>
73
74#include <machine/bootinfo.h>
75#include <machine/md_var.h>
76#include <machine/pal.h>
77
78/*
79 *	Manages physical address maps.
80 *
81 *	Since the information managed by this module is
82 *	also stored by the logical address mapping module,
83 *	this module may throw away valid virtual-to-physical
84 *	mappings at almost any time.  However, invalidations
85 *	of virtual-to-physical mappings must be done as
86 *	requested.
87 *
88 *	In order to cope with hardware architectures which
89 *	make virtual-to-physical map invalidates expensive,
 * this module may delay invalidation or reduced-protection
91 *	operations until such time as they are actually
92 *	necessary.  This module is given full information as
93 *	to which processors are currently using which maps,
94 *	and to when physical maps must be made correct.
95 */
96
97/*
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
101 *
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
104 *
105 * Region 0-3:	User virtually mapped
106 * Region 4:	PBVM and special mappings
107 * Region 5:	Kernel virtual memory
108 * Region 6:	Direct-mapped uncacheable
109 * Region 7:	Direct-mapped cacheable
110 */
111
112/* XXX move to a header. */
113extern uint64_t ia64_gateway_page[];
114
115#if !defined(DIAGNOSTIC)
116#define PMAP_INLINE __inline
117#else
118#define PMAP_INLINE
119#endif
120
121#ifdef PV_STATS
122#define PV_STAT(x)	do { x ; } while (0)
123#else
124#define PV_STAT(x)	do { } while (0)
125#endif
126
127#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
128#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
129#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
130#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
131#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
132#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
133#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
134#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
135
136#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
137#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
138#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
139#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
140
141#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
142
143/*
144 * Individual PV entries are stored in per-pmap chunks.  This saves
145 * space by eliminating the need to record the pmap within every PV
146 * entry.
147 */
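/*
 * _NPCM is the number of bitmap words per chunk, _NPCPV the number of
 * PV entries per chunk and _NPCS the number of spare words that pad
 * struct pv_chunk to exactly PAGE_SIZE (see the CTASSERT below).
 */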
148#if PAGE_SIZE == 8192
149#define	_NPCM	6
150#define	_NPCPV	337
151#define	_NPCS	2
152#elif PAGE_SIZE == 16384
153#define	_NPCM	11
154#define	_NPCPV	677
155#define	_NPCS	1
156#endif
157struct pv_chunk {
158	pmap_t			pc_pmap;
159	TAILQ_ENTRY(pv_chunk)	pc_list;
160	u_long			pc_map[_NPCM];	/* bitmap; 1 = free */
161	TAILQ_ENTRY(pv_chunk)	pc_lru;
162	u_long			pc_spare[_NPCS];
163	struct pv_entry		pc_pventry[_NPCPV];
164};
165
166/*
167 * The VHPT bucket head structure.
168 */
169struct ia64_bucket {
170	uint64_t	chain;
171	struct mtx	mutex;
172	u_int		length;
173};
174
175/*
176 * Statically allocated kernel pmap
177 */
178struct pmap kernel_pmap_store;
179
180vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
181vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
182
183/*
184 * Kernel virtual memory management.
185 */
186static int nkpt;
187extern struct ia64_lpte ***ia64_kptdir;
188
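/*
 * The kernel page tables form a three-level structure: ia64_kptdir is
 * a single page of pointers to second-level directory pages, each of
 * which holds pointers to leaf pages of struct ia64_lpte (NKPTEPG
 * entries per leaf).  The macros below extract the index at each level
 * from a region 5 virtual address.
 */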
189#define KPTE_DIR0_INDEX(va) \
190	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
191#define KPTE_DIR1_INDEX(va) \
192	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
193#define KPTE_PTE_INDEX(va) \
194	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
195#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
196
197vm_offset_t kernel_vm_end;
198
199/* Defaults for ptc.e. */
200static uint64_t pmap_ptc_e_base = 0;
201static uint32_t pmap_ptc_e_count1 = 1;
202static uint32_t pmap_ptc_e_count2 = 1;
203static uint32_t pmap_ptc_e_stride1 = 0;
204static uint32_t pmap_ptc_e_stride2 = 0;
205
206struct mtx pmap_ptc_mutex;
207
208/*
209 * Data for the RID allocator
210 */
211static int pmap_ridcount;
212static int pmap_rididx;
213static int pmap_ridmapsz;
214static int pmap_ridmax;
215static uint64_t *pmap_ridmap;
216struct mtx pmap_ridmutex;
217
218static struct rwlock_padalign pvh_global_lock;
219
220/*
221 * Data for the pv entry allocation mechanism
222 */
223static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
224static int pv_entry_count;
225
226/*
227 * Data for allocating PTEs for user processes.
228 */
229static uma_zone_t ptezone;
230
231/*
232 * Virtual Hash Page Table (VHPT) data.
233 */
234/* SYSCTL_DECL(_machdep); */
235static SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
236
237struct ia64_bucket *pmap_vhpt_bucket;
238
239int pmap_vhpt_nbuckets;
240SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
241    &pmap_vhpt_nbuckets, 0, "");
242
243int pmap_vhpt_log2size = 0;
244TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
245SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
246    &pmap_vhpt_log2size, 0, "");
247
248static int pmap_vhpt_inserts;
249SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
250    &pmap_vhpt_inserts, 0, "");
251
252static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
253SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
254    NULL, 0, pmap_vhpt_population, "I", "");
255
256static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
257
258static void free_pv_chunk(struct pv_chunk *pc);
259static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
260static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
261static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
262
263static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
264		    vm_page_t m, vm_prot_t prot);
265static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
266static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
267		    vm_offset_t va, pv_entry_t pv, int freepte);
268static int	pmap_remove_vhpt(vm_offset_t va);
269static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
270		    vm_page_t m);
271
272static void
273pmap_initialize_vhpt(vm_offset_t vhpt)
274{
275	struct ia64_lpte *pte;
276	u_int i;
277
278	pte = (struct ia64_lpte *)vhpt;
279	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
280		pte[i].pte = 0;
281		pte[i].itir = 0;
282		pte[i].tag = 1UL << 63; /* Invalid tag */
283		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
284	}
285}
286
287#ifdef SMP
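/*
 * Allocate an additional VHPT: a physically contiguous, naturally
 * aligned block of 2^pmap_vhpt_log2size bytes, returned as a region 7
 * (cacheable, direct-mapped) address, or 0 on failure.
 */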
288vm_offset_t
289pmap_alloc_vhpt(void)
290{
291	vm_offset_t vhpt;
292	vm_page_t m;
293	vm_size_t size;
294
295	size = 1UL << pmap_vhpt_log2size;
296	m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
297	    VM_ALLOC_WIRED, atop(size), 0UL, ~0UL, size, 0UL,
298	    VM_MEMATTR_DEFAULT);
299	if (m != NULL) {
300		vhpt = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
301		pmap_initialize_vhpt(vhpt);
302		return (vhpt);
303	}
304	return (0);
305}
306#endif
307
308/*
309 *	Bootstrap the system enough to run with virtual memory.
310 */
311void
pmap_bootstrap(void)
313{
314	struct ia64_pal_result res;
315	vm_offset_t base;
316	size_t size;
317	int i, ridbits;
318
319	/*
320	 * Query the PAL Code to find the loop parameters for the
321	 * ptc.e instruction.
322	 */
323	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
324	if (res.pal_status != 0)
325		panic("Can't configure ptc.e parameters");
326	pmap_ptc_e_base = res.pal_result[0];
327	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
328	pmap_ptc_e_count2 = res.pal_result[1];
329	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
330	pmap_ptc_e_stride2 = res.pal_result[2];
331	if (bootverbose)
332		printf("ptc.e base=0x%lx, count1=%u, count2=%u, "
333		       "stride1=0x%x, stride2=0x%x\n",
334		       pmap_ptc_e_base,
335		       pmap_ptc_e_count1,
336		       pmap_ptc_e_count2,
337		       pmap_ptc_e_stride1,
338		       pmap_ptc_e_stride2);
339
340	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
341
342	/*
	 * Set up RIDs. RIDs 0..7 are reserved for the kernel.
344	 *
345	 * We currently need at least 19 bits in the RID because PID_MAX
346	 * can only be encoded in 17 bits and we need RIDs for 4 regions
347	 * per process. With PID_MAX equalling 99999 this means that we
348	 * need to be able to encode 399996 (=4*PID_MAX).
349	 * The Itanium processor only has 18 bits and the architected
350	 * minimum is exactly that. So, we cannot use a PID based scheme
351	 * in those cases. Enter pmap_ridmap...
352	 * We should avoid the map when running on a processor that has
353	 * implemented enough bits. This means that we should pass the
354	 * process/thread ID to pmap. This we currently don't do, so we
355	 * use the map anyway. However, we don't want to allocate a map
356	 * that is large enough to cover the range dictated by the number
357	 * of bits in the RID, because that may result in a RID map of
358	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
	 * The bottom line: we create a 32KB map when the processor only
360	 * implements 18 bits (or when we can't figure it out). Otherwise
361	 * we create a 64KB map.
362	 */
363	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
364	if (res.pal_status != 0) {
365		if (bootverbose)
366			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
367		ridbits = 18; /* guaranteed minimum */
368	} else {
369		ridbits = (res.pal_result[1] >> 8) & 0xff;
370		if (bootverbose)
371			printf("Processor supports %d Region ID bits\n",
372			    ridbits);
373	}
374	if (ridbits > 19)
375		ridbits = 19;
376
377	pmap_ridmax = (1 << ridbits);
378	pmap_ridmapsz = pmap_ridmax / 64;
379	pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE);
380	pmap_ridmap[0] |= 0xff;
381	pmap_rididx = 0;
382	pmap_ridcount = 8;
383	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
384
385	/*
386	 * Allocate some memory for initial kernel 'page tables'.
387	 */
388	ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE);
389	nkpt = 0;
390	kernel_vm_end = VM_INIT_KERNEL_ADDRESS;
391
392	/*
393	 * Determine a valid (mappable) VHPT size.
394	 */
395	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
396	if (pmap_vhpt_log2size == 0)
397		pmap_vhpt_log2size = 20;
398	else if (pmap_vhpt_log2size < 16)
399		pmap_vhpt_log2size = 16;
400	else if (pmap_vhpt_log2size > 28)
401		pmap_vhpt_log2size = 28;
402	if (pmap_vhpt_log2size & 1)
403		pmap_vhpt_log2size--;
404
405	size = 1UL << pmap_vhpt_log2size;
406	base = (uintptr_t)ia64_physmem_alloc(size, size);
407	if (base == 0)
408		panic("Unable to allocate VHPT");
409
410	PCPU_SET(md.vhpt, base);
411	if (bootverbose)
412		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
413
414	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
415	pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets *
416	    sizeof(struct ia64_bucket), PAGE_SIZE);
417	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
418		/* Stolen memory is zeroed. */
419		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
420		    MTX_NOWITNESS | MTX_SPIN);
421	}
422
423	pmap_initialize_vhpt(base);
424	map_vhpt(base);
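	/*
	 * Point the VHPT walker at our table: the PTA holds the VHPT
	 * base address, the long-format bit (1 << 8), the table size
	 * (log2size << 2) and the walker enable bit.
	 */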
425	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
426	ia64_srlz_i();
427
428	virtual_avail = VM_INIT_KERNEL_ADDRESS;
429	virtual_end = VM_MAX_KERNEL_ADDRESS;
430
431	/*
432	 * Initialize the kernel pmap (which is statically allocated).
433	 */
434	PMAP_LOCK_INIT(kernel_pmap);
435	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
436		kernel_pmap->pm_rid[i] = 0;
437	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
438	PCPU_SET(md.current_pmap, kernel_pmap);
439
440 	/*
441	 * Initialize the global pv list lock.
442	 */
443	rw_init(&pvh_global_lock, "pmap pv global");
444
445	/* Region 5 is mapped via the VHPT. */
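	/*
	 * rr format: region ID in bits 8 and up, preferred page size in
	 * bits 7:2, VHPT walker enable in bit 0.
	 */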
446	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
447
448	/*
449	 * Clear out any random TLB entries left over from booting.
450	 */
451	pmap_invalidate_all();
452
453	map_gateway_page();
454}
455
456static int
457pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
458{
459	int count, error, i;
460
461	count = 0;
462	for (i = 0; i < pmap_vhpt_nbuckets; i++)
463		count += pmap_vhpt_bucket[i].length;
464
465	error = SYSCTL_OUT(req, &count, sizeof(count));
466	return (error);
467}
468
469vm_offset_t
470pmap_page_to_va(vm_page_t m)
471{
472	vm_paddr_t pa;
473	vm_offset_t va;
474
475	pa = VM_PAGE_TO_PHYS(m);
476	va = (m->md.memattr == VM_MEMATTR_UNCACHEABLE) ? IA64_PHYS_TO_RR6(pa) :
477	    IA64_PHYS_TO_RR7(pa);
478	return (va);
479}
480
481/*
482 *	Initialize a vm_page's machine-dependent fields.
483 */
484void
485pmap_page_init(vm_page_t m)
486{
487
488	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
489
490	TAILQ_INIT(&m->md.pv_list);
491	m->md.memattr = VM_MEMATTR_DEFAULT;
492}
493
494/*
495 *	Initialize the pmap module.
496 *	Called by vm_init, to initialize any structures that the pmap
497 *	system needs to map virtual memory.
498 */
499void
500pmap_init(void)
501{
502
503	CTR1(KTR_PMAP, "%s()", __func__);
504
505	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
506	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
507}
508
509
510/***************************************************
511 * Manipulate TLBs for a pmap
512 ***************************************************/
513
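/*
 * Invalidate the translation for a single page: clobber any matching
 * VHPT entry on every CPU by overwriting its tag with an invalid one,
 * then do a global TLB purge (ptc.ga) under pmap_ptc_mutex so that
 * only one global purge is in flight at a time.
 */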
514static void
515pmap_invalidate_page(vm_offset_t va)
516{
517	struct ia64_lpte *pte;
518	struct pcpu *pc;
519	uint64_t tag;
520	u_int vhpt_ofs;
521
522	critical_enter();
523
524	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
525	tag = ia64_ttag(va);
526	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
527		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
528		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
529	}
530
531	mtx_lock_spin(&pmap_ptc_mutex);
532
533	ia64_ptc_ga(va, PAGE_SHIFT << 2);
534	ia64_mf();
535	ia64_srlz_i();
536
537	mtx_unlock_spin(&pmap_ptc_mutex);
538
539	ia64_invala();
540
541	critical_exit();
542}
543
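/*
 * Invalidate all translations on the local CPU by walking the ptc.e
 * loop described by the PAL_PTCE_INFO parameters read at bootstrap.
 */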
544void
545pmap_invalidate_all(void)
546{
547	uint64_t addr;
548	int i, j;
549
550	addr = pmap_ptc_e_base;
551	for (i = 0; i < pmap_ptc_e_count1; i++) {
552		for (j = 0; j < pmap_ptc_e_count2; j++) {
553			ia64_ptc_e(addr);
554			addr += pmap_ptc_e_stride2;
555		}
556		addr += pmap_ptc_e_stride1;
557	}
558	ia64_srlz_i();
559}
560
561static uint32_t
562pmap_allocate_rid(void)
563{
564	uint64_t bit, bits;
565	int rid;
566
567	mtx_lock(&pmap_ridmutex);
568	if (pmap_ridcount == pmap_ridmax)
569		panic("pmap_allocate_rid: All Region IDs used");
570
571	/* Find an index with a free bit. */
572	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
573		pmap_rididx++;
574		if (pmap_rididx == pmap_ridmapsz)
575			pmap_rididx = 0;
576	}
577	rid = pmap_rididx * 64;
578
579	/* Find a free bit. */
580	bit = 1UL;
581	while (bits & bit) {
582		rid++;
583		bit <<= 1;
584	}
585
586	pmap_ridmap[pmap_rididx] |= bit;
587	pmap_ridcount++;
588	mtx_unlock(&pmap_ridmutex);
589
590	return rid;
591}
592
593static void
594pmap_free_rid(uint32_t rid)
595{
596	uint64_t bit;
597	int idx;
598
599	idx = rid / 64;
600	bit = ~(1UL << (rid & 63));
601
602	mtx_lock(&pmap_ridmutex);
603	pmap_ridmap[idx] &= bit;
604	pmap_ridcount--;
605	mtx_unlock(&pmap_ridmutex);
606}
607
608/***************************************************
609 * Page table page management routines.....
610 ***************************************************/
611
612static void
613pmap_pinit_common(pmap_t pmap)
614{
615	int i;
616
617	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
618		pmap->pm_rid[i] = pmap_allocate_rid();
619	TAILQ_INIT(&pmap->pm_pvchunk);
620	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
621}
622
623void
624pmap_pinit0(pmap_t pmap)
625{
626
627	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
628
629	PMAP_LOCK_INIT(pmap);
630	pmap_pinit_common(pmap);
631}
632
633/*
634 * Initialize a preallocated and zeroed pmap structure,
635 * such as one in a vmspace structure.
636 */
637int
638pmap_pinit(pmap_t pmap)
639{
640
641	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
642
643	pmap_pinit_common(pmap);
644	return (1);
645}
646
647/***************************************************
648 * Pmap allocation/deallocation routines.
649 ***************************************************/
650
651/*
652 * Release any resources held by the given physical map.
653 * Called when a pmap initialized by pmap_pinit is being released.
654 * Should only be called if the map contains no valid mappings.
655 */
656void
657pmap_release(pmap_t pmap)
658{
659	int i;
660
661	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
662
663	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
664		if (pmap->pm_rid[i])
665			pmap_free_rid(pmap->pm_rid[i]);
666}
667
668/*
669 * grow the number of kernel page table entries, if needed
670 */
671void
672pmap_growkernel(vm_offset_t addr)
673{
674	struct ia64_lpte **dir1;
675	struct ia64_lpte *leaf;
676	vm_page_t nkpg;
677
678	CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, addr);
679
680	while (kernel_vm_end <= addr) {
681		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
682			panic("%s: out of kernel address space", __func__);
683
684		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
685		if (dir1 == NULL) {
686			nkpg = vm_page_alloc(NULL, nkpt++,
687			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
688			if (!nkpg)
689				panic("%s: cannot add dir. page", __func__);
690
691			dir1 = (struct ia64_lpte **)pmap_page_to_va(nkpg);
692			bzero(dir1, PAGE_SIZE);
693			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
694		}
695
696		nkpg = vm_page_alloc(NULL, nkpt++,
697		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
698		if (!nkpg)
699			panic("%s: cannot add PTE page", __func__);
700
701		leaf = (struct ia64_lpte *)pmap_page_to_va(nkpg);
702		bzero(leaf, PAGE_SIZE);
703		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
704
705		kernel_vm_end += PAGE_SIZE * NKPTEPG;
706	}
707}
708
709/***************************************************
710 * page management routines.
711 ***************************************************/
712
713CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
714
715static __inline struct pv_chunk *
716pv_to_chunk(pv_entry_t pv)
717{
718
719	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
720}
721
722#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
723
724#define	PC_FREE_FULL	0xfffffffffffffffful
725#define	PC_FREE_PARTIAL	\
726	((1UL << (_NPCPV - sizeof(u_long) * 8 * (_NPCM - 1))) - 1)
727
728#if PAGE_SIZE == 8192
729static const u_long pc_freemask[_NPCM] = {
730	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
731	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_PARTIAL
732};
733#elif PAGE_SIZE == 16384
734static const u_long pc_freemask[_NPCM] = {
735	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
736	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
737	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
738	PC_FREE_FULL, PC_FREE_PARTIAL
739};
740#endif
741
742static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
743
744SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
745    "Current number of pv entries");
746
747#ifdef PV_STATS
748static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
749
750SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
751    "Current number of pv entry chunks");
752SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
753    "Current number of pv entry chunks allocated");
754SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
755    "Current number of pv entry chunks frees");
756SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
757    "Number of times tried to get a chunk page but failed.");
758
759static long pv_entry_frees, pv_entry_allocs;
760static int pv_entry_spare;
761
762SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
763    "Current number of pv entry frees");
764SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
765    "Current number of pv entry allocs");
766SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
767    "Current number of spare pv entries");
768#endif
769
770/*
771 * We are in a serious low memory condition.  Resort to
772 * drastic measures to free some pages so we can allocate
773 * another pv entry chunk.
774 */
775static vm_page_t
776pmap_pv_reclaim(pmap_t locked_pmap)
777{
778	struct pch newtail;
779	struct pv_chunk *pc;
780	struct ia64_lpte *pte;
781	pmap_t pmap;
782	pv_entry_t pv;
783	vm_offset_t va;
784	vm_page_t m, m_pc;
785	u_long inuse;
786	int bit, field, freed, idx;
787
788	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
789	pmap = NULL;
790	m_pc = NULL;
791	TAILQ_INIT(&newtail);
792	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
793		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
794		if (pmap != pc->pc_pmap) {
795			if (pmap != NULL) {
796				if (pmap != locked_pmap) {
797					pmap_switch(locked_pmap);
798					PMAP_UNLOCK(pmap);
799				}
800			}
801			pmap = pc->pc_pmap;
802			/* Avoid deadlock and lock recursion. */
803			if (pmap > locked_pmap)
804				PMAP_LOCK(pmap);
805			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
806				pmap = NULL;
807				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
808				continue;
809			}
810			pmap_switch(pmap);
811		}
812
813		/*
814		 * Destroy every non-wired, 8 KB page mapping in the chunk.
815		 */
816		freed = 0;
817		for (field = 0; field < _NPCM; field++) {
818			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
819			    inuse != 0; inuse &= ~(1UL << bit)) {
820				bit = ffsl(inuse) - 1;
821				idx = field * sizeof(inuse) * NBBY + bit;
822				pv = &pc->pc_pventry[idx];
823				va = pv->pv_va;
824				pte = pmap_find_vhpt(va);
825				KASSERT(pte != NULL, ("pte"));
826				if (pmap_wired(pte))
827					continue;
828				pmap_remove_vhpt(va);
829				pmap_invalidate_page(va);
830				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
831				if (pmap_accessed(pte))
832					vm_page_aflag_set(m, PGA_REFERENCED);
833				if (pmap_dirty(pte))
834					vm_page_dirty(m);
835				pmap_free_pte(pte, va);
836				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
837				if (TAILQ_EMPTY(&m->md.pv_list))
838					vm_page_aflag_clear(m, PGA_WRITEABLE);
839				pc->pc_map[field] |= 1UL << bit;
840				freed++;
841			}
842		}
843		if (freed == 0) {
844			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
845			continue;
846		}
		/* Every freed mapping is for an 8 KB page. */
848		pmap->pm_stats.resident_count -= freed;
849		PV_STAT(pv_entry_frees += freed);
850		PV_STAT(pv_entry_spare += freed);
851		pv_entry_count -= freed;
852		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
853		for (field = 0; field < _NPCM; field++)
854			if (pc->pc_map[field] != pc_freemask[field]) {
855				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
856				    pc_list);
857				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
858
859				/*
860				 * One freed pv entry in locked_pmap is
861				 * sufficient.
862				 */
863				if (pmap == locked_pmap)
864					goto out;
865				break;
866			}
867		if (field == _NPCM) {
868			PV_STAT(pv_entry_spare -= _NPCPV);
869			PV_STAT(pc_chunk_count--);
870			PV_STAT(pc_chunk_frees++);
871			/* Entire chunk is free; return it. */
872			m_pc = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
873			break;
874		}
875	}
876out:
877	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
878	if (pmap != NULL) {
879		if (pmap != locked_pmap) {
880			pmap_switch(locked_pmap);
881			PMAP_UNLOCK(pmap);
882		}
883	}
884	return (m_pc);
885}
886
887/*
888 * free the pv_entry back to the free list
889 */
890static void
891free_pv_entry(pmap_t pmap, pv_entry_t pv)
892{
893	struct pv_chunk *pc;
894	int bit, field, idx;
895
896	rw_assert(&pvh_global_lock, RA_WLOCKED);
897	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
898	PV_STAT(pv_entry_frees++);
899	PV_STAT(pv_entry_spare++);
900	pv_entry_count--;
901	pc = pv_to_chunk(pv);
902	idx = pv - &pc->pc_pventry[0];
903	field = idx / (sizeof(u_long) * NBBY);
904	bit = idx % (sizeof(u_long) * NBBY);
905	pc->pc_map[field] |= 1ul << bit;
906	for (idx = 0; idx < _NPCM; idx++)
907		if (pc->pc_map[idx] != pc_freemask[idx]) {
908			/*
909			 * 98% of the time, pc is already at the head of the
910			 * list.  If it isn't already, move it to the head.
911			 */
912			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
913			    pc)) {
914				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
915				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
916				    pc_list);
917			}
918			return;
919		}
920	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
921	free_pv_chunk(pc);
922}
923
924static void
925free_pv_chunk(struct pv_chunk *pc)
926{
927	vm_page_t m;
928
929 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
930	PV_STAT(pv_entry_spare -= _NPCPV);
931	PV_STAT(pc_chunk_count--);
932	PV_STAT(pc_chunk_frees++);
933	/* entire chunk is free, return it */
934	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
935	vm_page_unwire(m, 0);
936	vm_page_free(m);
937}
938
939/*
940 * get a new pv_entry, allocating a block from the system
941 * when needed.
942 */
943static pv_entry_t
944get_pv_entry(pmap_t pmap, boolean_t try)
945{
946	struct pv_chunk *pc;
947	pv_entry_t pv;
948	vm_page_t m;
949	int bit, field, idx;
950
951	rw_assert(&pvh_global_lock, RA_WLOCKED);
952	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
953	PV_STAT(pv_entry_allocs++);
954	pv_entry_count++;
955retry:
956	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
957	if (pc != NULL) {
958		for (field = 0; field < _NPCM; field++) {
959			if (pc->pc_map[field]) {
960				bit = ffsl(pc->pc_map[field]) - 1;
961				break;
962			}
963		}
964		if (field < _NPCM) {
965			idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
966			pv = &pc->pc_pventry[idx];
967			pc->pc_map[field] &= ~(1ul << bit);
			/* If the chunk is now full, move it to the tail. */
969			for (field = 0; field < _NPCM; field++)
970				if (pc->pc_map[field] != 0) {
971					PV_STAT(pv_entry_spare--);
972					return (pv);	/* not full, return */
973				}
974			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
975			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
976			PV_STAT(pv_entry_spare--);
977			return (pv);
978		}
979	}
980	/* No free items, allocate another chunk */
981	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
982	    VM_ALLOC_WIRED);
983	if (m == NULL) {
984		if (try) {
985			pv_entry_count--;
986			PV_STAT(pc_chunk_tryfail++);
987			return (NULL);
988		}
989		m = pmap_pv_reclaim(pmap);
990		if (m == NULL)
991			goto retry;
992	}
993	PV_STAT(pc_chunk_count++);
994	PV_STAT(pc_chunk_allocs++);
995	pc = (struct pv_chunk *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
996	pc->pc_pmap = pmap;
997	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
998	for (field = 1; field < _NPCM; field++)
999		pc->pc_map[field] = pc_freemask[field];
1000	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1001	pv = &pc->pc_pventry[0];
1002	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1003	PV_STAT(pv_entry_spare += _NPCPV - 1);
1004	return (pv);
1005}
1006
1007/*
1008 * Conditionally create a pv entry.
1009 */
1010static boolean_t
1011pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1012{
1013	pv_entry_t pv;
1014
1015	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1016	rw_assert(&pvh_global_lock, RA_WLOCKED);
1017	if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
1018		pv->pv_va = va;
1019		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1020		return (TRUE);
1021	} else
1022		return (FALSE);
1023}
1024
1025/*
1026 * Add an ia64_lpte to the VHPT.
1027 */
1028static void
1029pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1030{
1031	struct ia64_bucket *bckt;
1032	struct ia64_lpte *vhpte;
1033	uint64_t pte_pa;
1034
1035	/* Can fault, so get it out of the way. */
1036	pte_pa = ia64_tpa((vm_offset_t)pte);
1037
1038	vhpte = (struct ia64_lpte *)ia64_thash(va);
1039	bckt = (struct ia64_bucket *)vhpte->chain;
1040
1041	mtx_lock_spin(&bckt->mutex);
1042	pte->chain = bckt->chain;
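	/* Order the chain update before publishing the pte in the bucket. */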
1043	ia64_mf();
1044	bckt->chain = pte_pa;
1045
1046	pmap_vhpt_inserts++;
1047	bckt->length++;
1048	mtx_unlock_spin(&bckt->mutex);
1049}
1050
1051/*
1052 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1053 * worked or an appropriate error code otherwise.
1054 */
1055static int
1056pmap_remove_vhpt(vm_offset_t va)
1057{
1058	struct ia64_bucket *bckt;
1059	struct ia64_lpte *pte;
1060	struct ia64_lpte *lpte;
1061	struct ia64_lpte *vhpte;
1062	uint64_t chain, tag;
1063
1064	tag = ia64_ttag(va);
1065	vhpte = (struct ia64_lpte *)ia64_thash(va);
1066	bckt = (struct ia64_bucket *)vhpte->chain;
1067
1068	lpte = NULL;
1069	mtx_lock_spin(&bckt->mutex);
1070	chain = bckt->chain;
1071	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1072	while (chain != 0 && pte->tag != tag) {
1073		lpte = pte;
1074		chain = pte->chain;
1075		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1076	}
1077	if (chain == 0) {
1078		mtx_unlock_spin(&bckt->mutex);
1079		return (ENOENT);
1080	}
1081
	/* Snip this pte out of the collision chain. */
1083	if (lpte == NULL)
1084		bckt->chain = pte->chain;
1085	else
1086		lpte->chain = pte->chain;
1087	ia64_mf();
1088
1089	bckt->length--;
1090	mtx_unlock_spin(&bckt->mutex);
1091	return (0);
1092}
1093
1094/*
1095 * Find the ia64_lpte for the given va, if any.
1096 */
1097static struct ia64_lpte *
1098pmap_find_vhpt(vm_offset_t va)
1099{
1100	struct ia64_bucket *bckt;
1101	struct ia64_lpte *pte;
1102	uint64_t chain, tag;
1103
1104	tag = ia64_ttag(va);
1105	pte = (struct ia64_lpte *)ia64_thash(va);
1106	bckt = (struct ia64_bucket *)pte->chain;
1107
1108	mtx_lock_spin(&bckt->mutex);
1109	chain = bckt->chain;
1110	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1111	while (chain != 0 && pte->tag != tag) {
1112		chain = pte->chain;
1113		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1114	}
1115	mtx_unlock_spin(&bckt->mutex);
1116	return ((chain != 0) ? pte : NULL);
1117}
1118
1119/*
1120 * Remove an entry from the list of managed mappings.
1121 */
1122static int
1123pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1124{
1125
1126	rw_assert(&pvh_global_lock, RA_WLOCKED);
1127	if (!pv) {
1128		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1129			if (pmap == PV_PMAP(pv) && va == pv->pv_va)
1130				break;
1131		}
1132	}
1133
1134	if (pv) {
1135		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1136		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1137			vm_page_aflag_clear(m, PGA_WRITEABLE);
1138
1139		free_pv_entry(pmap, pv);
1140		return 0;
1141	} else {
1142		return ENOENT;
1143	}
1144}
1145
/*
 * Create a pv entry for the page m at (pmap, va).
 */
1150static void
1151pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1152{
1153	pv_entry_t pv;
1154
1155	rw_assert(&pvh_global_lock, RA_WLOCKED);
1156	pv = get_pv_entry(pmap, FALSE);
1157	pv->pv_va = va;
1158	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1159}
1160
1161/*
1162 *	Routine:	pmap_extract
1163 *	Function:
1164 *		Extract the physical page address associated
1165 *		with the given map/virtual_address pair.
1166 */
1167vm_paddr_t
1168pmap_extract(pmap_t pmap, vm_offset_t va)
1169{
1170	struct ia64_lpte *pte;
1171	pmap_t oldpmap;
1172	vm_paddr_t pa;
1173
1174	CTR3(KTR_PMAP, "%s(pm=%p, va=%#lx)", __func__, pmap, va);
1175
1176	pa = 0;
1177	PMAP_LOCK(pmap);
1178	oldpmap = pmap_switch(pmap);
1179	pte = pmap_find_vhpt(va);
1180	if (pte != NULL && pmap_present(pte))
1181		pa = pmap_ppn(pte);
1182	pmap_switch(oldpmap);
1183	PMAP_UNLOCK(pmap);
1184	return (pa);
1185}
1186
1187/*
1188 *	Routine:	pmap_extract_and_hold
1189 *	Function:
1190 *		Atomically extract and hold the physical page
1191 *		with the given pmap and virtual address pair
1192 *		if that mapping permits the given protection.
1193 */
1194vm_page_t
1195pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1196{
1197	struct ia64_lpte *pte;
1198	pmap_t oldpmap;
1199	vm_page_t m;
1200	vm_paddr_t pa;
1201
1202	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, prot=%#x)", __func__, pmap, va,
1203	    prot);
1204
1205	pa = 0;
1206	m = NULL;
1207	PMAP_LOCK(pmap);
1208	oldpmap = pmap_switch(pmap);
1209retry:
1210	pte = pmap_find_vhpt(va);
1211	if (pte != NULL && pmap_present(pte) &&
1212	    (pmap_prot(pte) & prot) == prot) {
1213		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1214		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1215			goto retry;
1216		vm_page_hold(m);
1217	}
1218	PA_UNLOCK_COND(pa);
1219	pmap_switch(oldpmap);
1220	PMAP_UNLOCK(pmap);
1221	return (m);
1222}
1223
1224/***************************************************
1225 * Low level mapping routines.....
1226 ***************************************************/
1227
1228/*
1229 * Find the kernel lpte for mapping the given virtual address, which
1230 * must be in the part of region 5 which we can cover with our kernel
1231 * 'page tables'.
1232 */
1233static struct ia64_lpte *
1234pmap_find_kpte(vm_offset_t va)
1235{
1236	struct ia64_lpte **dir1;
1237	struct ia64_lpte *leaf;
1238
1239	KASSERT((va >> 61) == 5,
1240		("kernel mapping 0x%lx not in region 5", va));
1241	KASSERT(va < kernel_vm_end,
1242		("kernel mapping 0x%lx out of range", va));
1243
1244	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1245	leaf = dir1[KPTE_DIR1_INDEX(va)];
1246	return (&leaf[KPTE_PTE_INDEX(va)]);
1247}
1248
1249/*
1250 * Find a pte suitable for mapping a user-space address. If one exists
1251 * in the VHPT, that one will be returned, otherwise a new pte is
1252 * allocated.
1253 */
1254static struct ia64_lpte *
1255pmap_find_pte(vm_offset_t va)
1256{
1257	struct ia64_lpte *pte;
1258
1259	if (va >= VM_MAXUSER_ADDRESS)
1260		return pmap_find_kpte(va);
1261
1262	pte = pmap_find_vhpt(va);
1263	if (pte == NULL) {
1264		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1265		pte->tag = 1UL << 63;
1266	}
1267	return (pte);
1268}
1269
/*
 * Free a pte which is now unused. This simply returns it to the zone
 * allocator if it is a user mapping. For kernel mappings, clear the
 * present bit to indicate that the mapping is not currently in use.
 */
1275static void
1276pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1277{
1278	if (va < VM_MAXUSER_ADDRESS)
1279		uma_zfree(ptezone, pte);
1280	else
1281		pmap_clear_present(pte);
1282}
1283
1284static PMAP_INLINE void
1285pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1286{
1287	static long prot2ar[4] = {
1288		PTE_AR_R,		/* VM_PROT_NONE */
1289		PTE_AR_RW,		/* VM_PROT_WRITE */
1290		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1291		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1292	};
1293
1294	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1295	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1296	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1297	    ? PTE_PL_KERN : PTE_PL_USER;
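	/*
	 * Index prot2ar[] by the write and execute bits only: shifting
	 * out VM_PROT_READ (bit 0) leaves an index in the range 0..3.
	 */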
1298	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1299}
1300
1301static PMAP_INLINE void
1302pmap_pte_attr(struct ia64_lpte *pte, vm_memattr_t ma)
1303{
1304
1305	pte->pte &= ~PTE_MA_MASK;
1306	pte->pte |= (ma & PTE_MA_MASK);
1307}
1308
/*
 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it's assumed to already be in
 * the VHPT.
 * This function does not set the protection bits.  It's expected
 * that those have been set correctly prior to calling this function.
 */
1316static void
1317pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1318    boolean_t wired, boolean_t managed)
1319{
1320
1321	pte->pte &= PTE_PROT_MASK | PTE_MA_MASK | PTE_PL_MASK |
1322	    PTE_AR_MASK | PTE_ED;
1323	pte->pte |= PTE_PRESENT;
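	/*
	 * Unmanaged mappings are not tracked, so preset the dirty and
	 * accessed bits to avoid pointless access/dirty-bit faults.
	 */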
1324	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1325	pte->pte |= (wired) ? PTE_WIRED : 0;
1326	pte->pte |= pa & PTE_PPN_MASK;
1327
1328	pte->itir = PAGE_SHIFT << 2;
1329
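	/*
	 * Order the pte and itir updates before writing the tag; a
	 * matching tag is what makes the entry usable by the VHPT walker.
	 */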
1330	ia64_mf();
1331
1332	pte->tag = ia64_ttag(va);
1333}
1334
1335/*
1336 * Remove the (possibly managed) mapping represented by pte from the
1337 * given pmap.
1338 */
1339static int
1340pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1341		pv_entry_t pv, int freepte)
1342{
1343	int error;
1344	vm_page_t m;
1345
1346	/*
1347	 * First remove from the VHPT.
1348	 */
1349	error = pmap_remove_vhpt(va);
1350	KASSERT(error == 0, ("%s: pmap_remove_vhpt returned %d",
1351	    __func__, error));
1352
1353	pmap_invalidate_page(va);
1354
1355	if (pmap_wired(pte))
1356		pmap->pm_stats.wired_count -= 1;
1357
1358	pmap->pm_stats.resident_count -= 1;
1359	if (pmap_managed(pte)) {
1360		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1361		if (pmap_dirty(pte))
1362			vm_page_dirty(m);
1363		if (pmap_accessed(pte))
1364			vm_page_aflag_set(m, PGA_REFERENCED);
1365
1366		error = pmap_remove_entry(pmap, m, va, pv);
1367	}
1368	if (freepte)
1369		pmap_free_pte(pte, va);
1370
1371	return (error);
1372}
1373
1374/*
1375 * Extract the physical page address associated with a kernel
1376 * virtual address.
1377 */
1378vm_paddr_t
1379pmap_kextract(vm_offset_t va)
1380{
1381	struct ia64_lpte *pte;
1382	uint64_t *pbvm_pgtbl;
1383	vm_paddr_t pa;
1384	u_int idx;
1385
1386	CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, va);
1387
1388	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));
1389
1390	/* Regions 6 and 7 are direct mapped. */
1391	if (va >= IA64_RR_BASE(6)) {
1392		pa = IA64_RR_MASK(va);
1393		goto out;
1394	}
1395
1396	/* Region 5 is our KVA. Bail out if the VA is beyond our limits. */
1397	if (va >= kernel_vm_end)
1398		goto err_out;
1399	if (va >= VM_INIT_KERNEL_ADDRESS) {
1400		pte = pmap_find_kpte(va);
1401		pa = pmap_present(pte) ? pmap_ppn(pte) | (va & PAGE_MASK) : 0;
1402		goto out;
1403	}
1404
1405	/* The PBVM page table. */
1406	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
1407		goto err_out;
1408	if (va >= IA64_PBVM_PGTBL) {
1409		pa = (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
1410		goto out;
1411	}
1412
1413	/* The PBVM itself. */
1414	if (va >= IA64_PBVM_BASE) {
1415		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
1416		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
1417		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
1418			goto err_out;
1419		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
1420			goto err_out;
1421		pa = (pbvm_pgtbl[idx] & PTE_PPN_MASK) +
1422		    (va & IA64_PBVM_PAGE_MASK);
1423		goto out;
1424	}
1425
1426 err_out:
1427	printf("XXX: %s: va=%#lx is invalid\n", __func__, va);
1428	pa = 0;
1429	/* FALLTHROUGH */
1430
1431 out:
1432	return (pa);
1433}
1434
/*
 * Add a list of wired pages to the kva.  This routine is only used
 * for temporary kernel mappings that do not need to have page
 * modification or references recorded.  Note that old mappings are
 * simply written over.  The page is effectively wired, but it's
 * customary to not have the PTE reflect that, nor update statistics.
 */
1442void
1443pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1444{
1445	struct ia64_lpte *pte;
1446	int i;
1447
1448	CTR4(KTR_PMAP, "%s(va=%#lx, m_p=%p, cnt=%d)", __func__, va, m, count);
1449
1450	for (i = 0; i < count; i++) {
1451		pte = pmap_find_kpte(va);
1452		if (pmap_present(pte))
1453			pmap_invalidate_page(va);
1454		else
1455			pmap_enter_vhpt(pte, va);
1456		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1457		pmap_pte_attr(pte, m[i]->md.memattr);
1458		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1459		va += PAGE_SIZE;
1460	}
1461}
1462
1463/*
1464 * this routine jerks page mappings from the
1465 * kernel -- it is meant only for temporary mappings.
1466 */
1467void
1468pmap_qremove(vm_offset_t va, int count)
1469{
1470	struct ia64_lpte *pte;
1471	int i;
1472
1473	CTR3(KTR_PMAP, "%s(va=%#lx, cnt=%d)", __func__, va, count);
1474
1475	for (i = 0; i < count; i++) {
1476		pte = pmap_find_kpte(va);
1477		if (pmap_present(pte)) {
1478			pmap_remove_vhpt(va);
1479			pmap_invalidate_page(va);
1480			pmap_clear_present(pte);
1481		}
1482		va += PAGE_SIZE;
1483	}
1484}
1485
1486/*
1487 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1488 * to not have the PTE reflect that, nor update statistics.
1489 */
1490void
1491pmap_kenter(vm_offset_t va, vm_paddr_t pa)
1492{
1493	struct ia64_lpte *pte;
1494
1495	CTR3(KTR_PMAP, "%s(va=%#lx, pa=%#lx)", __func__, va, pa);
1496
1497	pte = pmap_find_kpte(va);
1498	if (pmap_present(pte))
1499		pmap_invalidate_page(va);
1500	else
1501		pmap_enter_vhpt(pte, va);
1502	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1503	pmap_pte_attr(pte, VM_MEMATTR_DEFAULT);
1504	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1505}
1506
1507/*
1508 * Remove a page from the kva
1509 */
1510void
1511pmap_kremove(vm_offset_t va)
1512{
1513	struct ia64_lpte *pte;
1514
1515	CTR2(KTR_PMAP, "%s(va=%#lx)", __func__, va);
1516
1517	pte = pmap_find_kpte(va);
1518	if (pmap_present(pte)) {
1519		pmap_remove_vhpt(va);
1520		pmap_invalidate_page(va);
1521		pmap_clear_present(pte);
1522	}
1523}
1524
1525/*
1526 *	Used to map a range of physical addresses into kernel
1527 *	virtual address space.
1528 *
1529 *	The value passed in '*virt' is a suggested virtual address for
1530 *	the mapping. Architectures which can support a direct-mapped
1531 *	physical to virtual region can return the appropriate address
1532 *	within that region, leaving '*virt' unchanged. Other
1533 *	architectures should map the pages starting at '*virt' and
1534 *	update '*virt' with the first usable address after the mapped
1535 *	region.
1536 */
1537vm_offset_t
1538pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1539{
1540
1541	CTR5(KTR_PMAP, "%s(va_p=%p, sva=%#lx, eva=%#lx, prot=%#x)", __func__,
1542	    virt, start, end, prot);
1543
1544	return IA64_PHYS_TO_RR7(start);
1545}
1546
1547/*
1548 *	Remove the given range of addresses from the specified map.
1549 *
1550 *	It is assumed that the start and end are properly
1551 *	rounded to the page size.
1552 *
1553 *	Sparsely used ranges are inefficiently removed.  The VHPT is
1554 *	probed for every page within the range.  XXX
1555 */
1556void
1557pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1558{
1559	pmap_t oldpmap;
1560	vm_offset_t va;
1561	struct ia64_lpte *pte;
1562
1563	CTR4(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx)", __func__, pmap, sva,
1564	    eva);
1565
1566	/*
1567	 * Perform an unsynchronized read.  This is, however, safe.
1568	 */
1569	if (pmap->pm_stats.resident_count == 0)
1570		return;
1571
1572	rw_wlock(&pvh_global_lock);
1573	PMAP_LOCK(pmap);
1574	oldpmap = pmap_switch(pmap);
1575	for (va = sva; va < eva; va += PAGE_SIZE) {
1576		pte = pmap_find_vhpt(va);
1577		if (pte != NULL)
1578			pmap_remove_pte(pmap, pte, va, 0, 1);
1579	}
1580	rw_wunlock(&pvh_global_lock);
1581	pmap_switch(oldpmap);
1582	PMAP_UNLOCK(pmap);
1583}
1584
1585/*
1586 *	Routine:	pmap_remove_all
1587 *	Function:
1588 *		Removes this physical page from
1589 *		all physical maps in which it resides.
1590 *		Reflects back modify bits to the pager.
1591 *
1592 *	Notes:
1593 *		Original versions of this routine were very
1594 *		inefficient because they iteratively called
1595 *		pmap_remove (slow...)
1596 */
1597void
1598pmap_remove_all(vm_page_t m)
1599{
1600	pmap_t oldpmap;
1601	pv_entry_t pv;
1602
1603	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
1604
1605	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1606	    ("pmap_remove_all: page %p is not managed", m));
1607	rw_wlock(&pvh_global_lock);
1608	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1609		struct ia64_lpte *pte;
1610		pmap_t pmap = PV_PMAP(pv);
1611		vm_offset_t va = pv->pv_va;
1612
1613		PMAP_LOCK(pmap);
1614		oldpmap = pmap_switch(pmap);
1615		pte = pmap_find_vhpt(va);
1616		KASSERT(pte != NULL, ("pte"));
1617		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1618			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1619		pmap_remove_pte(pmap, pte, va, pv, 1);
1620		pmap_switch(oldpmap);
1621		PMAP_UNLOCK(pmap);
1622	}
1623	vm_page_aflag_clear(m, PGA_WRITEABLE);
1624	rw_wunlock(&pvh_global_lock);
1625}
1626
1627/*
1628 *	Set the physical protection on the
1629 *	specified range of this map as requested.
1630 */
1631void
1632pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1633{
1634	pmap_t oldpmap;
1635	struct ia64_lpte *pte;
1636
1637	CTR5(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, prot=%#x)", __func__,
1638	    pmap, sva, eva, prot);
1639
1640	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1641		pmap_remove(pmap, sva, eva);
1642		return;
1643	}
1644
1645	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1646	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1647		return;
1648
1649	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1650		panic("pmap_protect: unaligned addresses");
1651
1652	PMAP_LOCK(pmap);
1653	oldpmap = pmap_switch(pmap);
1654	for ( ; sva < eva; sva += PAGE_SIZE) {
1655		/* If page is invalid, skip this page */
1656		pte = pmap_find_vhpt(sva);
1657		if (pte == NULL)
1658			continue;
1659
1660		/* If there's no change, skip it too */
1661		if (pmap_prot(pte) == prot)
1662			continue;
1663
1664		if ((prot & VM_PROT_WRITE) == 0 &&
1665		    pmap_managed(pte) && pmap_dirty(pte)) {
1666			vm_paddr_t pa = pmap_ppn(pte);
1667			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1668
1669			vm_page_dirty(m);
1670			pmap_clear_dirty(pte);
1671		}
1672
1673		if (prot & VM_PROT_EXECUTE)
1674			ia64_sync_icache(sva, PAGE_SIZE);
1675
1676		pmap_pte_prot(pmap, pte, prot);
1677		pmap_invalidate_page(sva);
1678	}
1679	pmap_switch(oldpmap);
1680	PMAP_UNLOCK(pmap);
1681}
1682
1683/*
1684 *	Insert the given physical page (p) at
1685 *	the specified virtual address (v) in the
1686 *	target physical map with the protection requested.
1687 *
1688 *	If specified, the page will be wired down, meaning
1689 *	that the related pte can not be reclaimed.
1690 *
1691 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1692 *	or lose information.  That is, this routine must actually
1693 *	insert this page into the given map NOW.
1694 */
1695int
1696pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1697    u_int flags, int8_t psind __unused)
1698{
1699	pmap_t oldpmap;
1700	vm_offset_t pa;
1701	vm_offset_t opa;
1702	struct ia64_lpte origpte;
1703	struct ia64_lpte *pte;
1704	boolean_t icache_inval, managed, wired;
1705
1706	CTR5(KTR_PMAP, "pmap_enter(pm=%p, va=%#lx, m=%p, prot=%#x, "
1707	    "flags=%u)", pmap, va, m, prot, flags);
1708
1709	wired = (flags & PMAP_ENTER_WIRED) != 0;
1710	rw_wlock(&pvh_global_lock);
1711	PMAP_LOCK(pmap);
1712	oldpmap = pmap_switch(pmap);
1713
1714	va &= ~PAGE_MASK;
1715 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1716	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
1717		VM_OBJECT_ASSERT_LOCKED(m->object);
1718
1719	/*
1720	 * Find (or create) a pte for the given mapping.
1721	 */
1722	while ((pte = pmap_find_pte(va)) == NULL) {
1723		pmap_switch(oldpmap);
1724		PMAP_UNLOCK(pmap);
1725		rw_wunlock(&pvh_global_lock);
1726		if ((flags & PMAP_ENTER_NOSLEEP) != 0)
1727			return (KERN_RESOURCE_SHORTAGE);
1728		VM_WAIT;
1729		rw_wlock(&pvh_global_lock);
1730		PMAP_LOCK(pmap);
1731		oldpmap = pmap_switch(pmap);
1732	}
1733	origpte = *pte;
1734	if (!pmap_present(pte)) {
1735		opa = ~0UL;
1736		pmap_enter_vhpt(pte, va);
1737	} else
1738		opa = pmap_ppn(pte);
1739	managed = FALSE;
1740	pa = VM_PAGE_TO_PHYS(m);
1741
1742	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1743
1744	/*
1745	 * Mapping has not changed, must be protection or wiring change.
1746	 */
1747	if (opa == pa) {
1748		/*
1749		 * Wiring change, just update stats. We don't worry about
1750		 * wiring PT pages as they remain resident as long as there
1751		 * are valid mappings in them. Hence, if a user page is wired,
1752		 * the PT page will be also.
1753		 */
1754		if (wired && !pmap_wired(&origpte))
1755			pmap->pm_stats.wired_count++;
1756		else if (!wired && pmap_wired(&origpte))
1757			pmap->pm_stats.wired_count--;
1758
1759		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1760
1761		/*
1762		 * We might be turning off write access to the page,
1763		 * so we go ahead and sense modify status. Otherwise,
1764		 * we can avoid I-cache invalidation if the page
1765		 * already allowed execution.
1766		 */
1767		if (managed && pmap_dirty(&origpte))
1768			vm_page_dirty(m);
1769		else if (pmap_exec(&origpte))
1770			icache_inval = FALSE;
1771
1772		pmap_invalidate_page(va);
1773		goto validate;
1774	}
1775
1776	/*
1777	 * Mapping has changed, invalidate old range and fall
1778	 * through to handle validating new mapping.
1779	 */
1780	if (opa != ~0UL) {
1781		pmap_remove_pte(pmap, pte, va, 0, 0);
1782		pmap_enter_vhpt(pte, va);
1783	}
1784
1785	/*
1786	 * Enter on the PV list if part of our managed memory.
1787	 */
1788	if ((m->oflags & VPO_UNMANAGED) == 0) {
1789		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1790		    ("pmap_enter: managed mapping within the clean submap"));
1791		pmap_insert_entry(pmap, va, m);
1792		managed = TRUE;
1793	}
1794
1795	/*
1796	 * Increment counters
1797	 */
1798	pmap->pm_stats.resident_count++;
1799	if (wired)
1800		pmap->pm_stats.wired_count++;
1801
1802validate:
1803
1804	/*
1805	 * Now validate mapping with desired protection/wiring. This
1806	 * adds the pte to the VHPT if necessary.
1807	 */
1808	pmap_pte_prot(pmap, pte, prot);
1809	pmap_pte_attr(pte, m->md.memattr);
1810	pmap_set_pte(pte, va, pa, wired, managed);
1811
1812	/* Invalidate the I-cache when needed. */
1813	if (icache_inval)
1814		ia64_sync_icache(va, PAGE_SIZE);
1815
1816	if ((prot & VM_PROT_WRITE) != 0 && managed)
1817		vm_page_aflag_set(m, PGA_WRITEABLE);
1818	rw_wunlock(&pvh_global_lock);
1819	pmap_switch(oldpmap);
1820	PMAP_UNLOCK(pmap);
1821	return (KERN_SUCCESS);
1822}
1823
1824/*
1825 * Maps a sequence of resident pages belonging to the same object.
1826 * The sequence begins with the given page m_start.  This page is
1827 * mapped at the given virtual address start.  Each subsequent page is
1828 * mapped at a virtual address that is offset from start by the same
1829 * amount as the page is offset from m_start within the object.  The
1830 * last page in the sequence is the page with the largest offset from
1831 * m_start that can be mapped at a virtual address less than the given
1832 * virtual address end.  Not every virtual page between start and end
1833 * is mapped; only those for which a resident page exists with the
1834 * corresponding offset from m_start are mapped.
1835 */
1836void
1837pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1838    vm_page_t m_start, vm_prot_t prot)
1839{
1840	pmap_t oldpmap;
1841	vm_page_t m;
1842	vm_pindex_t diff, psize;
1843
1844	CTR6(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, m=%p, prot=%#x)",
1845	    __func__, pmap, start, end, m_start, prot);
1846
1847	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1848
1849	psize = atop(end - start);
1850	m = m_start;
1851	rw_wlock(&pvh_global_lock);
1852	PMAP_LOCK(pmap);
1853	oldpmap = pmap_switch(pmap);
1854	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1855		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1856		m = TAILQ_NEXT(m, listq);
1857	}
1858	rw_wunlock(&pvh_global_lock);
1859	pmap_switch(oldpmap);
1860 	PMAP_UNLOCK(pmap);
1861}
1862
/*
 * This code makes some *MAJOR* assumptions:
 * 1. The current pmap and the given pmap exist.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * but is *MUCH* faster than pmap_enter...
 */
1871void
1872pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1873{
1874	pmap_t oldpmap;
1875
1876	CTR5(KTR_PMAP, "%s(pm=%p, va=%#lx, m=%p, prot=%#x)", __func__, pmap,
1877	    va, m, prot);
1878
1879	rw_wlock(&pvh_global_lock);
1880	PMAP_LOCK(pmap);
1881	oldpmap = pmap_switch(pmap);
1882	pmap_enter_quick_locked(pmap, va, m, prot);
1883	rw_wunlock(&pvh_global_lock);
1884	pmap_switch(oldpmap);
1885	PMAP_UNLOCK(pmap);
1886}
1887
1888static void
1889pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1890    vm_prot_t prot)
1891{
1892	struct ia64_lpte *pte;
1893	boolean_t managed;
1894
1895	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1896	    (m->oflags & VPO_UNMANAGED) != 0,
1897	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1898	rw_assert(&pvh_global_lock, RA_WLOCKED);
1899	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1900
1901	if ((pte = pmap_find_pte(va)) == NULL)
1902		return;
1903
1904	if (!pmap_present(pte)) {
1905		/* Enter on the PV list if the page is managed. */
1906		if ((m->oflags & VPO_UNMANAGED) == 0) {
1907			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1908				pmap_free_pte(pte, va);
1909				return;
1910			}
1911			managed = TRUE;
1912		} else
1913			managed = FALSE;
1914
1915		/* Increment counters. */
1916		pmap->pm_stats.resident_count++;
1917
1918		/* Initialise with R/O protection and enter into VHPT. */
1919		pmap_enter_vhpt(pte, va);
1920		pmap_pte_prot(pmap, pte,
1921		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1922		pmap_pte_attr(pte, m->md.memattr);
1923		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1924
1925		if (prot & VM_PROT_EXECUTE)
1926			ia64_sync_icache(va, PAGE_SIZE);
1927	}
1928}
1929
1930/*
1931 * pmap_object_init_pt preloads the ptes for a given object
1932 * into the specified pmap.  This eliminates the blast of soft
1933 * faults on process startup and immediately after an mmap.
1934 */
1935void
1936pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
1937    vm_pindex_t pindex, vm_size_t size)
1938{
1939
1940	CTR6(KTR_PMAP, "%s(pm=%p, va=%#lx, obj=%p, idx=%lu, sz=%#lx)",
1941	    __func__, pmap, addr, object, pindex, size);
1942
1943	VM_OBJECT_ASSERT_WLOCKED(object);
1944	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1945	    ("pmap_object_init_pt: non-device object"));
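
	/* Nothing is preloaded here; mappings are created on demand. */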
1946}
1947
1948/*
1949 *	Clear the wired attribute from the mappings for the specified range of
1950 *	addresses in the given pmap.  Every valid mapping within that range
1951 *	must have the wired attribute set.  In contrast, invalid mappings
1952 *	cannot have the wired attribute set, so they are ignored.
1953 *
1954 *	The wired attribute of the page table entry is not a hardware feature,
1955 *	so there is no need to invalidate any TLB entries.
1956 */
1957void
1958pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1959{
1960	pmap_t oldpmap;
1961	struct ia64_lpte *pte;
1962
	CTR4(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx)", __func__, pmap, sva,
	    eva);
1964
1965	PMAP_LOCK(pmap);
1966	oldpmap = pmap_switch(pmap);
1967	for (; sva < eva; sva += PAGE_SIZE) {
1968		pte = pmap_find_vhpt(sva);
1969		if (pte == NULL)
1970			continue;
1971		if (!pmap_wired(pte))
1972			panic("pmap_unwire: pte %p isn't wired", pte);
1973		pmap->pm_stats.wired_count--;
1974		pmap_clear_wired(pte);
1975	}
1976	pmap_switch(oldpmap);
1977	PMAP_UNLOCK(pmap);
1978}
1979
1980/*
1981 *	Copy the range specified by src_addr/len
1982 *	from the source map to the range dst_addr/len
1983 *	in the destination map.
1984 *
1985 *	This routine is only advisory and need not do anything.
1986 */
1987void
1988pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_va, vm_size_t len,
1989    vm_offset_t src_va)
1990{
1991
1992	CTR6(KTR_PMAP, "%s(dpm=%p, spm=%p, dva=%#lx, sz=%#lx, sva=%#lx)",
1993	    __func__, dst_pmap, src_pmap, dst_va, len, src_va);
1994}
1995
1996/*
1997 *	pmap_zero_page zeros the specified hardware page by
1998 *	mapping it into virtual memory and using bzero to clear
1999 *	its contents.
2000 */
2001void
2002pmap_zero_page(vm_page_t m)
2003{
2004	void *p;
2005
2006	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2007
2008	p = (void *)pmap_page_to_va(m);
2009	bzero(p, PAGE_SIZE);
2010}
2011
2012/*
2013 *	pmap_zero_page_area zeros the specified hardware page by
2014 *	mapping it into virtual memory and using bzero to clear
2015 *	its contents.
2016 *
2017 *	off and size must reside within a single page.
2018 */
2019void
2020pmap_zero_page_area(vm_page_t m, int off, int size)
2021{
2022	char *p;
2023
2024	CTR4(KTR_PMAP, "%s(m=%p, ofs=%d, len=%d)", __func__, m, off, size);
2025
	p = (char *)pmap_page_to_va(m);
2027	bzero(p + off, size);
2028}
2029
2030/*
2031 *	pmap_zero_page_idle zeros the specified hardware page by
2032 *	mapping it into virtual memory and using bzero to clear
2033 *	its contents.  This is for the vm_idlezero process.
2034 */
2035void
2036pmap_zero_page_idle(vm_page_t m)
2037{
2038	void *p;
2039
2040	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2041
2042	p = (void *)pmap_page_to_va(m);
2043	bzero(p, PAGE_SIZE);
2044}
2045
2046/*
2047 *	pmap_copy_page copies the specified (machine independent)
2048 *	page by mapping the page into virtual memory and using
2049 *	bcopy to copy the page, one machine dependent page at a
2050 *	time.
2051 */
2052void
2053pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2054{
2055	void *dst, *src;
2056
2057	CTR3(KTR_PMAP, "%s(sm=%p, dm=%p)", __func__, msrc, mdst);
2058
2059	src = (void *)pmap_page_to_va(msrc);
2060	dst = (void *)pmap_page_to_va(mdst);
2061	bcopy(src, dst, PAGE_SIZE);
2062}
2063
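/*
 *	pmap_copy_pages copies xfersize bytes from offset a_offset in the
 *	pages ma[] to offset b_offset in the pages mb[].  The copy is done
 *	in chunks that never cross a page boundary in either the source or
 *	the destination.
 */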
2064void
2065pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2066    vm_offset_t b_offset, int xfersize)
2067{
2068	void *a_cp, *b_cp;
2069	vm_offset_t a_pg_offset, b_pg_offset;
2070	int cnt;
2071
2072	CTR6(KTR_PMAP, "%s(m0=%p, va0=%#lx, m1=%p, va1=%#lx, sz=%#x)",
2073	    __func__, ma, a_offset, mb, b_offset, xfersize);
2074
2075	while (xfersize > 0) {
2076		a_pg_offset = a_offset & PAGE_MASK;
2077		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2078		a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) +
2079		    a_pg_offset;
2080		b_pg_offset = b_offset & PAGE_MASK;
2081		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2082		b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) +
2083		    b_pg_offset;
2084		bcopy(a_cp, b_cp, cnt);
2085		a_offset += cnt;
2086		b_offset += cnt;
2087		xfersize -= cnt;
2088	}
2089}
2090
2091/*
2092 * Returns true if the pmap's pv is one of the first
2093 * 16 pvs linked to from this page.  This count may
2094 * be changed upwards or downwards in the future; it
2095 * is only necessary that true be returned for a small
2096 * subset of pmaps for proper page aging.
2097 */
2098boolean_t
2099pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2100{
2101	pv_entry_t pv;
2102	int loops = 0;
2103	boolean_t rv;
2104
2105	CTR3(KTR_PMAP, "%s(pm=%p, m=%p)", __func__, pmap, m);
2106
2107	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2108	    ("pmap_page_exists_quick: page %p is not managed", m));
2109	rv = FALSE;
2110	rw_wlock(&pvh_global_lock);
2111	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2112		if (PV_PMAP(pv) == pmap) {
2113			rv = TRUE;
2114			break;
2115		}
2116		loops++;
2117		if (loops >= 16)
2118			break;
2119	}
2120	rw_wunlock(&pvh_global_lock);
2121	return (rv);
2122}
2123
2124/*
2125 *	pmap_page_wired_mappings:
2126 *
2127 *	Return the number of managed mappings to the given physical page
2128 *	that are wired.
2129 */
2130int
2131pmap_page_wired_mappings(vm_page_t m)
2132{
2133	struct ia64_lpte *pte;
2134	pmap_t oldpmap, pmap;
2135	pv_entry_t pv;
2136	int count;
2137
2138	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2139
2140	count = 0;
2141	if ((m->oflags & VPO_UNMANAGED) != 0)
2142		return (count);
2143	rw_wlock(&pvh_global_lock);
2144	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2145		pmap = PV_PMAP(pv);
2146		PMAP_LOCK(pmap);
2147		oldpmap = pmap_switch(pmap);
2148		pte = pmap_find_vhpt(pv->pv_va);
2149		KASSERT(pte != NULL, ("pte"));
2150		if (pmap_wired(pte))
2151			count++;
2152		pmap_switch(oldpmap);
2153		PMAP_UNLOCK(pmap);
2154	}
2155	rw_wunlock(&pvh_global_lock);
2156	return (count);
2157}
2158
/*
 * Remove all pages from the specified address space; this aids process
 * exit speeds.  Also, this code is special-cased for the current
 * process only, but the more generic (and slightly slower) mode can be
 * enabled.  This is much faster than pmap_remove in the case of running
 * down an entire address space.
 */
2167void
2168pmap_remove_pages(pmap_t pmap)
2169{
2170	struct pv_chunk *pc, *npc;
2171	struct ia64_lpte *pte;
2172	pmap_t oldpmap;
2173	pv_entry_t pv;
2174	vm_offset_t va;
2175	vm_page_t m;
2176	u_long inuse, bitmask;
2177	int allfree, bit, field, idx;
2178
2179	CTR2(KTR_PMAP, "%s(pm=%p)", __func__, pmap);
2180
2181	rw_wlock(&pvh_global_lock);
2182	PMAP_LOCK(pmap);
2183	oldpmap = pmap_switch(pmap);
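	/*
	 * Walk this pmap's pv chunks.  Bits that are clear in pc_map
	 * identify pv entries that are in use; every such entry is
	 * removed unless its mapping is wired.  A chunk is freed once
	 * all of its entries have been released.
	 */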
2184	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2185		allfree = 1;
2186		for (field = 0; field < _NPCM; field++) {
2187			inuse = ~pc->pc_map[field] & pc_freemask[field];
2188			while (inuse != 0) {
2189				bit = ffsl(inuse) - 1;
2190				bitmask = 1UL << bit;
2191				idx = field * sizeof(inuse) * NBBY + bit;
2192				pv = &pc->pc_pventry[idx];
2193				inuse &= ~bitmask;
2194				va = pv->pv_va;
2195				pte = pmap_find_vhpt(va);
2196				KASSERT(pte != NULL, ("pte"));
2197				if (pmap_wired(pte)) {
2198					allfree = 0;
2199					continue;
2200				}
2201				pmap_remove_vhpt(va);
2202				pmap_invalidate_page(va);
2203				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2204				if (pmap_dirty(pte))
2205					vm_page_dirty(m);
2206				pmap_free_pte(pte, va);
2207				/* Mark free */
2208				PV_STAT(pv_entry_frees++);
2209				PV_STAT(pv_entry_spare++);
2210				pv_entry_count--;
2211				pc->pc_map[field] |= bitmask;
2212				pmap->pm_stats.resident_count--;
2213				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2214				if (TAILQ_EMPTY(&m->md.pv_list))
2215					vm_page_aflag_clear(m, PGA_WRITEABLE);
2216			}
2217		}
2218		if (allfree) {
2219			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2220			free_pv_chunk(pc);
2221		}
2222	}
2223	pmap_switch(oldpmap);
2224	PMAP_UNLOCK(pmap);
2225	rw_wunlock(&pvh_global_lock);
2226}
2227
2228/*
2229 *	pmap_ts_referenced:
2230 *
2231 *	Return a count of reference bits for a page, clearing those bits.
2232 *	It is not necessary for every reference bit to be cleared, but it
2233 *	is necessary that 0 only be returned when there are truly no
2234 *	reference bits set.
2235 *
2236 *	XXX: The exact number of bits to check and clear is a matter that
2237 *	should be tested and standardized at some point in the future for
2238 *	optimal aging of shared pages.
2239 */
2240int
2241pmap_ts_referenced(vm_page_t m)
2242{
2243	struct ia64_lpte *pte;
2244	pmap_t oldpmap, pmap;
2245	pv_entry_t pv;
2246	int count = 0;
2247
2248	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2249
2250	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2251	    ("pmap_ts_referenced: page %p is not managed", m));
2252	rw_wlock(&pvh_global_lock);
2253	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2254		pmap = PV_PMAP(pv);
2255		PMAP_LOCK(pmap);
2256		oldpmap = pmap_switch(pmap);
2257		pte = pmap_find_vhpt(pv->pv_va);
2258		KASSERT(pte != NULL, ("pte"));
2259		if (pmap_accessed(pte)) {
2260			count++;
2261			pmap_clear_accessed(pte);
2262			pmap_invalidate_page(pv->pv_va);
2263		}
2264		pmap_switch(oldpmap);
2265		PMAP_UNLOCK(pmap);
2266	}
2267	rw_wunlock(&pvh_global_lock);
2268	return (count);
2269}
2270
2271/*
2272 *	pmap_is_modified:
2273 *
2274 *	Return whether or not the specified physical page was modified
2275 *	in any physical maps.
2276 */
2277boolean_t
2278pmap_is_modified(vm_page_t m)
2279{
2280	struct ia64_lpte *pte;
2281	pmap_t oldpmap, pmap;
2282	pv_entry_t pv;
2283	boolean_t rv;
2284
2285	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2286
2287	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2288	    ("pmap_is_modified: page %p is not managed", m));
2289	rv = FALSE;
2290
2291	/*
2292	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2293	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2294	 * is clear, no PTEs can be dirty.
2295	 */
2296	VM_OBJECT_ASSERT_WLOCKED(m->object);
2297	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2298		return (rv);
2299	rw_wlock(&pvh_global_lock);
2300	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2301		pmap = PV_PMAP(pv);
2302		PMAP_LOCK(pmap);
2303		oldpmap = pmap_switch(pmap);
2304		pte = pmap_find_vhpt(pv->pv_va);
2305		pmap_switch(oldpmap);
2306		KASSERT(pte != NULL, ("pte"));
2307		rv = pmap_dirty(pte) ? TRUE : FALSE;
2308		PMAP_UNLOCK(pmap);
2309		if (rv)
2310			break;
2311	}
2312	rw_wunlock(&pvh_global_lock);
2313	return (rv);
2314}
2315
2316/*
2317 *	pmap_is_prefaultable:
2318 *
 *	Return whether or not the specified virtual address is eligible
2320 *	for prefault.
2321 */
2322boolean_t
2323pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2324{
2325	struct ia64_lpte *pte;
2326
2327	CTR3(KTR_PMAP, "%s(pm=%p, va=%#lx)", __func__, pmap, addr);
2328
2329	pte = pmap_find_vhpt(addr);
2330	if (pte != NULL && pmap_present(pte))
2331		return (FALSE);
2332	return (TRUE);
2333}
2334
2335/*
2336 *	pmap_is_referenced:
2337 *
2338 *	Return whether or not the specified physical page was referenced
2339 *	in any physical maps.
2340 */
2341boolean_t
2342pmap_is_referenced(vm_page_t m)
2343{
2344	struct ia64_lpte *pte;
2345	pmap_t oldpmap, pmap;
2346	pv_entry_t pv;
2347	boolean_t rv;
2348
2349	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2350
2351	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2352	    ("pmap_is_referenced: page %p is not managed", m));
2353	rv = FALSE;
2354	rw_wlock(&pvh_global_lock);
2355	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2356		pmap = PV_PMAP(pv);
2357		PMAP_LOCK(pmap);
2358		oldpmap = pmap_switch(pmap);
2359		pte = pmap_find_vhpt(pv->pv_va);
2360		pmap_switch(oldpmap);
2361		KASSERT(pte != NULL, ("pte"));
2362		rv = pmap_accessed(pte) ? TRUE : FALSE;
2363		PMAP_UNLOCK(pmap);
2364		if (rv)
2365			break;
2366	}
2367	rw_wunlock(&pvh_global_lock);
2368	return (rv);
2369}
2370
2371/*
2372 *	Apply the given advice to the specified range of addresses within the
2373 *	given pmap.  Depending on the advice, clear the referenced and/or
2374 *	modified flags in each mapping and set the mapped page's dirty field.
2375 */
2376void
2377pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2378{
2379	struct ia64_lpte *pte;
2380	pmap_t oldpmap;
2381	vm_page_t m;
2382
2383	CTR5(KTR_PMAP, "%s(pm=%p, sva=%#lx, eva=%#lx, adv=%d)", __func__,
2384	    pmap, sva, eva, advice);
2385
2386	PMAP_LOCK(pmap);
2387	oldpmap = pmap_switch(pmap);
2388	for (; sva < eva; sva += PAGE_SIZE) {
2389		/* If page is invalid, skip this page. */
2390		pte = pmap_find_vhpt(sva);
2391		if (pte == NULL)
2392			continue;
2393
2394		/* If it isn't managed, skip it too. */
2395		if (!pmap_managed(pte))
2396			continue;
2397
2398		/* Clear its modified and referenced bits. */
2399		if (pmap_dirty(pte)) {
2400			if (advice == MADV_DONTNEED) {
2401				/*
2402				 * Future calls to pmap_is_modified() can be
2403				 * avoided by making the page dirty now.
2404				 */
2405				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
2406				vm_page_dirty(m);
2407			}
2408			pmap_clear_dirty(pte);
2409		} else if (!pmap_accessed(pte))
2410			continue;
2411		pmap_clear_accessed(pte);
2412		pmap_invalidate_page(sva);
2413	}
2414	pmap_switch(oldpmap);
2415	PMAP_UNLOCK(pmap);
2416}
2417
2418/*
2419 *	Clear the modify bits on the specified physical page.
2420 */
2421void
2422pmap_clear_modify(vm_page_t m)
2423{
2424	struct ia64_lpte *pte;
2425	pmap_t oldpmap, pmap;
2426	pv_entry_t pv;
2427
2428	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2429
2430	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2431	    ("pmap_clear_modify: page %p is not managed", m));
2432	VM_OBJECT_ASSERT_WLOCKED(m->object);
2433	KASSERT(!vm_page_xbusied(m),
2434	    ("pmap_clear_modify: page %p is exclusive busied", m));
2435
	/*
	 * If the page is not PGA_WRITEABLE, then no PTEs can be dirty.
	 * If the object containing the page is locked and the page is not
	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
	 */
2441	if ((m->aflags & PGA_WRITEABLE) == 0)
2442		return;
2443	rw_wlock(&pvh_global_lock);
2444	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2445		pmap = PV_PMAP(pv);
2446		PMAP_LOCK(pmap);
2447		oldpmap = pmap_switch(pmap);
2448		pte = pmap_find_vhpt(pv->pv_va);
2449		KASSERT(pte != NULL, ("pte"));
2450		if (pmap_dirty(pte)) {
2451			pmap_clear_dirty(pte);
2452			pmap_invalidate_page(pv->pv_va);
2453		}
2454		pmap_switch(oldpmap);
2455		PMAP_UNLOCK(pmap);
2456	}
2457	rw_wunlock(&pvh_global_lock);
2458}
2459
2460/*
2461 * Clear the write and modified bits in each of the given page's mappings.
2462 */
2463void
2464pmap_remove_write(vm_page_t m)
2465{
2466	struct ia64_lpte *pte;
2467	pmap_t oldpmap, pmap;
2468	pv_entry_t pv;
2469	vm_prot_t prot;
2470
2471	CTR2(KTR_PMAP, "%s(m=%p)", __func__, m);
2472
2473	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2474	    ("pmap_remove_write: page %p is not managed", m));
2475
2476	/*
2477	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2478	 * set by another thread while the object is locked.  Thus,
2479	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2480	 */
2481	VM_OBJECT_ASSERT_WLOCKED(m->object);
2482	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2483		return;
2484	rw_wlock(&pvh_global_lock);
2485	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2486		pmap = PV_PMAP(pv);
2487		PMAP_LOCK(pmap);
2488		oldpmap = pmap_switch(pmap);
2489		pte = pmap_find_vhpt(pv->pv_va);
2490		KASSERT(pte != NULL, ("pte"));
2491		prot = pmap_prot(pte);
2492		if ((prot & VM_PROT_WRITE) != 0) {
2493			if (pmap_dirty(pte)) {
2494				vm_page_dirty(m);
2495				pmap_clear_dirty(pte);
2496			}
2497			prot &= ~VM_PROT_WRITE;
2498			pmap_pte_prot(pmap, pte, prot);
2499			pmap_pte_attr(pte, m->md.memattr);
2500			pmap_invalidate_page(pv->pv_va);
2501		}
2502		pmap_switch(oldpmap);
2503		PMAP_UNLOCK(pmap);
2504	}
2505	vm_page_aflag_clear(m, PGA_WRITEABLE);
2506	rw_wunlock(&pvh_global_lock);
2507}
2508
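/*
 * Translate a physical address range to a kernel virtual address using
 * the EFI memory map.  Descriptors with the write-back attribute are
 * mapped through the cacheable region 7, all others through the
 * uncacheable region 6; ranges in free DRAM are refused.  The most
 * recent translation is remembered to short-circuit repeated lookups.
 */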
2509vm_offset_t
2510pmap_mapdev_priv(vm_paddr_t pa, vm_size_t sz, vm_memattr_t attr)
2511{
2512	static vm_offset_t last_va = 0;
2513	static vm_paddr_t last_pa = ~0UL;
2514	static vm_size_t last_sz = 0;
2515	struct efi_md *md;
2516
2517	if (pa == last_pa && sz == last_sz)
2518		return (last_va);
2519
2520	md = efi_md_find(pa);
2521	if (md == NULL) {
2522		printf("%s: [%#lx..%#lx] not covered by memory descriptor\n",
2523		    __func__, pa, pa + sz - 1);
2524		return (IA64_PHYS_TO_RR6(pa));
2525	}
2526
2527	if (md->md_type == EFI_MD_TYPE_FREE) {
2528		printf("%s: [%#lx..%#lx] is in DRAM\n", __func__, pa,
2529		    pa + sz - 1);
2530		return (0);
2531	}
2532
2533	last_va = (md->md_attr & EFI_MD_ATTR_WB) ? IA64_PHYS_TO_RR7(pa) :
2534	    IA64_PHYS_TO_RR6(pa);
2535	last_pa = pa;
2536	last_sz = sz;
2537	return (last_va);
2538}
2539
2540/*
2541 * Map a set of physical memory pages into the kernel virtual
2542 * address space. Return a pointer to where it is mapped. This
2543 * routine is intended to be used for mapping device memory,
2544 * NOT real memory.
2545 */
2546void *
2547pmap_mapdev_attr(vm_paddr_t pa, vm_size_t sz, vm_memattr_t attr)
2548{
2549	vm_offset_t va;
2550
2551	CTR4(KTR_PMAP, "%s(pa=%#lx, sz=%#lx, attr=%#x)", __func__, pa, sz,
2552	    attr);
2553
2554	va = pmap_mapdev_priv(pa, sz, attr);
2555	return ((void *)(uintptr_t)va);
2556}
2557
/*
 * 'Unmap' a range mapped by pmap_mapdev_attr().  This is a no-op on
 * ia64: device mappings are made through the direct-mapped regions and
 * consume no kernel virtual address space.
 */
2561void
2562pmap_unmapdev(vm_offset_t va, vm_size_t size)
2563{
2564
2565	CTR3(KTR_PMAP, "%s(va=%#lx, sz=%#lx)", __func__, va, size);
2566}
2567
2568/*
2569 * Sets the memory attribute for the specified page.
2570 */
2571static void
2572pmap_page_set_memattr_1(void *arg)
2573{
2574	struct ia64_pal_result res;
2575	register_t is;
2576	uintptr_t pp = (uintptr_t)arg;
2577
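
	/*
	 * Make the PAL call identified by the argument (e.g.
	 * PAL_PREFETCH_VISIBILITY or PAL_MC_DRAIN) with interrupts
	 * disabled.  Called directly or through smp_rendezvous() from
	 * pmap_page_set_memattr().
	 */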
2578	is = intr_disable();
2579	res = ia64_call_pal_static(pp, 0, 0, 0);
2580	intr_restore(is);
2581}
2582
2583void
2584pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2585{
2586	struct ia64_lpte *pte;
2587	pmap_t oldpmap, pmap;
2588	pv_entry_t pv;
2589	void *va;
2590
2591	CTR3(KTR_PMAP, "%s(m=%p, attr=%#x)", __func__, m, ma);
2592
2593	rw_wlock(&pvh_global_lock);
2594	m->md.memattr = ma;
2595	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2596		pmap = PV_PMAP(pv);
2597		PMAP_LOCK(pmap);
2598		oldpmap = pmap_switch(pmap);
2599		pte = pmap_find_vhpt(pv->pv_va);
2600		KASSERT(pte != NULL, ("pte"));
2601		pmap_pte_attr(pte, ma);
2602		pmap_invalidate_page(pv->pv_va);
2603		pmap_switch(oldpmap);
2604		PMAP_UNLOCK(pmap);
2605	}
2606	rw_wunlock(&pvh_global_lock);
2607
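	/*
	 * When the page becomes uncacheable: make outstanding prefetches
	 * visible on all CPUs, flush the page from the data cache and
	 * then have each CPU drain pending machine-check state via PAL.
	 */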
2608	if (ma == VM_MEMATTR_UNCACHEABLE) {
2609#ifdef SMP
2610		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2611		    (void *)PAL_PREFETCH_VISIBILITY);
2612#else
2613		pmap_page_set_memattr_1((void *)PAL_PREFETCH_VISIBILITY);
2614#endif
2615		va = (void *)pmap_page_to_va(m);
2616		critical_enter();
2617		cpu_flush_dcache(va, PAGE_SIZE);
2618		critical_exit();
2619#ifdef SMP
2620		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2621		    (void *)PAL_MC_DRAIN);
2622#else
2623		pmap_page_set_memattr_1((void *)PAL_MC_DRAIN);
2624#endif
2625	}
2626}
2627
/*
 * Perform the pmap work for mincore(2).
 */
2631int
2632pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2633{
2634	pmap_t oldpmap;
2635	struct ia64_lpte *pte, tpte;
2636	vm_paddr_t pa;
2637	int val;
2638
2639	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, pa_p=%p)", __func__, pmap, addr,
2640	    locked_pa);
2641
2642	PMAP_LOCK(pmap);
2643retry:
2644	oldpmap = pmap_switch(pmap);
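	/*
	 * Look up the PTE with the pmap active and take a local copy so
	 * that it can still be examined after switching back.
	 */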
2645	pte = pmap_find_vhpt(addr);
2646	if (pte != NULL) {
2647		tpte = *pte;
2648		pte = &tpte;
2649	}
2650	pmap_switch(oldpmap);
2651	if (pte == NULL || !pmap_present(pte)) {
2652		val = 0;
2653		goto out;
2654	}
2655	val = MINCORE_INCORE;
2656	if (pmap_dirty(pte))
2657		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2658	if (pmap_accessed(pte))
2659		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2660	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2661	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
2662	    pmap_managed(pte)) {
2663		pa = pmap_ppn(pte);
2664		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
2665		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2666			goto retry;
2667	} else
2668out:
2669		PA_UNLOCK_COND(*locked_pa);
2670	PMAP_UNLOCK(pmap);
2671	return (val);
2672}
2673
/*
 * Make the pmap of the specified thread's process the active pmap on
 * the current CPU.
 */
2677void
2678pmap_activate(struct thread *td)
2679{
2680
2681	CTR2(KTR_PMAP, "%s(td=%p)", __func__, td);
2682
2683	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2684}
2685
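/*
 * Load the given pmap's region IDs into the region registers covering
 * the user regions and return the previously active pmap.  If pm is
 * NULL, a placeholder ID (equal to the region number) is installed for
 * each user region instead.
 */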
2686pmap_t
2687pmap_switch(pmap_t pm)
2688{
2689	pmap_t prevpm;
2690	int i;
2691
2692	critical_enter();
2693	prevpm = PCPU_GET(md.current_pmap);
2694	if (prevpm == pm)
2695		goto out;
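	/*
	 * Region register layout: the VHPT walker enable (VE) bit is bit
	 * 0, the preferred page size occupies bits 2-7 and the region ID
	 * starts at bit 8, hence the (rid << 8)|(PAGE_SHIFT << 2)|1
	 * encoding below.
	 */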
2696	if (pm == NULL) {
2697		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2698			ia64_set_rr(IA64_RR_BASE(i),
2699			    (i << 8)|(PAGE_SHIFT << 2)|1);
2700		}
2701	} else {
2702		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2703			ia64_set_rr(IA64_RR_BASE(i),
2704			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2705		}
2706	}
2707	PCPU_SET(md.current_pmap, pm);
2708	ia64_srlz_d();
2709
2710out:
2711	critical_exit();
2712	return (prevpm);
2713}
2714
/*
 * Synchronize the instruction cache with the data cache for the given
 * range of virtual addresses in the specified pmap.
 */
2718void
2719pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2720{
2721	pmap_t oldpm;
2722	struct ia64_lpte *pte;
2723	vm_offset_t lim;
2724	vm_size_t len;
2725
2726	CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, sz=%#lx)", __func__, pm, va, sz);
2727
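	/*
	 * Round the range out to 32-byte chunks, the granularity assumed
	 * for the flush, then walk it one page at a time so that only
	 * resident pages are synchronized.
	 */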
2728	sz += va & 31;
2729	va &= ~31;
2730	sz = (sz + 31) & ~31;
2731
2732	PMAP_LOCK(pm);
2733	oldpm = pmap_switch(pm);
2734	while (sz > 0) {
2735		lim = round_page(va);
2736		len = MIN(lim - va, sz);
2737		pte = pmap_find_vhpt(va);
2738		if (pte != NULL && pmap_present(pte))
2739			ia64_sync_icache(va, len);
2740		va += len;
2741		sz -= len;
2742	}
2743	pmap_switch(oldpm);
2744	PMAP_UNLOCK(pm);
2745}
2746
/*
 *	Increase the starting virtual address of the given mapping if a
 *	different alignment might result in more superpage mappings.
 *	Superpage mappings are not created by this pmap, so this is
 *	currently a no-op.
 */
2751void
2752pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2753    vm_offset_t *addr, vm_size_t size)
2754{
2755
2756	CTR5(KTR_PMAP, "%s(obj=%p, ofs=%#lx, va_p=%p, sz=%#lx)", __func__,
2757	    object, offset, addr, size);
2758}
2759
2760#include "opt_ddb.h"
2761
2762#ifdef DDB
2763
2764#include <ddb/ddb.h>
2765
2766static const char*	psnames[] = {
2767	"1B",	"2B",	"4B",	"8B",
2768	"16B",	"32B",	"64B",	"128B",
2769	"256B",	"512B",	"1K",	"2K",
2770	"4K",	"8K",	"16K",	"32K",
2771	"64K",	"128K",	"256K",	"512K",
2772	"1M",	"2M",	"4M",	"8M",
2773	"16M",	"32M",	"64M",	"128M",
2774	"256M",	"512M",	"1G",	"2G"
2775};
2776
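/*
 * Print the contents of the instruction (type 0) or data (type 1)
 * translation registers, as read through PAL_VM_TR_READ.
 */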
2777static void
2778print_trs(int type)
2779{
2780	struct ia64_pal_result res;
2781	int i, maxtr;
2782	struct {
2783		pt_entry_t	pte;
2784		uint64_t	itir;
2785		uint64_t	ifa;
2786		struct ia64_rr	rr;
2787	} buf;
2788	static const char *manames[] = {
2789		"WB",	"bad",	"bad",	"bad",
2790		"UC",	"UCE",	"WC",	"NaT",
2791	};
2792
2793	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2794	if (res.pal_status != 0) {
2795		db_printf("Can't get VM summary\n");
2796		return;
2797	}
2798
2799	if (type == 0)
2800		maxtr = (res.pal_result[0] >> 40) & 0xff;
2801	else
2802		maxtr = (res.pal_result[0] >> 32) & 0xff;
2803
2804	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2805	for (i = 0; i <= maxtr; i++) {
2806		bzero(&buf, sizeof(buf));
2807		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
2808		    ia64_tpa((uint64_t)&buf));
2809		if (!(res.pal_result[0] & 1))
2810			buf.pte &= ~PTE_AR_MASK;
2811		if (!(res.pal_result[0] & 2))
2812			buf.pte &= ~PTE_PL_MASK;
2813		if (!(res.pal_result[0] & 4))
2814			pmap_clear_dirty(&buf);
2815		if (!(res.pal_result[0] & 8))
2816			buf.pte &= ~PTE_MA_MASK;
2817		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2818		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2819		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2820		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2821		    (buf.pte & PTE_ED) ? 1 : 0,
2822		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2823		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2824		    (pmap_dirty(&buf)) ? 1 : 0,
2825		    (pmap_accessed(&buf)) ? 1 : 0,
2826		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2827		    (pmap_present(&buf)) ? 1 : 0,
2828		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2829	}
2830}
2831
2832DB_COMMAND(itr, db_itr)
2833{
2834	print_trs(0);
2835}
2836
2837DB_COMMAND(dtr, db_dtr)
2838{
2839	print_trs(1);
2840}
2841
2842DB_COMMAND(rr, db_rr)
2843{
2844	int i;
2845	uint64_t t;
2846	struct ia64_rr rr;
2847
	db_printf("RR RID    PgSz VE\n");
2849	for (i = 0; i < 8; i++) {
2850		__asm __volatile ("mov %0=rr[%1]"
2851				  : "=r"(t)
2852				  : "r"(IA64_RR_BASE(i)));
2853		*(uint64_t *) &rr = t;
		db_printf("%d  %06x %4s %d\n",
		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2856	}
2857}
2858
2859DB_COMMAND(thash, db_thash)
2860{
2861	if (!have_addr)
2862		return;
2863
2864	db_printf("%p\n", (void *) ia64_thash(addr));
2865}
2866
2867DB_COMMAND(ttag, db_ttag)
2868{
2869	if (!have_addr)
2870		return;
2871
2872	db_printf("0x%lx\n", ia64_ttag(addr));
2873}
2874
2875DB_COMMAND(kpte, db_kpte)
2876{
2877	struct ia64_lpte *pte;
2878
2879	if (!have_addr) {
2880		db_printf("usage: kpte <kva>\n");
2881		return;
2882	}
2883	if (addr < VM_INIT_KERNEL_ADDRESS) {
2884		db_printf("kpte: error: invalid <kva>\n");
2885		return;
2886	}
2887	pte = pmap_find_kpte(addr);
2888	db_printf("kpte at %p:\n", pte);
2889	db_printf("  pte  =%016lx\n", pte->pte);
2890	db_printf("  itir =%016lx\n", pte->itir);
2891	db_printf("  tag  =%016lx\n", pte->tag);
2892	db_printf("  chain=%016lx\n", pte->chain);
2893}
2894
2895#endif
2896