1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 166631 2007-02-11 02:52:54Z marcel $");
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/lock.h>
54#include <sys/mman.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/smp.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60
61#include <vm/vm.h>
62#include <vm/vm_page.h>
63#include <vm/vm_map.h>
64#include <vm/vm_object.h>
65#include <vm/vm_pageout.h>
66#include <vm/uma.h>
67
68#include <machine/md_var.h>
69#include <machine/pal.h>
70
71/*
72 *	Manages physical address maps.
73 *
74 *	In addition to hardware address maps, this
75 *	module is called upon to provide software-use-only
76 *	maps which may or may not be stored in the same
77 *	form as hardware maps.  These pseudo-maps are
78 *	used to store intermediate results from copy
79 *	operations to and from address spaces.
80 *
81 *	Since the information managed by this module is
82 *	also stored by the logical address mapping module,
83 *	this module may throw away valid virtual-to-physical
84 *	mappings at almost any time.  However, invalidations
85 *	of virtual-to-physical mappings must be done as
86 *	requested.
87 *
88 *	In order to cope with hardware architectures which
89 *	make virtual-to-physical map invalidates expensive,
90 *	this module may delay invalidate or reduced protection
91 *	operations until such time as they are actually
92 *	necessary.  This module is given full information as
93 *	to which processors are currently using which maps,
94 *	and to when physical maps must be made correct.
95 */
96
97/*
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
101 *
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
104 *
105 * Region 0..4
106 *	User virtually mapped
107 *
108 * Region 5
109 *	Kernel virtually mapped
110 *
111 * Region 6
112 *	Kernel physically mapped uncacheable
113 *
114 * Region 7
115 *	Kernel physically mapped cacheable
116 */
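/*
 * For illustration only (derived from the region layout above, not from
 * code in this file): IA64_RR_BASE(n) is n << 61, so region 5 starts at
 * 0xa000000000000000, region 6 at 0xc000000000000000 and region 7 at
 * 0xe000000000000000.  A physical address such as 0xabc000 is therefore
 * reachable cacheably as 0xe000000000abc000 (via IA64_PHYS_TO_RR7) and
 * uncacheably as 0xc000000000abc000, while VHPT-backed kernel virtual
 * mappings live in region 5.
 */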
117
118/* XXX move to a header. */
119extern uint64_t ia64_gateway_page[];
120
121MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
122
123#ifndef PMAP_SHPGPERPROC
124#define PMAP_SHPGPERPROC 200
125#endif
126
127#if !defined(DIAGNOSTIC)
128#define PMAP_INLINE __inline
129#else
130#define PMAP_INLINE
131#endif
132
133#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
134#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
135#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
136#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
137#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
138#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
139#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
140
141#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
142#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
143#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
144#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
145
146#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
147
148/*
149 * The VHPT bucket head structure.
150 */
151struct ia64_bucket {
152	uint64_t	chain;
153	struct mtx	mutex;
154	u_int		length;
155};
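/*
 * Note on the fields above: "chain" holds the physical address of the
 * first ia64_lpte on this bucket's collision chain (0 means the chain is
 * empty), "mutex" serializes updates to that chain, and "length" counts
 * the chained entries (summed up below for machdep.vhpt.population).
 */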
156
157/*
158 * Statically allocated kernel pmap
159 */
160struct pmap kernel_pmap_store;
161
162vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
163vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
164
165/*
166 * Kernel virtual memory management.
167 */
168static int nkpt;
169struct ia64_lpte **ia64_kptdir;
170#define KPTE_DIR_INDEX(va) \
171	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
172#define KPTE_PTE_INDEX(va) \
173	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
174#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
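/*
 * For illustration, assuming the usual 8KB page size (PAGE_SHIFT == 13)
 * and the 32-byte struct ia64_lpte:
 *
 *	NKPTEPG            == 8192 / 32 == 256
 *	KPTE_PTE_INDEX(va) == (va >> 13) & 0xff	   256 PTEs per leaf page
 *	KPTE_DIR_INDEX(va) == (va >> 21) & 0x3ff   1024 directory slots
 *
 * so each leaf page maps 256 * 8KB == 2MB of KVA and the single directory
 * page can cover 1024 * 2MB == 2GB of region 5.
 */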
175
176vm_offset_t kernel_vm_end;
177
178/* Values for ptc.e. XXX values for SKI. */
179static uint64_t pmap_ptc_e_base = 0x100000000;
180static uint64_t pmap_ptc_e_count1 = 3;
181static uint64_t pmap_ptc_e_count2 = 2;
182static uint64_t pmap_ptc_e_stride1 = 0x2000;
183static uint64_t pmap_ptc_e_stride2 = 0x100000000;
184struct mtx pmap_ptcmutex;
185
186/*
187 * Data for the RID allocator
188 */
189static int pmap_ridcount;
190static int pmap_rididx;
191static int pmap_ridmapsz;
192static int pmap_ridmax;
193static uint64_t *pmap_ridmap;
194struct mtx pmap_ridmutex;
195
196/*
197 * Data for the pv entry allocation mechanism
198 */
199static uma_zone_t pvzone;
200static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
201
202/*
203 * Data for allocating PTEs for user processes.
204 */
205static uma_zone_t ptezone;
206
207/*
208 * Virtual Hash Page Table (VHPT) data.
209 */
210/* SYSCTL_DECL(_machdep); */
211SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
212
213struct ia64_bucket *pmap_vhpt_bucket;
214
215int pmap_vhpt_nbuckets;
216SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
217    &pmap_vhpt_nbuckets, 0, "");
218
219uint64_t pmap_vhpt_base[MAXCPU];
220
221int pmap_vhpt_log2size = 0;
222TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
223SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
224    &pmap_vhpt_log2size, 0, "");
225
226static int pmap_vhpt_inserts;
227SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
228    &pmap_vhpt_inserts, 0, "");
229
230static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
231SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
232    NULL, 0, pmap_vhpt_population, "I", "");
233
234static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
235
236static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
237static pv_entry_t get_pv_entry(pmap_t locked_pmap);
238
239static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
240		    vm_page_t m, vm_prot_t prot);
241static pmap_t	pmap_install(pmap_t);
242static void	pmap_invalidate_all(pmap_t pmap);
243static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
244		    vm_offset_t va, pv_entry_t pv, int freepte);
245static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
246		    vm_page_t m);
247
248vm_offset_t
249pmap_steal_memory(vm_size_t size)
250{
251	vm_size_t bank_size;
252	vm_offset_t pa, va;
253
254	size = round_page(size);
255
256	bank_size = phys_avail[1] - phys_avail[0];
257	while (size > bank_size) {
258		int i;
259		for (i = 0; phys_avail[i+2]; i+= 2) {
260			phys_avail[i] = phys_avail[i+2];
261			phys_avail[i+1] = phys_avail[i+3];
262		}
263		phys_avail[i] = 0;
264		phys_avail[i+1] = 0;
265		if (!phys_avail[0])
266			panic("pmap_steal_memory: out of memory");
267		bank_size = phys_avail[1] - phys_avail[0];
268	}
269
270	pa = phys_avail[0];
271	phys_avail[0] += size;
272
273	va = IA64_PHYS_TO_RR7(pa);
274	bzero((caddr_t) va, size);
275	return va;
276}
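/*
 * phys_avail[] is a zero-terminated array of {start, end} pairs.  As an
 * illustration (the addresses are made up): given
 * { 0x200000, 0x400000, 0x100000000, 0x180000000, 0, 0 }, a request for
 * 4MB does not fit in the 2MB first bank, so that bank is discarded and
 * the 4MB is carved from the start of the next bank, whose start simply
 * advances by 4MB.  The caller gets back a region 7 (cacheable, direct
 * mapped) address for memory that has already been zeroed.
 */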
277
278/*
279 *	Bootstrap the system enough to run with virtual memory.
280 */
281void
282pmap_bootstrap()
283{
284	struct ia64_pal_result res;
285	struct ia64_lpte *pte;
286	vm_offset_t base, limit;
287	size_t size;
288	int i, j, count, ridbits;
289
290	/*
291	 * Query the PAL Code to find the loop parameters for the
292	 * ptc.e instruction.
293	 */
294	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
295	if (res.pal_status != 0)
296		panic("Can't configure ptc.e parameters");
297	pmap_ptc_e_base = res.pal_result[0];
298	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
299	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
300	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
301	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
302	if (bootverbose)
303		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
304		       "stride1=0x%lx, stride2=0x%lx\n",
305		       pmap_ptc_e_base,
306		       pmap_ptc_e_count1,
307		       pmap_ptc_e_count2,
308		       pmap_ptc_e_stride1,
309		       pmap_ptc_e_stride2);
310	mtx_init(&pmap_ptcmutex, "Global PTC lock", NULL, MTX_SPIN);
311
312	/*
313	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
314	 *
315	 * We currently need at least 19 bits in the RID because PID_MAX
316	 * can only be encoded in 17 bits and we need RIDs for 5 regions
317	 * per process. With PID_MAX equalling 99999 this means that we
318	 * need to be able to encode 499995 (=5*PID_MAX).
319	 * The Itanium processor only has 18 bits and the architected
320	 * minimum is exactly that. So, we cannot use a PID based scheme
321	 * in those cases. Enter pmap_ridmap...
322	 * We should avoid the map when running on a processor that has
323	 * implemented enough bits. This means that we should pass the
324	 * process/thread ID to pmap. This we currently don't do, so we
325	 * use the map anyway. However, we don't want to allocate a map
326	 * that is large enough to cover the range dictated by the number
327	 * of bits in the RID, because that may result in a RID map of
328	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
329	 * The bottom line: we create a 32KB map when the processor only
330	 * implements 18 bits (or when we can't figure it out). Otherwise
331	 * we create a 64KB map.
332	 */
333	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
334	if (res.pal_status != 0) {
335		if (bootverbose)
336			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
337		ridbits = 18; /* guaranteed minimum */
338	} else {
339		ridbits = (res.pal_result[1] >> 8) & 0xff;
340		if (bootverbose)
341			printf("Processor supports %d Region ID bits\n",
342			    ridbits);
343	}
344	if (ridbits > 19)
345		ridbits = 19;
346
347	pmap_ridmax = (1 << ridbits);
348	pmap_ridmapsz = pmap_ridmax / 64;
349	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
350	pmap_ridmap[0] |= 0xff;
351	pmap_rididx = 0;
352	pmap_ridcount = 8;
353	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
354
355	/*
356	 * Allocate some memory for initial kernel 'page tables'.
357	 */
358	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
359	for (i = 0; i < NKPT; i++) {
360		ia64_kptdir[i] = (void*)pmap_steal_memory(PAGE_SIZE);
361	}
362	nkpt = NKPT;
363	kernel_vm_end = NKPT * PAGE_SIZE * NKPTEPG + VM_MIN_KERNEL_ADDRESS -
364	    VM_GATEWAY_SIZE;
365
366	for (i = 0; phys_avail[i+2]; i+= 2)
367		;
368	count = i+2;
369
370	/*
371	 * Figure out a useful size for the VHPT, based on the size of
372	 * physical memory and try to locate a region which is large
373	 * enough to contain the VHPT (which must be a power of two in
374	 * size and aligned to a natural boundary).
375	 * We silently bump up the VHPT size to the minimum size if the
376	 * user has set the tunable too small. Likewise, the VHPT size
377	 * is silently capped to the maximum allowed.
378	 */
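	/*
	 * The "Maxmem * 32" test below aims for roughly one 32-byte VHPT
	 * entry per physical page.  For illustration, assuming 8KB pages
	 * and 4GB of memory: Maxmem is about 512K pages, 512K * 32 is
	 * 16MB, so the loop settles on log2size == 24 (a 16MB VHPT per
	 * CPU), and MAXCPU contiguous, naturally aligned copies must then
	 * fit inside a single phys_avail region.
	 */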
379	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
380	if (pmap_vhpt_log2size == 0) {
381		pmap_vhpt_log2size = 15;
382		size = 1UL << pmap_vhpt_log2size;
383		while (size < Maxmem * 32) {
384			pmap_vhpt_log2size++;
385			size <<= 1;
386		}
387	} else if (pmap_vhpt_log2size < 15)
388		pmap_vhpt_log2size = 15;
389	if (pmap_vhpt_log2size > 61)
390		pmap_vhpt_log2size = 61;
391
392	pmap_vhpt_base[0] = 0;
393	base = limit = 0;
394	size = 1UL << pmap_vhpt_log2size;
395	while (pmap_vhpt_base[0] == 0) {
396		if (bootverbose)
397			printf("Trying VHPT size 0x%lx\n", size);
398		for (i = 0; i < count; i += 2) {
399			base = (phys_avail[i] + size - 1) & ~(size - 1);
400			limit = base + MAXCPU * size;
401			if (limit <= phys_avail[i+1])
402				/*
403				 * VHPT can fit in this region
404				 */
405				break;
406		}
407		if (!phys_avail[i]) {
408			/* Can't fit, try next smaller size. */
409			pmap_vhpt_log2size--;
410			size >>= 1;
411		} else
412			pmap_vhpt_base[0] = IA64_PHYS_TO_RR7(base);
413	}
414	if (pmap_vhpt_log2size < 15)
415		panic("Can't find space for VHPT");
416
417	if (bootverbose)
418		printf("Putting VHPT at 0x%lx\n", base);
419
420	if (base != phys_avail[i]) {
421		/* Split this region. */
422		if (bootverbose)
423			printf("Splitting [%p-%p]\n", (void *)phys_avail[i],
424			    (void *)phys_avail[i+1]);
425		for (j = count; j > i; j -= 2) {
426			phys_avail[j] = phys_avail[j-2];
427			phys_avail[j+1] = phys_avail[j-2+1];
428		}
429		phys_avail[i+1] = base;
430		phys_avail[i+2] = limit;
431	} else
432		phys_avail[i] = limit;
433
434	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
435
436	pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
437	    sizeof(struct ia64_bucket));
438	pte = (struct ia64_lpte *)pmap_vhpt_base[0];
439	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
440		pte[i].pte = 0;
441		pte[i].itir = 0;
442		pte[i].tag = 1UL << 63;	/* Invalid tag */
443		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
444		/* Stolen memory is zeroed! */
445		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
446		    MTX_SPIN);
447	}
448
449	for (i = 1; i < MAXCPU; i++) {
450		pmap_vhpt_base[i] = pmap_vhpt_base[i - 1] + size;
451		bcopy((void *)pmap_vhpt_base[i - 1], (void *)pmap_vhpt_base[i],
452		    size);
453	}
454
455	__asm __volatile("mov cr.pta=%0;; srlz.i;;" ::
456	    "r" (pmap_vhpt_base[0] + (1<<8) + (pmap_vhpt_log2size<<2) + 1));
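	/*
	 * Reading aid for the operand above, per the architected PTA
	 * layout (assumed here, not defined in this file): bit 0 enables
	 * the VHPT walker, bits 7:2 hold the log2 of the VHPT size, bit 8
	 * selects the long format, and the upper bits supply the naturally
	 * aligned VHPT base.  Each AP is expected to program its own
	 * pmap_vhpt_base[] copy the same way.
	 */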
457
458	virtual_avail = VM_MIN_KERNEL_ADDRESS;
459	virtual_end = VM_MAX_KERNEL_ADDRESS;
460
461	/*
462	 * Initialize the kernel pmap (which is statically allocated).
463	 */
464	PMAP_LOCK_INIT(kernel_pmap);
465	for (i = 0; i < 5; i++)
466		kernel_pmap->pm_rid[i] = 0;
467	kernel_pmap->pm_active = 1;
468	TAILQ_INIT(&kernel_pmap->pm_pvlist);
469	PCPU_SET(current_pmap, kernel_pmap);
470
471	/*
472	 * Region 5 is mapped via the vhpt.
473	 */
474	ia64_set_rr(IA64_RR_BASE(5),
475		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
476
477	/*
478	 * Region 6 is direct mapped UC and region 7 is direct mapped
479	 * WB (cacheable). The details of this are controlled by the Alt {I,D}TLB
480	 * handlers. Here we just make sure that they have the largest
481	 * possible page size to minimise TLB usage.
482	 */
483	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
484	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));
485
486	/*
487	 * Clear out any random TLB entries left over from booting.
488	 */
489	pmap_invalidate_all(kernel_pmap);
490
491	map_gateway_page();
492}
493
494static int
495pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
496{
497	int count, error, i;
498
499	count = 0;
500	for (i = 0; i < pmap_vhpt_nbuckets; i++)
501		count += pmap_vhpt_bucket[i].length;
502
503	error = SYSCTL_OUT(req, &count, sizeof(count));
504	return (error);
505}
506
507/*
508 *	Initialize a vm_page's machine-dependent fields.
509 */
510void
511pmap_page_init(vm_page_t m)
512{
513
514	TAILQ_INIT(&m->md.pv_list);
515	m->md.pv_list_count = 0;
516}
517
518/*
519 *	Initialize the pmap module.
520 *	Called by vm_init, to initialize any structures that the pmap
521 *	system needs to map virtual memory.
522 */
523void
524pmap_init(void)
525{
526	int shpgperproc = PMAP_SHPGPERPROC;
527
528	/*
529	 * Initialize the address space (zone) for the pv entries.  Set a
530	 * high water mark so that the system can recover from excessive
531	 * numbers of pv entries.
532	 */
533	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
534	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
535	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
536	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
537	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
538	pv_entry_high_water = 9 * (pv_entry_max / 10);
539
540	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
541	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
542}
543
544
545/***************************************************
546 * Manipulate TLBs for a pmap
547 ***************************************************/
548
549#if 0
550static __inline void
551pmap_invalidate_page_locally(void *arg)
552{
553	vm_offset_t va = (uintptr_t)arg;
554	struct ia64_lpte *pte;
555
556	pte = (struct ia64_lpte *)ia64_thash(va);
557	if (pte->tag == ia64_ttag(va))
558		pte->tag = 1UL << 63;
559	ia64_ptc_l(va, PAGE_SHIFT << 2);
560}
561
562#ifdef SMP
563static void
564pmap_invalidate_page_1(void *arg)
565{
566	void **args = arg;
567	pmap_t oldpmap;
568
569	critical_enter();
570	oldpmap = pmap_install(args[0]);
571	pmap_invalidate_page_locally(args[1]);
572	pmap_install(oldpmap);
573	critical_exit();
574}
575#endif
576
577static void
578pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
579{
580
581	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
582		("invalidating TLB for non-current pmap"));
583
584#ifdef SMP
585	if (mp_ncpus > 1) {
586		void *args[2];
587		args[0] = pmap;
588		args[1] = (void *)va;
589		smp_rendezvous(NULL, pmap_invalidate_page_1, NULL, args);
590	} else
591#endif
592	pmap_invalidate_page_locally((void *)va);
593}
594#endif /* 0 */
595
596static void
597pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
598{
599	struct ia64_lpte *pte;
600	int i, vhpt_ofs;
601
602	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
603		("invalidating TLB for non-current pmap"));
604
605	vhpt_ofs = ia64_thash(va) - pmap_vhpt_base[PCPU_GET(cpuid)];
606	critical_enter();
607	for (i = 0; i < MAXCPU; i++) {
608		pte = (struct ia64_lpte *)(pmap_vhpt_base[i] + vhpt_ofs);
609		if (pte->tag == ia64_ttag(va))
610			pte->tag = 1UL << 63;
611	}
612	critical_exit();
613	mtx_lock_spin(&pmap_ptcmutex);
614	ia64_ptc_ga(va, PAGE_SHIFT << 2);
615	mtx_unlock_spin(&pmap_ptcmutex);
616}
617
618static void
619pmap_invalidate_all_1(void *arg)
620{
621	uint64_t addr;
622	int i, j;
623
624	critical_enter();
625	addr = pmap_ptc_e_base;
626	for (i = 0; i < pmap_ptc_e_count1; i++) {
627		for (j = 0; j < pmap_ptc_e_count2; j++) {
628			ia64_ptc_e(addr);
629			addr += pmap_ptc_e_stride2;
630		}
631		addr += pmap_ptc_e_stride1;
632	}
633	critical_exit();
634}
635
636static void
637pmap_invalidate_all(pmap_t pmap)
638{
639
640	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
641		("invalidating TLB for non-current pmap"));
642
643#ifdef SMP
644	if (mp_ncpus > 1)
645		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
646	else
647#endif
648	pmap_invalidate_all_1(NULL);
649}
650
651static uint32_t
652pmap_allocate_rid(void)
653{
654	uint64_t bit, bits;
655	int rid;
656
657	mtx_lock(&pmap_ridmutex);
658	if (pmap_ridcount == pmap_ridmax)
659		panic("pmap_allocate_rid: All Region IDs used");
660
661	/* Find an index with a free bit. */
662	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
663		pmap_rididx++;
664		if (pmap_rididx == pmap_ridmapsz)
665			pmap_rididx = 0;
666	}
667	rid = pmap_rididx * 64;
668
669	/* Find a free bit. */
670	bit = 1UL;
671	while (bits & bit) {
672		rid++;
673		bit <<= 1;
674	}
675
676	pmap_ridmap[pmap_rididx] |= bit;
677	pmap_ridcount++;
678	mtx_unlock(&pmap_ridmutex);
679
680	return rid;
681}
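/*
 * The arithmetic above means a RID is simply "map word index * 64 + bit
 * number".  For example, since pmap_bootstrap() pre-sets the low eight
 * bits of pmap_ridmap[0] for the kernel, the first user allocation
 * returns RID 8.
 */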
682
683static void
684pmap_free_rid(uint32_t rid)
685{
686	uint64_t bit;
687	int idx;
688
689	idx = rid / 64;
690	bit = ~(1UL << (rid & 63));
691
692	mtx_lock(&pmap_ridmutex);
693	pmap_ridmap[idx] &= bit;
694	pmap_ridcount--;
695	mtx_unlock(&pmap_ridmutex);
696}
697
698/***************************************************
699 * Page table page management routines.....
700 ***************************************************/
701
702void
703pmap_pinit0(struct pmap *pmap)
704{
705	/* kernel_pmap is the same as any other pmap. */
706	pmap_pinit(pmap);
707}
708
709/*
710 * Initialize a preallocated and zeroed pmap structure,
711 * such as one in a vmspace structure.
712 */
713void
714pmap_pinit(struct pmap *pmap)
715{
716	int i;
717
718	PMAP_LOCK_INIT(pmap);
719	for (i = 0; i < 5; i++)
720		pmap->pm_rid[i] = pmap_allocate_rid();
721	pmap->pm_active = 0;
722	TAILQ_INIT(&pmap->pm_pvlist);
723	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
724}
725
726/***************************************************
727 * Pmap allocation/deallocation routines.
728 ***************************************************/
729
730/*
731 * Release any resources held by the given physical map.
732 * Called when a pmap initialized by pmap_pinit is being released.
733 * Should only be called if the map contains no valid mappings.
734 */
735void
736pmap_release(pmap_t pmap)
737{
738	int i;
739
740	for (i = 0; i < 5; i++)
741		if (pmap->pm_rid[i])
742			pmap_free_rid(pmap->pm_rid[i]);
743	PMAP_LOCK_DESTROY(pmap);
744}
745
746/*
747 * grow the number of kernel page table entries, if needed
748 */
749void
750pmap_growkernel(vm_offset_t addr)
751{
752	struct ia64_lpte *ptepage;
753	vm_page_t nkpg;
754
755	while (kernel_vm_end < addr) {
756		/* We could handle more by increasing the size of kptdir. */
757		if (nkpt == MAXKPT)
758			panic("pmap_growkernel: out of kernel address space");
759
760		nkpg = vm_page_alloc(NULL, nkpt,
761		    VM_ALLOC_NOOBJ | VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
762		if (!nkpg)
763			panic("pmap_growkernel: no memory to grow kernel");
764
765		ptepage = (struct ia64_lpte *)
766		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
767		bzero(ptepage, PAGE_SIZE);
768		ia64_kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
769
770		nkpt++;
771		kernel_vm_end += PAGE_SIZE * NKPTEPG;
772	}
773}
774
775/***************************************************
776 * page management routines.
777 ***************************************************/
778
779/*
780 * free the pv_entry back to the free list
781 */
782static PMAP_INLINE void
783free_pv_entry(pv_entry_t pv)
784{
785	pv_entry_count--;
786	uma_zfree(pvzone, pv);
787}
788
789/*
790 * get a new pv_entry, allocating a block from the system
791 * when needed.
792 */
793static pv_entry_t
794get_pv_entry(pmap_t locked_pmap)
795{
796	static const struct timeval printinterval = { 60, 0 };
797	static struct timeval lastprint;
798	struct vpgqueues *vpq;
799	struct ia64_lpte *pte;
800	pmap_t oldpmap, pmap;
801	pv_entry_t allocated_pv, next_pv, pv;
802	vm_offset_t va;
803	vm_page_t m;
804
805	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
806	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
807	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
808	if (allocated_pv != NULL) {
809		pv_entry_count++;
810		if (pv_entry_count > pv_entry_high_water)
811			pagedaemon_wakeup();
812		else
813			return (allocated_pv);
814	}
815
816	/*
817	 * Reclaim pv entries: At first, destroy mappings to inactive
818	 * pages.  After that, if a pv entry is still needed, destroy
819	 * mappings to active pages.
820	 */
821	if (ratecheck(&lastprint, &printinterval))
822		printf("Approaching the limit on PV entries, "
823		    "increase the vm.pmap.shpgperproc tunable.\n");
824	vpq = &vm_page_queues[PQ_INACTIVE];
825retry:
826	TAILQ_FOREACH(m, &vpq->pl, pageq) {
827		if (m->hold_count || m->busy)
828			continue;
829		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
830			va = pv->pv_va;
831			pmap = pv->pv_pmap;
832			/* Avoid deadlock and lock recursion. */
833			if (pmap > locked_pmap)
834				PMAP_LOCK(pmap);
835			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
836				continue;
837			oldpmap = pmap_install(pmap);
838			pte = pmap_find_vhpt(va);
839			KASSERT(pte != NULL, ("pte"));
840			pmap_remove_pte(pmap, pte, va, pv, 1);
841			pmap_install(oldpmap);
842			if (pmap != locked_pmap)
843				PMAP_UNLOCK(pmap);
844			if (allocated_pv == NULL)
845				allocated_pv = pv;
846			else
847				free_pv_entry(pv);
848		}
849	}
850	if (allocated_pv == NULL) {
851		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
852			vpq = &vm_page_queues[PQ_ACTIVE];
853			goto retry;
854		}
855		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
856	}
857	return (allocated_pv);
858}
859
860/*
861 * Conditionally create a pv entry.
862 */
863static boolean_t
864pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
865{
866	pv_entry_t pv;
867
868	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
869	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
870	if (pv_entry_count < pv_entry_high_water &&
871	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
872		pv_entry_count++;
873		pv->pv_va = va;
874		pv->pv_pmap = pmap;
875		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
876		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
877		m->md.pv_list_count++;
878		return (TRUE);
879	} else
880		return (FALSE);
881}
882
883/*
884 * Add an ia64_lpte to the VHPT.
885 */
886static void
887pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
888{
889	struct ia64_bucket *bckt;
890	struct ia64_lpte *vhpte;
891	uint64_t pte_pa;
892
893	/* Can fault, so get it out of the way. */
894	pte_pa = ia64_tpa((vm_offset_t)pte);
895
896	vhpte = (struct ia64_lpte *)ia64_thash(va);
897	bckt = (struct ia64_bucket *)vhpte->chain;
898
899	mtx_lock_spin(&bckt->mutex);
900	pte->chain = bckt->chain;
901	ia64_mf();
902	bckt->chain = pte_pa;
903
904	pmap_vhpt_inserts++;
905	bckt->length++;
906	mtx_unlock_spin(&bckt->mutex);
907}
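/*
 * Note the ordering above: the new pte's chain pointer is published (and
 * made visible with ia64_mf()) before the bucket head is switched to the
 * new pte, so a reader walking the chain concurrently sees either the old
 * list or the new entry already linked to it, never a torn list.  The
 * chain stores physical addresses, presumably so the low-level VHPT miss
 * handler can follow it without further translation.
 */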
908
909/*
910 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
911 * worked or an appropriate error code otherwise.
912 */
913static int
914pmap_remove_vhpt(vm_offset_t va)
915{
916	struct ia64_bucket *bckt;
917	struct ia64_lpte *pte;
918	struct ia64_lpte *lpte;
919	struct ia64_lpte *vhpte;
920	uint64_t chain, tag;
921
922	tag = ia64_ttag(va);
923	vhpte = (struct ia64_lpte *)ia64_thash(va);
924	bckt = (struct ia64_bucket *)vhpte->chain;
925
926	lpte = NULL;
927	mtx_lock_spin(&bckt->mutex);
928	chain = bckt->chain;
929	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
930	while (chain != 0 && pte->tag != tag) {
931		lpte = pte;
932		chain = pte->chain;
933		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
934	}
935	if (chain == 0) {
936		mtx_unlock_spin(&bckt->mutex);
937		return (ENOENT);
938	}
939
940	/* Snip this pte out of the collision chain. */
941	if (lpte == NULL)
942		bckt->chain = pte->chain;
943	else
944		lpte->chain = pte->chain;
945	ia64_mf();
946
947	bckt->length--;
948	mtx_unlock_spin(&bckt->mutex);
949	return (0);
950}
951
952/*
953 * Find the ia64_lpte for the given va, if any.
954 */
955static struct ia64_lpte *
956pmap_find_vhpt(vm_offset_t va)
957{
958	struct ia64_bucket *bckt;
959	struct ia64_lpte *pte;
960	uint64_t chain, tag;
961
962	tag = ia64_ttag(va);
963	pte = (struct ia64_lpte *)ia64_thash(va);
964	bckt = (struct ia64_bucket *)pte->chain;
965
966	mtx_lock_spin(&bckt->mutex);
967	chain = bckt->chain;
968	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
969	while (chain != 0 && pte->tag != tag) {
970		chain = pte->chain;
971		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
972	}
973	mtx_unlock_spin(&bckt->mutex);
974	return ((chain != 0) ? pte : NULL);
975}
976
977/*
978 * Remove an entry from the list of managed mappings.
979 */
980static int
981pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
982{
983	if (!pv) {
984		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
985			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
986				if (pmap == pv->pv_pmap && va == pv->pv_va)
987					break;
988			}
989		} else {
990			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
991				if (va == pv->pv_va)
992					break;
993			}
994		}
995	}
996
997	if (pv) {
998		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
999		m->md.pv_list_count--;
1000		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1001			vm_page_flag_clear(m, PG_WRITEABLE);
1002
1003		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1004		free_pv_entry(pv);
1005		return 0;
1006	} else {
1007		return ENOENT;
1008	}
1009}
1010
1011/*
1012 * Create a pv entry for page at pa for
1013 * (pmap, va).
1014 */
1015static void
1016pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1017{
1018	pv_entry_t pv;
1019
1020	pv = get_pv_entry(pmap);
1021	pv->pv_pmap = pmap;
1022	pv->pv_va = va;
1023
1024	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1025	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1026	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1027	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1028	m->md.pv_list_count++;
1029}
1030
1031/*
1032 *	Routine:	pmap_extract
1033 *	Function:
1034 *		Extract the physical page address associated
1035 *		with the given map/virtual_address pair.
1036 */
1037vm_paddr_t
1038pmap_extract(pmap_t pmap, vm_offset_t va)
1039{
1040	struct ia64_lpte *pte;
1041	pmap_t oldpmap;
1042	vm_paddr_t pa;
1043
1044	pa = 0;
1045	PMAP_LOCK(pmap);
1046	oldpmap = pmap_install(pmap);
1047	pte = pmap_find_vhpt(va);
1048	if (pte != NULL && pmap_present(pte))
1049		pa = pmap_ppn(pte);
1050	pmap_install(oldpmap);
1051	PMAP_UNLOCK(pmap);
1052	return (pa);
1053}
1054
1055/*
1056 *	Routine:	pmap_extract_and_hold
1057 *	Function:
1058 *		Atomically extract and hold the physical page
1059 *		with the given pmap and virtual address pair
1060 *		if that mapping permits the given protection.
1061 */
1062vm_page_t
1063pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1064{
1065	struct ia64_lpte *pte;
1066	pmap_t oldpmap;
1067	vm_page_t m;
1068
1069	m = NULL;
1070	vm_page_lock_queues();
1071	PMAP_LOCK(pmap);
1072	oldpmap = pmap_install(pmap);
1073	pte = pmap_find_vhpt(va);
1074	if (pte != NULL && pmap_present(pte) &&
1075	    (pmap_prot(pte) & prot) == prot) {
1076		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1077		vm_page_hold(m);
1078	}
1079	vm_page_unlock_queues();
1080	pmap_install(oldpmap);
1081	PMAP_UNLOCK(pmap);
1082	return (m);
1083}
1084
1085/***************************************************
1086 * Low level mapping routines.....
1087 ***************************************************/
1088
1089/*
1090 * Find the kernel lpte for mapping the given virtual address, which
1091 * must be in the part of region 5 which we can cover with our kernel
1092 * 'page tables'.
1093 */
1094static struct ia64_lpte *
1095pmap_find_kpte(vm_offset_t va)
1096{
1097	KASSERT((va >> 61) == 5,
1098		("kernel mapping 0x%lx not in region 5", va));
1099	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1100		("kernel mapping 0x%lx out of range", va));
1101	return (&ia64_kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)]);
1102}
1103
1104/*
1105 * Find a pte suitable for mapping a user-space address. If one exists
1106 * in the VHPT, that one will be returned, otherwise a new pte is
1107 * allocated.
1108 */
1109static struct ia64_lpte *
1110pmap_find_pte(vm_offset_t va)
1111{
1112	struct ia64_lpte *pte;
1113
1114	if (va >= VM_MAXUSER_ADDRESS)
1115		return pmap_find_kpte(va);
1116
1117	pte = pmap_find_vhpt(va);
1118	if (pte == NULL) {
1119		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1120		pte->tag = 1UL << 63;
1121	}
1122	return (pte);
1123}
1124
1125/*
1126 * Free a pte which is now unused. This simply returns it to the zone
1127 * allocator if it is a user mapping. For kernel mappings, clear the
1128 * valid bit to make it clear that the mapping is not currently used.
1129 */
1130static void
1131pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1132{
1133	if (va < VM_MAXUSER_ADDRESS)
1134		uma_zfree(ptezone, pte);
1135	else
1136		pmap_clear_present(pte);
1137}
1138
1139static PMAP_INLINE void
1140pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1141{
1142	static int prot2ar[4] = {
1143		PTE_AR_R,	/* VM_PROT_NONE */
1144		PTE_AR_RW,	/* VM_PROT_WRITE */
1145		PTE_AR_RX,	/* VM_PROT_EXECUTE */
1146		PTE_AR_RWX	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1147	};
1148
1149	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK);
1150	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1151	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1152	    ? PTE_PL_KERN : PTE_PL_USER;
1153	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1154}
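/*
 * A worked example of the table lookup above, assuming the standard
 * VM_PROT_* encoding (READ == 1, WRITE == 2, EXECUTE == 4): shifting
 * (prot & VM_PROT_ALL) right by one maps READ (and NONE) to index 0,
 * WRITE and READ|WRITE to 1, EXECUTE and READ|EXECUTE to 2, and any
 * combination containing WRITE|EXECUTE to 3; read access is always
 * granted and the table only selects R, RW, RX or RWX.
 */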
1155
1156/*
1157 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1158 * the pte was originally valid, then it's assumed to already be in the
1159 * VHPT.
1160 * This function does not set the protection bits.  It's expected
1161 * that those have been set correctly prior to calling this function.
1162 */
1163static void
1164pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1165    boolean_t wired, boolean_t managed)
1166{
1167
1168	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK;
1169	pte->pte |= PTE_PRESENT | PTE_MA_WB;
1170	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1171	pte->pte |= (wired) ? PTE_WIRED : 0;
1172	pte->pte |= pa & PTE_PPN_MASK;
1173
1174	pte->itir = PAGE_SHIFT << 2;
1175
1176	pte->tag = ia64_ttag(va);
1177}
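/*
 * For reference (per the architected ITIR/tag layout, which is assumed
 * rather than defined in this file): "PAGE_SHIFT << 2" places the page
 * size in the ITIR ps field (bits 7:2) while leaving the protection key
 * field zero, and ia64_ttag(va) computes the translation tag that the
 * VHPT walker compares against the "tag" word of the long-format entry.
 */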
1178
1179/*
1180 * Remove the (possibly managed) mapping represented by pte from the
1181 * given pmap.
1182 */
1183static int
1184pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1185		pv_entry_t pv, int freepte)
1186{
1187	int error;
1188	vm_page_t m;
1189
1190	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1191		("removing pte for non-current pmap"));
1192
1193	/*
1194	 * First remove from the VHPT.
1195	 */
1196	error = pmap_remove_vhpt(va);
1197	if (error)
1198		return (error);
1199
1200	pmap_invalidate_page(pmap, va);
1201
1202	if (pmap_wired(pte))
1203		pmap->pm_stats.wired_count -= 1;
1204
1205	pmap->pm_stats.resident_count -= 1;
1206	if (pmap_managed(pte)) {
1207		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1208		if (pmap_dirty(pte))
1209			vm_page_dirty(m);
1210		if (pmap_accessed(pte))
1211			vm_page_flag_set(m, PG_REFERENCED);
1212
1213		error = pmap_remove_entry(pmap, m, va, pv);
1214	}
1215	if (freepte)
1216		pmap_free_pte(pte, va);
1217
1218	return (error);
1219}
1220
1221/*
1222 * Extract the physical page address associated with a kernel
1223 * virtual address.
1224 */
1225vm_paddr_t
1226pmap_kextract(vm_offset_t va)
1227{
1228	struct ia64_lpte *pte;
1229	vm_offset_t gwpage;
1230
1231	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1232
1233	/* Regions 6 and 7 are direct mapped. */
1234	if (va >= IA64_RR_BASE(6))
1235		return (IA64_RR_MASK(va));
1236
1237	/* EPC gateway page? */
1238	gwpage = (vm_offset_t)ia64_get_k5();
1239	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1240		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1241
1242	/* Bail out if the virtual address is beyond our limits. */
1243	if (IA64_RR_MASK(va) >= nkpt * PAGE_SIZE * NKPTEPG)
1244		return (0);
1245
1246	pte = pmap_find_kpte(va);
1247	if (!pmap_present(pte))
1248		return (0);
1249	return (pmap_ppn(pte) | (va & PAGE_MASK));
1250}
1251
1252/*
1253 * Add a list of wired pages to the kva.  This routine is only used for
1254 * temporary kernel mappings that do not need to have page modification
1255 * or references recorded.  Note that old mappings are simply written
1256 * over.  The page is effectively wired, but it's customary to not have
1257 * the PTE reflect that, nor update statistics.
1258 */
1259void
1260pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1261{
1262	struct ia64_lpte *pte;
1263	int i;
1264
1265	for (i = 0; i < count; i++) {
1266		pte = pmap_find_kpte(va);
1267		if (pmap_present(pte))
1268			pmap_invalidate_page(kernel_pmap, va);
1269		else
1270			pmap_enter_vhpt(pte, va);
1271		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1272		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1273		va += PAGE_SIZE;
1274	}
1275}
1276
1277/*
1278 * this routine jerks page mappings from the
1279 * kernel -- it is meant only for temporary mappings.
1280 */
1281void
1282pmap_qremove(vm_offset_t va, int count)
1283{
1284	struct ia64_lpte *pte;
1285	int i;
1286
1287	for (i = 0; i < count; i++) {
1288		pte = pmap_find_kpte(va);
1289		if (pmap_present(pte)) {
1290			pmap_remove_vhpt(va);
1291			pmap_invalidate_page(kernel_pmap, va);
1292			pmap_clear_present(pte);
1293		}
1294		va += PAGE_SIZE;
1295	}
1296}
1297
1298/*
1299 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1300 * to not have the PTE reflect that, nor update statistics.
1301 */
1302void
1303pmap_kenter(vm_offset_t va, vm_offset_t pa)
1304{
1305	struct ia64_lpte *pte;
1306
1307	pte = pmap_find_kpte(va);
1308	if (pmap_present(pte))
1309		pmap_invalidate_page(kernel_pmap, va);
1310	else
1311		pmap_enter_vhpt(pte, va);
1312	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1313	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1314}
1315
1316/*
1317 * Remove a page from the kva
1318 */
1319void
1320pmap_kremove(vm_offset_t va)
1321{
1322	struct ia64_lpte *pte;
1323
1324	pte = pmap_find_kpte(va);
1325	if (pmap_present(pte)) {
1326		pmap_remove_vhpt(va);
1327		pmap_invalidate_page(kernel_pmap, va);
1328		pmap_clear_present(pte);
1329	}
1330}
1331
1332/*
1333 *	Used to map a range of physical addresses into kernel
1334 *	virtual address space.
1335 *
1336 *	The value passed in '*virt' is a suggested virtual address for
1337 *	the mapping. Architectures which can support a direct-mapped
1338 *	physical to virtual region can return the appropriate address
1339 *	within that region, leaving '*virt' unchanged. Other
1340 *	architectures should map the pages starting at '*virt' and
1341 *	update '*virt' with the first usable address after the mapped
1342 *	region.
1343 */
1344vm_offset_t
1345pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1346{
1347	return IA64_PHYS_TO_RR7(start);
1348}
1349
1350/*
1351 * Remove a single page from a process address space
1352 */
1353static void
1354pmap_remove_page(pmap_t pmap, vm_offset_t va)
1355{
1356	struct ia64_lpte *pte;
1357
1358	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1359		("removing page for non-current pmap"));
1360
1361	pte = pmap_find_vhpt(va);
1362	if (pte != NULL)
1363		pmap_remove_pte(pmap, pte, va, 0, 1);
1364	return;
1365}
1366
1367/*
1368 *	Remove the given range of addresses from the specified map.
1369 *
1370 *	It is assumed that the start and end are properly
1371 *	rounded to the page size.
1372 */
1373void
1374pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1375{
1376	pmap_t oldpmap;
1377	vm_offset_t va;
1378	pv_entry_t npv, pv;
1379	struct ia64_lpte *pte;
1380
1381	if (pmap->pm_stats.resident_count == 0)
1382		return;
1383
1384	vm_page_lock_queues();
1385	PMAP_LOCK(pmap);
1386	oldpmap = pmap_install(pmap);
1387
1388	/*
1389	 * Special handling for removing a single page: it is a very
1390	 * common operation and easy to short-circuit some
1391	 * code.
1392	 */
1393	if (sva + PAGE_SIZE == eva) {
1394		pmap_remove_page(pmap, sva);
1395		goto out;
1396	}
1397
1398	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1399		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
1400			va = pv->pv_va;
1401			if (va >= sva && va < eva) {
1402				pte = pmap_find_vhpt(va);
1403				KASSERT(pte != NULL, ("pte"));
1404				pmap_remove_pte(pmap, pte, va, pv, 1);
1405			}
1406		}
1407
1408	} else {
1409		for (va = sva; va < eva; va += PAGE_SIZE) {
1410			pte = pmap_find_vhpt(va);
1411			if (pte != NULL)
1412				pmap_remove_pte(pmap, pte, va, 0, 1);
1413		}
1414	}
1415out:
1416	vm_page_unlock_queues();
1417	pmap_install(oldpmap);
1418	PMAP_UNLOCK(pmap);
1419}
1420
1421/*
1422 *	Routine:	pmap_remove_all
1423 *	Function:
1424 *		Removes this physical page from
1425 *		all physical maps in which it resides.
1426 *		Reflects back modify bits to the pager.
1427 *
1428 *	Notes:
1429 *		Original versions of this routine were very
1430 *		inefficient because they iteratively called
1431 *		pmap_remove (slow...)
1432 */
1433
1434void
1435pmap_remove_all(vm_page_t m)
1436{
1437	pmap_t oldpmap;
1438	pv_entry_t pv;
1439
1440#if defined(DIAGNOSTIC)
1441	/*
1442	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1443	 * pages!
1444	 */
1445	if (m->flags & PG_FICTITIOUS) {
1446		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1447	}
1448#endif
1449	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1450	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1451		struct ia64_lpte *pte;
1452		pmap_t pmap = pv->pv_pmap;
1453		vm_offset_t va = pv->pv_va;
1454
1455		PMAP_LOCK(pmap);
1456		oldpmap = pmap_install(pmap);
1457		pte = pmap_find_vhpt(va);
1458		KASSERT(pte != NULL, ("pte"));
1459		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1460			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1461		pmap_remove_pte(pmap, pte, va, pv, 1);
1462		pmap_install(oldpmap);
1463		PMAP_UNLOCK(pmap);
1464	}
1465	vm_page_flag_clear(m, PG_WRITEABLE);
1466}
1467
1468/*
1469 *	Set the physical protection on the
1470 *	specified range of this map as requested.
1471 */
1472void
1473pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1474{
1475	pmap_t oldpmap;
1476	struct ia64_lpte *pte;
1477
1478	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1479		pmap_remove(pmap, sva, eva);
1480		return;
1481	}
1482
1483	if (prot & VM_PROT_WRITE)
1484		return;
1485
1486	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1487		panic("pmap_protect: unaligned addresses");
1488
1489	vm_page_lock_queues();
1490	PMAP_LOCK(pmap);
1491	oldpmap = pmap_install(pmap);
1492	while (sva < eva) {
1493		/*
1494		 * If page is invalid, skip this page
1495		 */
1496		pte = pmap_find_vhpt(sva);
1497		if (pte == NULL) {
1498			sva += PAGE_SIZE;
1499			continue;
1500		}
1501
1502		if (pmap_prot(pte) != prot) {
1503			if (pmap_managed(pte)) {
1504				vm_offset_t pa = pmap_ppn(pte);
1505				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1506				if (pmap_dirty(pte)) {
1507					vm_page_dirty(m);
1508					pmap_clear_dirty(pte);
1509				}
1510				if (pmap_accessed(pte)) {
1511					vm_page_flag_set(m, PG_REFERENCED);
1512					pmap_clear_accessed(pte);
1513				}
1514			}
1515			pmap_pte_prot(pmap, pte, prot);
1516			pmap_invalidate_page(pmap, sva);
1517		}
1518
1519		sva += PAGE_SIZE;
1520	}
1521	vm_page_unlock_queues();
1522	pmap_install(oldpmap);
1523	PMAP_UNLOCK(pmap);
1524}
1525
1526/*
1527 *	Insert the given physical page (p) at
1528 *	the specified virtual address (v) in the
1529 *	target physical map with the protection requested.
1530 *
1531 *	If specified, the page will be wired down, meaning
1532 *	that the related pte can not be reclaimed.
1533 *
1534 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1535 *	or lose information.  That is, this routine must actually
1536 *	insert this page into the given map NOW.
1537 */
1538void
1539pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1540    boolean_t wired)
1541{
1542	pmap_t oldpmap;
1543	vm_offset_t pa;
1544	vm_offset_t opa;
1545	struct ia64_lpte origpte;
1546	struct ia64_lpte *pte;
1547	boolean_t managed;
1548
1549	vm_page_lock_queues();
1550	PMAP_LOCK(pmap);
1551	oldpmap = pmap_install(pmap);
1552
1553	va &= ~PAGE_MASK;
1554#ifdef DIAGNOSTIC
1555	if (va > VM_MAX_KERNEL_ADDRESS)
1556		panic("pmap_enter: toobig");
1557#endif
1558
1559	/*
1560	 * Find (or create) a pte for the given mapping.
1561	 */
1562	while ((pte = pmap_find_pte(va)) == NULL) {
1563		pmap_install(oldpmap);
1564		PMAP_UNLOCK(pmap);
1565		vm_page_unlock_queues();
1566		VM_WAIT;
1567		vm_page_lock_queues();
1568		PMAP_LOCK(pmap);
1569		oldpmap = pmap_install(pmap);
1570	}
1571	origpte = *pte;
1572	if (!pmap_present(pte)) {
1573		opa = ~0UL;
1574		pmap_enter_vhpt(pte, va);
1575	} else
1576		opa = pmap_ppn(pte);
1577	managed = FALSE;
1578	pa = VM_PAGE_TO_PHYS(m);
1579
1580	/*
1581	 * Mapping has not changed, must be protection or wiring change.
1582	 */
1583	if (opa == pa) {
1584		/*
1585		 * Wiring change, just update stats. We don't worry about
1586		 * wiring PT pages as they remain resident as long as there
1587		 * are valid mappings in them. Hence, if a user page is wired,
1588		 * the PT page will be also.
1589		 */
1590		if (wired && !pmap_wired(&origpte))
1591			pmap->pm_stats.wired_count++;
1592		else if (!wired && pmap_wired(&origpte))
1593			pmap->pm_stats.wired_count--;
1594
1595		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1596
1597		/*
1598		 * We might be turning off write access to the page,
1599		 * so we go ahead and sense modify status.
1600		 */
1601		if (managed && pmap_dirty(&origpte))
1602			vm_page_dirty(m);
1603
1604		pmap_invalidate_page(pmap, va);
1605		goto validate;
1606	}
1607
1608	/*
1609	 * Mapping has changed, invalidate old range and fall
1610	 * through to handle validating new mapping.
1611	 */
1612	if (opa != ~0UL) {
1613		pmap_remove_pte(pmap, pte, va, 0, 0);
1614		pmap_enter_vhpt(pte, va);
1615	}
1616
1617	/*
1618	 * Enter on the PV list if part of our managed memory.
1619	 */
1620	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1621		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1622		    ("pmap_enter: managed mapping within the clean submap"));
1623		pmap_insert_entry(pmap, va, m);
1624		managed = TRUE;
1625	}
1626
1627	/*
1628	 * Increment counters
1629	 */
1630	pmap->pm_stats.resident_count++;
1631	if (wired)
1632		pmap->pm_stats.wired_count++;
1633
1634validate:
1635
1636	/*
1637	 * Now validate mapping with desired protection/wiring. This
1638	 * adds the pte to the VHPT if necessary.
1639	 */
1640	pmap_pte_prot(pmap, pte, prot);
1641	pmap_set_pte(pte, va, pa, wired, managed);
1642
1643	if ((prot & VM_PROT_WRITE) != 0)
1644		vm_page_flag_set(m, PG_WRITEABLE);
1645	vm_page_unlock_queues();
1646	pmap_install(oldpmap);
1647	PMAP_UNLOCK(pmap);
1648}
1649
1650/*
1651 * Maps a sequence of resident pages belonging to the same object.
1652 * The sequence begins with the given page m_start.  This page is
1653 * mapped at the given virtual address start.  Each subsequent page is
1654 * mapped at a virtual address that is offset from start by the same
1655 * amount as the page is offset from m_start within the object.  The
1656 * last page in the sequence is the page with the largest offset from
1657 * m_start that can be mapped at a virtual address less than the given
1658 * virtual address end.  Not every virtual page between start and end
1659 * is mapped; only those for which a resident page exists with the
1660 * corresponding offset from m_start are mapped.
1661 */
1662void
1663pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1664    vm_page_t m_start, vm_prot_t prot)
1665{
1666	pmap_t oldpmap;
1667	vm_page_t m;
1668	vm_pindex_t diff, psize;
1669
1670	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
1671	psize = atop(end - start);
1672	m = m_start;
1673	PMAP_LOCK(pmap);
1674	oldpmap = pmap_install(pmap);
1675	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1676		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1677		m = TAILQ_NEXT(m, listq);
1678	}
1679	pmap_install(oldpmap);
1680 	PMAP_UNLOCK(pmap);
1681}
1682
1683/*
1684 * this code makes some *MAJOR* assumptions:
1685 * 1. Current pmap & pmap exist.
1686 * 2. Not wired.
1687 * 3. Read access.
1688 * 4. No page table pages.
1689 * but is *MUCH* faster than pmap_enter...
1690 */
1691
1692void
1693pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1694{
1695	pmap_t oldpmap;
1696
1697	PMAP_LOCK(pmap);
1698	oldpmap = pmap_install(pmap);
1699	pmap_enter_quick_locked(pmap, va, m, prot);
1700	pmap_install(oldpmap);
1701	PMAP_UNLOCK(pmap);
1702}
1703
1704static void
1705pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1706    vm_prot_t prot)
1707{
1708	struct ia64_lpte *pte;
1709	boolean_t managed;
1710
1711	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1712	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
1713	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1714	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1715	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1716
1717	if ((pte = pmap_find_pte(va)) == NULL)
1718		return;
1719
1720	if (!pmap_present(pte)) {
1721		/* Enter on the PV list if the page is managed. */
1722		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1723			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1724				pmap_free_pte(pte, va);
1725				return;
1726			}
1727			managed = TRUE;
1728		} else
1729			managed = FALSE;
1730
1731		/* Increment counters. */
1732		pmap->pm_stats.resident_count++;
1733
1734		/* Initialise with R/O protection and enter into VHPT. */
1735		pmap_enter_vhpt(pte, va);
1736		pmap_pte_prot(pmap, pte,
1737		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1738		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1739	}
1740}
1741
1742/*
1743 * pmap_object_init_pt preloads the ptes for a given object
1744 * into the specified pmap.  This eliminates the blast of soft
1745 * faults on process startup and immediately after an mmap.
1746 */
1747void
1748pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1749		    vm_object_t object, vm_pindex_t pindex,
1750		    vm_size_t size)
1751{
1752
1753	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1754	KASSERT(object->type == OBJT_DEVICE,
1755	    ("pmap_object_init_pt: non-device object"));
1756}
1757
1758/*
1759 *	Routine:	pmap_change_wiring
1760 *	Function:	Change the wiring attribute for a map/virtual-address
1761 *			pair.
1762 *	In/out conditions:
1763 *			The mapping must already exist in the pmap.
1764 */
1765void
1766pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1770{
1771	pmap_t oldpmap;
1772	struct ia64_lpte *pte;
1773
1774	PMAP_LOCK(pmap);
1775	oldpmap = pmap_install(pmap);
1776
1777	pte = pmap_find_vhpt(va);
1778	KASSERT(pte != NULL, ("pte"));
1779	if (wired && !pmap_wired(pte)) {
1780		pmap->pm_stats.wired_count++;
1781		pmap_set_wired(pte);
1782	} else if (!wired && pmap_wired(pte)) {
1783		pmap->pm_stats.wired_count--;
1784		pmap_clear_wired(pte);
1785	}
1786
1787	pmap_install(oldpmap);
1788	PMAP_UNLOCK(pmap);
1789}
1790
1791
1792
1793/*
1794 *	Copy the range specified by src_addr/len
1795 *	from the source map to the range dst_addr/len
1796 *	in the destination map.
1797 *
1798 *	This routine is only advisory and need not do anything.
1799 */
1800
1801void
1802pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1803	  vm_offset_t src_addr)
1804{
1805}
1806
1807
1808/*
1809 *	pmap_zero_page zeros the specified hardware page by
1810 *	mapping it into virtual memory and using bzero to clear
1811 *	its contents.
1812 */
1813
1814void
1815pmap_zero_page(vm_page_t m)
1816{
1817	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1818	bzero((caddr_t) va, PAGE_SIZE);
1819}
1820
1821
1822/*
1823 *	pmap_zero_page_area zeros the specified hardware page by
1824 *	mapping it into virtual memory and using bzero to clear
1825 *	its contents.
1826 *
1827 *	off and size must reside within a single page.
1828 */
1829
1830void
1831pmap_zero_page_area(vm_page_t m, int off, int size)
1832{
1833	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1834	bzero((char *)(caddr_t)va + off, size);
1835}
1836
1837
1838/*
1839 *	pmap_zero_page_idle zeros the specified hardware page by
1840 *	mapping it into virtual memory and using bzero to clear
1841 *	its contents.  This is for the vm_idlezero process.
1842 */
1843
1844void
1845pmap_zero_page_idle(vm_page_t m)
1846{
1847	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1848	bzero((caddr_t) va, PAGE_SIZE);
1849}
1850
1851
1852/*
1853 *	pmap_copy_page copies the specified (machine independent)
1854 *	page by mapping the page into virtual memory and using
1855 *	bcopy to copy the page, one machine dependent page at a
1856 *	time.
1857 */
1858void
1859pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1860{
1861	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
1862	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
1863	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
1864}
1865
1866/*
1867 * Returns true if the pmap's pv is one of the first
1868 * 16 pvs linked to from this page.  This count may
1869 * be changed upwards or downwards in the future; it
1870 * is only necessary that true be returned for a small
1871 * subset of pmaps for proper page aging.
1872 */
1873boolean_t
1874pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1875{
1876	pv_entry_t pv;
1877	int loops = 0;
1878
1879	if (m->flags & PG_FICTITIOUS)
1880		return FALSE;
1881
1882	/*
1883	 * Check the page's current mappings, returning immediately if one is found for this pmap.
1884	 */
1885	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1886	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1887		if (pv->pv_pmap == pmap) {
1888			return TRUE;
1889		}
1890		loops++;
1891		if (loops >= 16)
1892			break;
1893	}
1894	return (FALSE);
1895}
1896
1897/*
1898 * Remove all pages from the specified address space;
1899 * this aids process exit speeds.  Also, this code
1900 * is special cased for current process only, but
1901 * can have the more generic (and slightly slower)
1902 * mode enabled.  This is much faster than pmap_remove
1903 * in the case of running down an entire address space.
1904 */
1905void
1906pmap_remove_pages(pmap_t pmap)
1907{
1908	pmap_t oldpmap;
1909	pv_entry_t pv, npv;
1910
1911	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
1912		printf("warning: pmap_remove_pages called with non-current pmap\n");
1913		return;
1914	}
1915
1916	vm_page_lock_queues();
1917	PMAP_LOCK(pmap);
1918	oldpmap = pmap_install(pmap);
1919
1920	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
1921		struct ia64_lpte *pte;
1922
1923		npv = TAILQ_NEXT(pv, pv_plist);
1924
1925		pte = pmap_find_vhpt(pv->pv_va);
1926		KASSERT(pte != NULL, ("pte"));
1927		if (!pmap_wired(pte))
1928			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
1929	}
1930
1931	pmap_install(oldpmap);
1932	PMAP_UNLOCK(pmap);
1933	vm_page_unlock_queues();
1934}
1935
1936/*
1937 *	pmap_ts_referenced:
1938 *
1939 *	Return a count of reference bits for a page, clearing those bits.
1940 *	It is not necessary for every reference bit to be cleared, but it
1941 *	is necessary that 0 only be returned when there are truly no
1942 *	reference bits set.
1943 *
1944 *	XXX: The exact number of bits to check and clear is a matter that
1945 *	should be tested and standardized at some point in the future for
1946 *	optimal aging of shared pages.
1947 */
1948int
1949pmap_ts_referenced(vm_page_t m)
1950{
1951	struct ia64_lpte *pte;
1952	pmap_t oldpmap;
1953	pv_entry_t pv;
1954	int count = 0;
1955
1956	if (m->flags & PG_FICTITIOUS)
1957		return 0;
1958
1959	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1960		PMAP_LOCK(pv->pv_pmap);
1961		oldpmap = pmap_install(pv->pv_pmap);
1962		pte = pmap_find_vhpt(pv->pv_va);
1963		KASSERT(pte != NULL, ("pte"));
1964		if (pmap_accessed(pte)) {
1965			count++;
1966			pmap_clear_accessed(pte);
1967			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1968		}
1969		pmap_install(oldpmap);
1970		PMAP_UNLOCK(pv->pv_pmap);
1971	}
1972
1973	return count;
1974}
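/*
 * Annotation (assumption, not taken from this file): the page daemon
 * typically folds the count returned by pmap_ts_referenced() into a page's
 * activity, along the lines of
 *
 *	act_count += pmap_ts_referenced(m);
 *
 * which is why clearing only a subset of the reference bits is acceptable,
 * as long as zero is returned only when no mapping had its bit set.
 */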
1975
1976/*
1977 *	pmap_is_modified:
1978 *
1979 *	Return whether or not the specified physical page was modified
1980 *	in any physical maps.
1981 */
1982boolean_t
1983pmap_is_modified(vm_page_t m)
1984{
1985	struct ia64_lpte *pte;
1986	pmap_t oldpmap;
1987	pv_entry_t pv;
1988	boolean_t rv;
1989
1990	rv = FALSE;
1991	if (m->flags & PG_FICTITIOUS)
1992		return (rv);
1993
1994	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1995		PMAP_LOCK(pv->pv_pmap);
1996		oldpmap = pmap_install(pv->pv_pmap);
1997		pte = pmap_find_vhpt(pv->pv_va);
1998		pmap_install(oldpmap);
1999		KASSERT(pte != NULL, ("pte"));
2000		rv = pmap_dirty(pte) ? TRUE : FALSE;
2001		PMAP_UNLOCK(pv->pv_pmap);
2002		if (rv)
2003			break;
2004	}
2005
2006	return (rv);
2007}
2008
2009/*
2010 *	pmap_is_prefaultable:
2011 *
2012 *	Return whether or not the specified virtual address is elgible
2013 *	Return whether or not the specified virtual address is eligible
2014 */
2015boolean_t
2016pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2017{
2018	struct ia64_lpte *pte;
2019
2020	pte = pmap_find_vhpt(addr);
2021	if (pte != NULL && pmap_present(pte))
2022		return (FALSE);
2023	return (TRUE);
2024}
2025
2026/*
2027 *	Clear the modify bits on the specified physical page.
2028 */
2029void
2030pmap_clear_modify(vm_page_t m)
2031{
2032	struct ia64_lpte *pte;
2033	pmap_t oldpmap;
2034	pv_entry_t pv;
2035
2036	if (m->flags & PG_FICTITIOUS)
2037		return;
2038
2039	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2040		PMAP_LOCK(pv->pv_pmap);
2041		oldpmap = pmap_install(pv->pv_pmap);
2042		pte = pmap_find_vhpt(pv->pv_va);
2043		KASSERT(pte != NULL, ("pte"));
2044		if (pmap_dirty(pte)) {
2045			pmap_clear_dirty(pte);
2046			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2047		}
2048		pmap_install(oldpmap);
2049		PMAP_UNLOCK(pv->pv_pmap);
2050	}
2051}
2052
2053/*
2054 *	pmap_clear_reference:
2055 *
2056 *	Clear the reference bit on the specified physical page.
2057 */
2058void
2059pmap_clear_reference(vm_page_t m)
2060{
2061	struct ia64_lpte *pte;
2062	pmap_t oldpmap;
2063	pv_entry_t pv;
2064
2065	if (m->flags & PG_FICTITIOUS)
2066		return;
2067
2068	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2069		PMAP_LOCK(pv->pv_pmap);
2070		oldpmap = pmap_install(pv->pv_pmap);
2071		pte = pmap_find_vhpt(pv->pv_va);
2072		KASSERT(pte != NULL, ("pte"));
2073		if (pmap_accessed(pte)) {
2074			pmap_clear_accessed(pte);
2075			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2076		}
2077		pmap_install(oldpmap);
2078		PMAP_UNLOCK(pv->pv_pmap);
2079	}
2080}
2081
2082/*
2083 * Clear the write and modified bits in each of the given page's mappings.
2084 */
2085void
2086pmap_remove_write(vm_page_t m)
2087{
2088	struct ia64_lpte *pte;
2089	pmap_t oldpmap, pmap;
2090	pv_entry_t pv;
2091	vm_prot_t prot;
2092
2093	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2094	if ((m->flags & PG_FICTITIOUS) != 0 ||
2095	    (m->flags & PG_WRITEABLE) == 0)
2096		return;
2097	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2098		pmap = pv->pv_pmap;
2099		PMAP_LOCK(pmap);
2100		oldpmap = pmap_install(pmap);
2101		pte = pmap_find_vhpt(pv->pv_va);
2102		KASSERT(pte != NULL, ("pte"));
2103		prot = pmap_prot(pte);
2104		if ((prot & VM_PROT_WRITE) != 0) {
2105			if (pmap_dirty(pte)) {
2106				vm_page_dirty(m);
2107				pmap_clear_dirty(pte);
2108			}
2109			prot &= ~VM_PROT_WRITE;
2110			pmap_pte_prot(pmap, pte, prot);
2111			pmap_invalidate_page(pmap, pv->pv_va);
2112		}
2113		pmap_install(oldpmap);
2114		PMAP_UNLOCK(pmap);
2115	}
2116	vm_page_flag_clear(m, PG_WRITEABLE);
2117}
2118
2119/*
2120 * Map a set of physical memory pages into the kernel virtual
2121 * address space. Return a pointer to where it is mapped. This
2122 * routine is intended to be used for mapping device memory, NOT real
2123 * memory.  On ia64 it returns an uncacheable region 6 address.
2124 */
2125void *
2126pmap_mapdev(vm_offset_t pa, vm_size_t size)
2127{
2128	return ((void *)IA64_PHYS_TO_RR6(pa));
2129}
2130
2131/*
2132 * 'Unmap' a range mapped by pmap_mapdev().
2133 */
2134void
2135pmap_unmapdev(vm_offset_t va, vm_size_t size)
2136{
2137	return;
2138}
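/*
 * Example (hypothetical physical address, for illustration only): a driver
 * wanting to touch a block of device registers could pair the two routines
 * above as
 *
 *	void *regs;
 *
 *	regs = pmap_mapdev(0xfed00000UL, PAGE_SIZE);
 *	... access the registers through 'regs' ...
 *	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
 *
 * On ia64 the unmap is a no-op because pmap_mapdev() only computes an
 * address in the direct-mapped region; the calls are kept paired for
 * portability with other pmap implementations.
 */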
2139
2140/*
2141 * perform the pmap work for mincore
2142 */
2143int
2144pmap_mincore(pmap_t pmap, vm_offset_t addr)
2145{
2146	pmap_t oldpmap;
2147	struct ia64_lpte *pte, tpte;
2148	int val = 0;
2149
2150	PMAP_LOCK(pmap);
2151	oldpmap = pmap_install(pmap);
2152	pte = pmap_find_vhpt(addr);
2153	if (pte != NULL) {
2154		tpte = *pte;
2155		pte = &tpte;
2156	}
2157	pmap_install(oldpmap);
2158	PMAP_UNLOCK(pmap);
2159
2160	if (pte == NULL)
2161		return (0);
2162
2163	if (pmap_present(pte)) {
2164		vm_page_t m;
2165		vm_offset_t pa;
2166
2167		val = MINCORE_INCORE;
2168		if (!pmap_managed(pte))
2169			return (val);
2170
2171		pa = pmap_ppn(pte);
2172
2173		m = PHYS_TO_VM_PAGE(pa);
2174
2175		/*
2176		 * Modified by us
2177		 */
2178		if (pmap_dirty(pte))
2179			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2180		else {
2181			/*
2182			 * Modified by someone
2183			 */
2184			vm_page_lock_queues();
2185			if (pmap_is_modified(m))
2186				val |= MINCORE_MODIFIED_OTHER;
2187			vm_page_unlock_queues();
2188		}
2189		/*
2190		 * Referenced by us
2191		 */
2192		if (pmap_accessed(pte))
2193			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2194		else {
2195			/*
2196			 * Referenced by someone
2197			 */
2198			vm_page_lock_queues();
2199			if (pmap_ts_referenced(m)) {
2200				val |= MINCORE_REFERENCED_OTHER;
2201				vm_page_flag_set(m, PG_REFERENCED);
2202			}
2203			vm_page_unlock_queues();
2204		}
2205	}
2206	return (val);
2207}
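/*
 * For reference (flags defined in <sys/mman.h>, not here): the value built
 * above combines MINCORE_INCORE with MINCORE_MODIFIED/MINCORE_REFERENCED,
 * which report state seen through the queried pmap, and the *_OTHER
 * variants, which report state seen through any mapping of the page.
 */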
2208
2209void
2210pmap_activate(struct thread *td)
2211{
2212	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2213}
2214
2215pmap_t
2216pmap_switch(pmap_t pm)
2217{
2218	pmap_t prevpm;
2219	int i;
2220
2221	mtx_assert(&sched_lock, MA_OWNED);
2222
2223	prevpm = PCPU_GET(current_pmap);
2224	if (prevpm == pm)
2225		return (prevpm);
2226	if (prevpm != NULL)
2227		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
2228	if (pm == NULL) {
2229		for (i = 0; i < 5; i++) {
2230			ia64_set_rr(IA64_RR_BASE(i),
2231			    (i << 8)|(PAGE_SHIFT << 2)|1);
2232		}
2233	} else {
2234		for (i = 0; i < 5; i++) {
2235			ia64_set_rr(IA64_RR_BASE(i),
2236			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2237		}
2238		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
2239	}
2240	PCPU_SET(current_pmap, pm);
2241	__asm __volatile("srlz.d");
2242	return (prevpm);
2243}
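/*
 * Annotation (field layout per the IA-64 architecture, restated here for
 * convenience): the value written to each region register above is
 *
 *	bit  0      ve  - VHPT walker enable (always 1 here)
 *	bits 2-7    ps  - preferred page size as log2 (PAGE_SHIFT here)
 *	bits 8-31   rid - region identifier (i for the NULL pmap,
 *	                  pm->pm_rid[i] otherwise)
 *
 * The closing "srlz.d" serializes data references so that subsequent
 * memory accesses observe the new region registers.
 */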
2244
2245static pmap_t
2246pmap_install(pmap_t pm)
2247{
2248	pmap_t prevpm;
2249
2250	mtx_lock_spin(&sched_lock);
2251	prevpm = pmap_switch(pm);
2252	mtx_unlock_spin(&sched_lock);
2253	return (prevpm);
2254}
2255
2256vm_offset_t
2257pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2258{
2259
2260	return (addr);
2261}
2262
2263#include "opt_ddb.h"
2264
2265#ifdef DDB
2266
2267#include <ddb/ddb.h>
2268
2269static const char*	psnames[] = {
2270	"1B",	"2B",	"4B",	"8B",
2271	"16B",	"32B",	"64B",	"128B",
2272	"256B",	"512B",	"1K",	"2K",
2273	"4K",	"8K",	"16K",	"32K",
2274	"64K",	"128K",	"256K",	"512K",
2275	"1M",	"2M",	"4M",	"8M",
2276	"16M",	"32M",	"64M",	"128M",
2277	"256M",	"512M",	"1G",	"2G"
2278};
2279
2280static void
2281print_trs(int type)
2282{
2283	struct ia64_pal_result res;
2284	int i, maxtr;
2285	struct {
2286		pt_entry_t	pte;
2287		uint64_t	itir;
2288		uint64_t	ifa;
2289		struct ia64_rr	rr;
2290	} buf;
2291	static const char *manames[] = {
2292		"WB",	"bad",	"bad",	"bad",
2293		"UC",	"UCE",	"WC",	"NaT",
2294	};
2295
2296	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2297	if (res.pal_status != 0) {
2298		db_printf("Can't get VM summary\n");
2299		return;
2300	}
2301
2302	if (type == 0)
2303		maxtr = (res.pal_result[0] >> 40) & 0xff;
2304	else
2305		maxtr = (res.pal_result[0] >> 32) & 0xff;
2306
2307	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2308	for (i = 0; i <= maxtr; i++) {
2309		bzero(&buf, sizeof(buf));
2310		res = ia64_call_pal_stacked_physical
2311			(PAL_VM_TR_READ, i, type, ia64_tpa((uint64_t) &buf));
2312		if (!(res.pal_result[0] & 1))
2313			buf.pte &= ~PTE_AR_MASK;
2314		if (!(res.pal_result[0] & 2))
2315			buf.pte &= ~PTE_PL_MASK;
2316		if (!(res.pal_result[0] & 4))
2317			pmap_clear_dirty(&buf);
2318		if (!(res.pal_result[0] & 8))
2319			buf.pte &= ~PTE_MA_MASK;
2320		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2321		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2322		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2323		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2324		    (buf.pte & PTE_ED) ? 1 : 0,
2325		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2326		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2327		    (pmap_dirty(&buf)) ? 1 : 0,
2328		    (pmap_accessed(&buf)) ? 1 : 0,
2329		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2330		    (pmap_present(&buf)) ? 1 : 0,
2331		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2332	}
2333}
2334
2335DB_COMMAND(itr, db_itr)
2336{
2337	print_trs(0);
2338}
2339
2340DB_COMMAND(dtr, db_dtr)
2341{
2342	print_trs(1);
2343}
2344
2345DB_COMMAND(rr, db_rr)
2346{
2347	int i;
2348	uint64_t t;
2349	struct ia64_rr rr;
2350
2351	db_printf("RR RID    PgSz VE\n");
2352	for (i = 0; i < 8; i++) {
2353		__asm __volatile ("mov %0=rr[%1]"
2354				  : "=r"(t)
2355				  : "r"(IA64_RR_BASE(i)));
2356		*(uint64_t *) &rr = t;
2357		db_printf("%d  %06x %4s %d\n",
2358		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2359	}
2360}
2361
2362DB_COMMAND(thash, db_thash)
2363{
2364	if (!have_addr)
2365		return;
2366
2367	db_printf("%p\n", (void *) ia64_thash(addr));
2368}
2369
2370DB_COMMAND(ttag, db_ttag)
2371{
2372	if (!have_addr)
2373		return;
2374
2375	db_printf("0x%lx\n", ia64_ttag(addr));
2376}
2377
2378DB_COMMAND(kpte, db_kpte)
2379{
2380	struct ia64_lpte *pte;
2381
2382	if (!have_addr) {
2383		db_printf("usage: kpte <kva>\n");
2384		return;
2385	}
2386	if (addr < VM_MIN_KERNEL_ADDRESS) {
2387		db_printf("kpte: error: invalid <kva>\n");
2388		return;
2389	}
2390	pte = &ia64_kptdir[KPTE_DIR_INDEX(addr)][KPTE_PTE_INDEX(addr)];
2391	db_printf("kpte at %p:\n", pte);
2392	db_printf("  pte  =%016lx\n", pte->pte);
2393	db_printf("  itir =%016lx\n", pte->itir);
2394	db_printf("  tag  =%016lx\n", pte->tag);
2395	db_printf("  chain=%016lx\n", pte->chain);
2396}
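/*
 * Example ddb(4) session using the commands defined above (addresses are
 * hypothetical):
 *
 *	db> itr				dump the instruction TRs
 *	db> dtr				dump the data TRs
 *	db> rr				dump the eight region registers
 *	db> thash 0xa000000000123000	print the VHPT slot for the address
 *	db> ttag 0xa000000000123000	print the translation tag
 *	db> kpte 0xa000000000123000	dump the kernel pte for the KVA
 */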
2397
2398#endif
2399