1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 152224 2005-11-09 08:19:21Z alc $");
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/lock.h>
54#include <sys/mman.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/smp.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60
61#include <vm/vm.h>
62#include <vm/vm_page.h>
63#include <vm/vm_map.h>
64#include <vm/vm_object.h>
65#include <vm/vm_pageout.h>
66#include <vm/uma.h>
67
68#include <machine/md_var.h>
69#include <machine/pal.h>
70
71/*
72 *	Manages physical address maps.
73 *
74 *	In addition to hardware address maps, this
75 *	module is called upon to provide software-use-only
76 *	maps which may or may not be stored in the same
77 *	form as hardware maps.  These pseudo-maps are
78 *	used to store intermediate results from copy
79 *	operations to and from address spaces.
80 *
81 *	Since the information managed by this module is
82 *	also stored by the logical address mapping module,
83 *	this module may throw away valid virtual-to-physical
84 *	mappings at almost any time.  However, invalidations
85 *	of virtual-to-physical mappings must be done as
86 *	requested.
87 *
88 *	In order to cope with hardware architectures which
89 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidation or reduced-protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and when physical maps must be made correct.
95 */
96
97/*
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
101 *
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
104 *
105 * Region 0..4
106 *	User virtually mapped
107 *
108 * Region 5
109 *	Kernel virtually mapped
110 *
111 * Region 6
112 *	Kernel physically mapped uncacheable
113 *
114 * Region 7
115 *	Kernel physically mapped cacheable
116 */
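/*
 * How these regions meet the hardware: a region register value is put
 * together as (rid << 8) | (page_shift << 2) | ve, which is the encoding
 * used by the ia64_set_rr() calls in pmap_bootstrap() below.  RIDs 0..7
 * are reserved up front for the kernel (see the pmap_ridmap setup below)
 * and each user pmap is handed five RIDs, one per user region
 * (pm_rid[0..4]).
 */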
117
118/* XXX move to a header. */
119extern uint64_t ia64_gateway_page[];
120
121MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
122
123#ifndef PMAP_SHPGPERPROC
124#define PMAP_SHPGPERPROC 200
125#endif
126
127#if !defined(DIAGNOSTIC)
128#define PMAP_INLINE __inline
129#else
130#define PMAP_INLINE
131#endif
132
133#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
134#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
135#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
136#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
137#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
138#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
139#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
140
141#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
142#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
143#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
144#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
145
146#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
147
148/*
149 * The VHPT bucket head structure.
150 */
151struct ia64_bucket {
152	uint64_t	chain;
153	struct mtx	mutex;
154	u_int		length;
155};
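/*
 * Every entry in the VHPT proper points at one of these buckets through
 * its "chain" word (see pmap_bootstrap()).  The bucket's own "chain"
 * holds the physical address of the first ia64_lpte on that bucket's
 * collision chain (0 if empty), "length" feeds the
 * machdep.vhpt.population sysctl, and the spin mutex serializes updates
 * to the chain.
 */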
156
157/*
158 * Statically allocated kernel pmap
159 */
160struct pmap kernel_pmap_store;
161
162vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
163vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
164
165/*
166 * Kernel virtual memory management.
167 */
168static int nkpt;
169struct ia64_lpte **ia64_kptdir;
170#define KPTE_DIR_INDEX(va) \
171	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
172#define KPTE_PTE_INDEX(va) \
173	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
174#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
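/*
 * Illustrative arithmetic, assuming the usual 8KB kernel page size
 * (PAGE_SHIFT == 13) and 32-byte ia64_lptes: KPTE_PTE_INDEX() selects
 * VA bits 13..20 (256 PTEs per leaf page, so NKPTEPG == 256) and
 * KPTE_DIR_INDEX() selects VA bits 21..30 (1024 slots per ia64_kptdir
 * page), so a fully populated directory covers 1024 * 256 * 8KB == 2GB
 * of region 5 kernel VA.
 */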
175
176vm_offset_t kernel_vm_end;
177
178/* Values for ptc.e. XXX values for SKI. */
179static uint64_t pmap_ptc_e_base = 0x100000000;
180static uint64_t pmap_ptc_e_count1 = 3;
181static uint64_t pmap_ptc_e_count2 = 2;
182static uint64_t pmap_ptc_e_stride1 = 0x2000;
183static uint64_t pmap_ptc_e_stride2 = 0x100000000;
184struct mtx pmap_ptcmutex;
185
186/*
187 * Data for the RID allocator
188 */
189static int pmap_ridcount;
190static int pmap_rididx;
191static int pmap_ridmapsz;
192static int pmap_ridmax;
193static uint64_t *pmap_ridmap;
194struct mtx pmap_ridmutex;
195
196/*
197 * Data for the pv entry allocation mechanism
198 */
199static uma_zone_t pvzone;
200static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
201
202/*
203 * Data for allocating PTEs for user processes.
204 */
205static uma_zone_t ptezone;
206
207/*
208 * Virtual Hash Page Table (VHPT) data.
209 */
210/* SYSCTL_DECL(_machdep); */
211SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
212
213struct ia64_bucket *pmap_vhpt_bucket;
214
215int pmap_vhpt_nbuckets;
216SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
217    &pmap_vhpt_nbuckets, 0, "");
218
219uint64_t pmap_vhpt_base[MAXCPU];
220
221int pmap_vhpt_log2size = 0;
222TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
223SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
224    &pmap_vhpt_log2size, 0, "");
225
226static int pmap_vhpt_inserts;
227SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
228    &pmap_vhpt_inserts, 0, "");
229
230static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
231SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
232    NULL, 0, pmap_vhpt_population, "I", "");
233
234static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
235
236static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
237static pv_entry_t get_pv_entry(pmap_t locked_pmap);
238
239static pmap_t	pmap_install(pmap_t);
240static void	pmap_invalidate_all(pmap_t pmap);
241static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
242		    vm_offset_t va, pv_entry_t pv, int freepte);
243
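/*
 * Steal physical memory from the phys_avail[] array before the VM system
 * is bootstrapped.  The stolen range is removed from phys_avail[] and is
 * returned as a region 7 (cacheable, identity-mapped) virtual address.
 */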
244vm_offset_t
245pmap_steal_memory(vm_size_t size)
246{
247	vm_size_t bank_size;
248	vm_offset_t pa, va;
249
250	size = round_page(size);
251
252	bank_size = phys_avail[1] - phys_avail[0];
253	while (size > bank_size) {
254		int i;
255		for (i = 0; phys_avail[i+2]; i+= 2) {
256			phys_avail[i] = phys_avail[i+2];
257			phys_avail[i+1] = phys_avail[i+3];
258		}
259		phys_avail[i] = 0;
260		phys_avail[i+1] = 0;
261		if (!phys_avail[0])
262			panic("pmap_steal_memory: out of memory");
263		bank_size = phys_avail[1] - phys_avail[0];
264	}
265
266	pa = phys_avail[0];
267	phys_avail[0] += size;
268
269	va = IA64_PHYS_TO_RR7(pa);
270	bzero((caddr_t) va, size);
271	return va;
272}
273
274/*
275 *	Bootstrap the system enough to run with virtual memory.
276 */
277void
278pmap_bootstrap()
279{
280	struct ia64_pal_result res;
281	struct ia64_lpte *pte;
282	vm_offset_t base, limit;
283	size_t size;
284	int i, j, count, ridbits;
285
286	/*
287	 * Query the PAL Code to find the loop parameters for the
288	 * ptc.e instruction.
289	 */
290	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
291	if (res.pal_status != 0)
292		panic("Can't configure ptc.e parameters");
293	pmap_ptc_e_base = res.pal_result[0];
294	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
295	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
296	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
297	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
298	if (bootverbose)
299		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
300		       "stride1=0x%lx, stride2=0x%lx\n",
301		       pmap_ptc_e_base,
302		       pmap_ptc_e_count1,
303		       pmap_ptc_e_count2,
304		       pmap_ptc_e_stride1,
305		       pmap_ptc_e_stride2);
306	mtx_init(&pmap_ptcmutex, "Global PTC lock", NULL, MTX_SPIN);
307
308	/*
309	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
310	 *
311	 * We currently need at least 19 bits in the RID because PID_MAX
312	 * can only be encoded in 17 bits and we need RIDs for 5 regions
313	 * per process. With PID_MAX equalling 99999 this means that we
314	 * need to be able to encode 499995 (=5*PID_MAX).
315	 * The Itanium processor only has 18 bits and the architected
316	 * minimum is exactly that. So, we cannot use a PID based scheme
317	 * in those cases. Enter pmap_ridmap...
318	 * We should avoid the map when running on a processor that has
319	 * implemented enough bits. This means that we should pass the
320	 * process/thread ID to pmap. This we currently don't do, so we
321	 * use the map anyway. However, we don't want to allocate a map
322	 * that is large enough to cover the range dictated by the number
323	 * of bits in the RID, because that may result in a RID map of
324	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
	 * The bottom line: we create a 32KB map when the processor only
326	 * implements 18 bits (or when we can't figure it out). Otherwise
327	 * we create a 64KB map.
328	 */
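	/*
	 * Concretely: with the 19-bit cap applied below, pmap_ridmax is
	 * 512K, pmap_ridmapsz is 8192 64-bit words and the bitmap steals
	 * 64KB; with only 18 implemented bits everything is half of that.
	 */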
329	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
330	if (res.pal_status != 0) {
331		if (bootverbose)
332			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
333		ridbits = 18; /* guaranteed minimum */
334	} else {
335		ridbits = (res.pal_result[1] >> 8) & 0xff;
336		if (bootverbose)
337			printf("Processor supports %d Region ID bits\n",
338			    ridbits);
339	}
340	if (ridbits > 19)
341		ridbits = 19;
342
343	pmap_ridmax = (1 << ridbits);
344	pmap_ridmapsz = pmap_ridmax / 64;
345	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
346	pmap_ridmap[0] |= 0xff;
347	pmap_rididx = 0;
348	pmap_ridcount = 8;
349	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
350
351	/*
352	 * Allocate some memory for initial kernel 'page tables'.
353	 */
354	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
355	for (i = 0; i < NKPT; i++) {
356		ia64_kptdir[i] = (void*)pmap_steal_memory(PAGE_SIZE);
357	}
358	nkpt = NKPT;
359	kernel_vm_end = NKPT * PAGE_SIZE * NKPTEPG + VM_MIN_KERNEL_ADDRESS -
360	    VM_GATEWAY_SIZE;
361
362	for (i = 0; phys_avail[i+2]; i+= 2)
363		;
364	count = i+2;
365
366	/*
367	 * Figure out a useful size for the VHPT, based on the size of
368	 * physical memory and try to locate a region which is large
369	 * enough to contain the VHPT (which must be a power of two in
370	 * size and aligned to a natural boundary).
371	 * We silently bump up the VHPT size to the minimum size if the
372	 * user has set the tunable too small. Likewise, the VHPT size
373	 * is silently capped to the maximum allowed.
374	 */
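	/*
	 * The default computed below works out to roughly one 32-byte VHPT
	 * entry per page of physical memory (Maxmem is a page count),
	 * rounded up to a power of two and never less than 32KB per CPU.
	 */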
375	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
376	if (pmap_vhpt_log2size == 0) {
377		pmap_vhpt_log2size = 15;
378		size = 1UL << pmap_vhpt_log2size;
379		while (size < Maxmem * 32) {
380			pmap_vhpt_log2size++;
381			size <<= 1;
382		}
383	} else if (pmap_vhpt_log2size < 15)
384		pmap_vhpt_log2size = 15;
385	if (pmap_vhpt_log2size > 61)
386		pmap_vhpt_log2size = 61;
387
388	pmap_vhpt_base[0] = 0;
389	base = limit = 0;
390	size = 1UL << pmap_vhpt_log2size;
391	while (pmap_vhpt_base[0] == 0) {
392		if (bootverbose)
393			printf("Trying VHPT size 0x%lx\n", size);
394		for (i = 0; i < count; i += 2) {
395			base = (phys_avail[i] + size - 1) & ~(size - 1);
396			limit = base + MAXCPU * size;
397			if (limit <= phys_avail[i+1])
398				/*
399				 * VHPT can fit in this region
400				 */
401				break;
402		}
403		if (!phys_avail[i]) {
404			/* Can't fit, try next smaller size. */
405			pmap_vhpt_log2size--;
406			size >>= 1;
407		} else
408			pmap_vhpt_base[0] = IA64_PHYS_TO_RR7(base);
409	}
410	if (pmap_vhpt_log2size < 15)
411		panic("Can't find space for VHPT");
412
413	if (bootverbose)
414		printf("Putting VHPT at 0x%lx\n", base);
415
416	if (base != phys_avail[i]) {
417		/* Split this region. */
418		if (bootverbose)
419			printf("Splitting [%p-%p]\n", (void *)phys_avail[i],
420			    (void *)phys_avail[i+1]);
421		for (j = count; j > i; j -= 2) {
422			phys_avail[j] = phys_avail[j-2];
423			phys_avail[j+1] = phys_avail[j-2+1];
424		}
425		phys_avail[i+1] = base;
426		phys_avail[i+2] = limit;
427	} else
428		phys_avail[i] = limit;
429
430	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
431
432	pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
433	    sizeof(struct ia64_bucket));
434	pte = (struct ia64_lpte *)pmap_vhpt_base[0];
435	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
436		pte[i].pte = 0;
437		pte[i].itir = 0;
438		pte[i].tag = 1UL << 63;	/* Invalid tag */
439		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
440		/* Stolen memory is zeroed! */
441		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
442		    MTX_SPIN);
443	}
444
445	for (i = 1; i < MAXCPU; i++) {
446		pmap_vhpt_base[i] = pmap_vhpt_base[i - 1] + size;
447		bcopy((void *)pmap_vhpt_base[i - 1], (void *)pmap_vhpt_base[i],
448		    size);
449	}
450
451	__asm __volatile("mov cr.pta=%0;; srlz.i;;" ::
452	    "r" (pmap_vhpt_base[0] + (1<<8) + (pmap_vhpt_log2size<<2) + 1));
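	/*
	 * The value written to cr.pta encodes, per the architecture: bit 0
	 * enables the VHPT walker, bits 7:2 hold the table size (log2),
	 * bit 8 selects the long format and bits 63:15 hold the base,
	 * which is why the base must be naturally aligned to the table
	 * size.
	 */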
453
454	virtual_avail = VM_MIN_KERNEL_ADDRESS;
455	virtual_end = VM_MAX_KERNEL_ADDRESS;
456
457	/*
458	 * Initialize the kernel pmap (which is statically allocated).
459	 */
460	PMAP_LOCK_INIT(kernel_pmap);
461	for (i = 0; i < 5; i++)
462		kernel_pmap->pm_rid[i] = 0;
463	kernel_pmap->pm_active = 1;
464	TAILQ_INIT(&kernel_pmap->pm_pvlist);
465	PCPU_SET(current_pmap, kernel_pmap);
466
467	/*
468	 * Region 5 is mapped via the vhpt.
469	 */
470	ia64_set_rr(IA64_RR_BASE(5),
471		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
472
473	/*
474	 * Region 6 is direct mapped UC and region 7 is direct mapped
	 * WB. The details of this are controlled by the Alt {I,D}TLB
476	 * handlers. Here we just make sure that they have the largest
477	 * possible page size to minimise TLB usage.
478	 */
479	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
480	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));
481
482	/*
483	 * Clear out any random TLB entries left over from booting.
484	 */
485	pmap_invalidate_all(kernel_pmap);
486
487	map_gateway_page();
488}
489
490static int
491pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
492{
493	int count, error, i;
494
495	count = 0;
496	for (i = 0; i < pmap_vhpt_nbuckets; i++)
497		count += pmap_vhpt_bucket[i].length;
498
499	error = SYSCTL_OUT(req, &count, sizeof(count));
500	return (error);
501}
502
503/*
504 *	Initialize a vm_page's machine-dependent fields.
505 */
506void
507pmap_page_init(vm_page_t m)
508{
509
510	TAILQ_INIT(&m->md.pv_list);
511	m->md.pv_list_count = 0;
512}
513
514/*
515 *	Initialize the pmap module.
516 *	Called by vm_init, to initialize any structures that the pmap
517 *	system needs to map virtual memory.
518 */
519void
520pmap_init(void)
521{
522	int shpgperproc = PMAP_SHPGPERPROC;
523
524	/*
525	 * Initialize the address space (zone) for the pv entries.  Set a
526	 * high water mark so that the system can recover from excessive
527	 * numbers of pv entries.
528	 */
529	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
530	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
531	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
532	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
533	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
534	pv_entry_high_water = 9 * (pv_entry_max / 10);
535
536	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
537	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
538}
539
540void
541pmap_init2()
542{
543}
544
545
546/***************************************************
547 * Manipulate TLBs for a pmap
548 ***************************************************/
549
550#if 0
551static __inline void
552pmap_invalidate_page_locally(void *arg)
553{
554	vm_offset_t va = (uintptr_t)arg;
555	struct ia64_lpte *pte;
556
557	pte = (struct ia64_lpte *)ia64_thash(va);
558	if (pte->tag == ia64_ttag(va))
559		pte->tag = 1UL << 63;
560	ia64_ptc_l(va, PAGE_SHIFT << 2);
561}
562
563#ifdef SMP
564static void
565pmap_invalidate_page_1(void *arg)
566{
567	void **args = arg;
568	pmap_t oldpmap;
569
570	critical_enter();
571	oldpmap = pmap_install(args[0]);
572	pmap_invalidate_page_locally(args[1]);
573	pmap_install(oldpmap);
574	critical_exit();
575}
576#endif
577
578static void
579pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
580{
581
582	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
583		("invalidating TLB for non-current pmap"));
584
585#ifdef SMP
586	if (mp_ncpus > 1) {
587		void *args[2];
588		args[0] = pmap;
589		args[1] = (void *)va;
590		smp_rendezvous(NULL, pmap_invalidate_page_1, NULL, args);
591	} else
592#endif
593	pmap_invalidate_page_locally((void *)va);
594}
595#endif /* 0 */
596
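/*
 * Invalidate a single page mapping: clear any matching entry in every
 * CPU's VHPT copy, then purge the translation from all TLBs with ptc.ga.
 * The purge is issued under the global pmap_ptcmutex, presumably because
 * global purges must not be issued concurrently from multiple CPUs.
 */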
597static void
598pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
599{
600	struct ia64_lpte *pte;
601	int i, vhpt_ofs;
602
603	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
604		("invalidating TLB for non-current pmap"));
605
606	vhpt_ofs = ia64_thash(va) - pmap_vhpt_base[PCPU_GET(cpuid)];
607	critical_enter();
608	for (i = 0; i < MAXCPU; i++) {
609		pte = (struct ia64_lpte *)(pmap_vhpt_base[i] + vhpt_ofs);
610		if (pte->tag == ia64_ttag(va))
611			pte->tag = 1UL << 63;
612	}
613	critical_exit();
614	mtx_lock_spin(&pmap_ptcmutex);
615	ia64_ptc_ga(va, PAGE_SHIFT << 2);
616	mtx_unlock_spin(&pmap_ptcmutex);
617}
618
619static void
620pmap_invalidate_all_1(void *arg)
621{
622	uint64_t addr;
623	int i, j;
624
625	critical_enter();
626	addr = pmap_ptc_e_base;
627	for (i = 0; i < pmap_ptc_e_count1; i++) {
628		for (j = 0; j < pmap_ptc_e_count2; j++) {
629			ia64_ptc_e(addr);
630			addr += pmap_ptc_e_stride2;
631		}
632		addr += pmap_ptc_e_stride1;
633	}
634	critical_exit();
635}
636
637static void
638pmap_invalidate_all(pmap_t pmap)
639{
640
641	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
642		("invalidating TLB for non-current pmap"));
643
644#ifdef SMP
645	if (mp_ncpus > 1)
646		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
647	else
648#endif
649	pmap_invalidate_all_1(NULL);
650}
651
652static uint32_t
653pmap_allocate_rid(void)
654{
655	uint64_t bit, bits;
656	int rid;
657
658	mtx_lock(&pmap_ridmutex);
659	if (pmap_ridcount == pmap_ridmax)
660		panic("pmap_allocate_rid: All Region IDs used");
661
662	/* Find an index with a free bit. */
663	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
664		pmap_rididx++;
665		if (pmap_rididx == pmap_ridmapsz)
666			pmap_rididx = 0;
667	}
668	rid = pmap_rididx * 64;
669
670	/* Find a free bit. */
671	bit = 1UL;
672	while (bits & bit) {
673		rid++;
674		bit <<= 1;
675	}
676
677	pmap_ridmap[pmap_rididx] |= bit;
678	pmap_ridcount++;
679	mtx_unlock(&pmap_ridmutex);
680
681	return rid;
682}
683
684static void
685pmap_free_rid(uint32_t rid)
686{
687	uint64_t bit;
688	int idx;
689
690	idx = rid / 64;
691	bit = ~(1UL << (rid & 63));
692
693	mtx_lock(&pmap_ridmutex);
694	pmap_ridmap[idx] &= bit;
695	pmap_ridcount--;
696	mtx_unlock(&pmap_ridmutex);
697}
698
699/*
 * Returns non-zero if modifications at the given virtual address should
 * be tracked, i.e. if the address lies outside the kernel's "clean" submap.
702 */
703static PMAP_INLINE int
704pmap_track_modified(vm_offset_t va)
705{
	return ((va < kmi.clean_sva) || (va >= kmi.clean_eva));
710}
711
712/***************************************************
713 * Page table page management routines.....
714 ***************************************************/
715
716void
717pmap_pinit0(struct pmap *pmap)
718{
719	/* kernel_pmap is the same as any other pmap. */
720	pmap_pinit(pmap);
721}
722
723/*
724 * Initialize a preallocated and zeroed pmap structure,
725 * such as one in a vmspace structure.
726 */
727void
728pmap_pinit(struct pmap *pmap)
729{
730	int i;
731
732	PMAP_LOCK_INIT(pmap);
733	for (i = 0; i < 5; i++)
734		pmap->pm_rid[i] = pmap_allocate_rid();
735	pmap->pm_active = 0;
736	TAILQ_INIT(&pmap->pm_pvlist);
737	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
738}
739
740/***************************************************
741 * Pmap allocation/deallocation routines.
742 ***************************************************/
743
744/*
745 * Release any resources held by the given physical map.
746 * Called when a pmap initialized by pmap_pinit is being released.
747 * Should only be called if the map contains no valid mappings.
748 */
749void
750pmap_release(pmap_t pmap)
751{
752	int i;
753
754	for (i = 0; i < 5; i++)
755		if (pmap->pm_rid[i])
756			pmap_free_rid(pmap->pm_rid[i]);
757	PMAP_LOCK_DESTROY(pmap);
758}
759
760/*
761 * grow the number of kernel page table entries, if needed
762 */
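/*
 * Each page table page added here maps another NKPTEPG * PAGE_SIZE bytes
 * of kernel VA (2MB with 8KB pages), which is the step kernel_vm_end is
 * advanced by below.
 */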
763void
764pmap_growkernel(vm_offset_t addr)
765{
766	struct ia64_lpte *ptepage;
767	vm_page_t nkpg;
768
769	if (kernel_vm_end >= addr)
770		return;
771
772	critical_enter();
773
774	while (kernel_vm_end < addr) {
775		/* We could handle more by increasing the size of kptdir. */
776		if (nkpt == MAXKPT)
777			panic("pmap_growkernel: out of kernel address space");
778
779		nkpg = vm_page_alloc(NULL, nkpt,
780		    VM_ALLOC_NOOBJ | VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
781		if (!nkpg)
782			panic("pmap_growkernel: no memory to grow kernel");
783
784		ptepage = (struct ia64_lpte *)
785		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
786		bzero(ptepage, PAGE_SIZE);
787		ia64_kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
788
789		nkpt++;
790		kernel_vm_end += PAGE_SIZE * NKPTEPG;
791	}
792
793	critical_exit();
794}
795
796/***************************************************
797 * page management routines.
798 ***************************************************/
799
800/*
801 * free the pv_entry back to the free list
802 */
803static PMAP_INLINE void
804free_pv_entry(pv_entry_t pv)
805{
806	pv_entry_count--;
807	uma_zfree(pvzone, pv);
808}
809
810/*
811 * get a new pv_entry, allocating a block from the system
812 * when needed.
813 */
814static pv_entry_t
815get_pv_entry(pmap_t locked_pmap)
816{
817	static const struct timeval printinterval = { 60, 0 };
818	static struct timeval lastprint;
819	struct vpgqueues *vpq;
820	struct ia64_lpte *pte;
821	pmap_t oldpmap, pmap;
822	pv_entry_t allocated_pv, next_pv, pv;
823	vm_offset_t va;
824	vm_page_t m;
825
826	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
827	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
828	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
829	if (allocated_pv != NULL) {
830		pv_entry_count++;
831		if (pv_entry_count > pv_entry_high_water)
832			pagedaemon_wakeup();
833		else
834			return (allocated_pv);
835	}
836
837	/*
838	 * Reclaim pv entries: At first, destroy mappings to inactive
839	 * pages.  After that, if a pv entry is still needed, destroy
840	 * mappings to active pages.
841	 */
842	if (ratecheck(&lastprint, &printinterval))
843		printf("Approaching the limit on PV entries, "
844		    "increase the vm.pmap.shpgperproc tunable.\n");
845	vpq = &vm_page_queues[PQ_INACTIVE];
846retry:
847	TAILQ_FOREACH(m, &vpq->pl, pageq) {
848		if (m->hold_count || m->busy || (m->flags & PG_BUSY))
849			continue;
850		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
851			va = pv->pv_va;
852			pmap = pv->pv_pmap;
853			if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
854				continue;
855			oldpmap = pmap_install(pmap);
856			pte = pmap_find_vhpt(va);
857			KASSERT(pte != NULL, ("pte"));
858			pmap_remove_pte(pmap, pte, va, pv, 1);
859			pmap_install(oldpmap);
860			if (pmap != locked_pmap)
861				PMAP_UNLOCK(pmap);
862			if (allocated_pv == NULL)
863				allocated_pv = pv;
864			else
865				free_pv_entry(pv);
866		}
867	}
868	if (allocated_pv == NULL) {
869		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
870			vpq = &vm_page_queues[PQ_ACTIVE];
871			goto retry;
872		}
873		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
874	}
875	return (allocated_pv);
876}
877
878/*
879 * Add an ia64_lpte to the VHPT.
880 */
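/*
 * Note that the chain links are physical addresses (ia64_tpa() below and
 * IA64_PHYS_TO_RR7() in the lookup paths), presumably so the chain can be
 * walked without depending on a virtual mapping for the PTEs themselves.
 * New entries are linked at the head of the bucket's chain.
 */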
881static void
882pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
883{
884	struct ia64_bucket *bckt;
885	struct ia64_lpte *vhpte;
886	uint64_t pte_pa;
887
888	/* Can fault, so get it out of the way. */
889	pte_pa = ia64_tpa((vm_offset_t)pte);
890
891	vhpte = (struct ia64_lpte *)ia64_thash(va);
892	bckt = (struct ia64_bucket *)vhpte->chain;
893
894	mtx_lock_spin(&bckt->mutex);
895	pte->chain = bckt->chain;
896	ia64_mf();
897	bckt->chain = pte_pa;
898
899	pmap_vhpt_inserts++;
900	bckt->length++;
901	mtx_unlock_spin(&bckt->mutex);
902}
903
904/*
905 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
906 * worked or an appropriate error code otherwise.
907 */
908static int
909pmap_remove_vhpt(vm_offset_t va)
910{
911	struct ia64_bucket *bckt;
912	struct ia64_lpte *pte;
913	struct ia64_lpte *lpte;
914	struct ia64_lpte *vhpte;
915	uint64_t chain, tag;
916
917	tag = ia64_ttag(va);
918	vhpte = (struct ia64_lpte *)ia64_thash(va);
919	bckt = (struct ia64_bucket *)vhpte->chain;
920
921	lpte = NULL;
922	mtx_lock_spin(&bckt->mutex);
923	chain = bckt->chain;
924	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
925	while (chain != 0 && pte->tag != tag) {
926		lpte = pte;
927		chain = pte->chain;
928		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
929	}
930	if (chain == 0) {
931		mtx_unlock_spin(&bckt->mutex);
932		return (ENOENT);
933	}
934
	/* Snip this pte out of the collision chain. */
936	if (lpte == NULL)
937		bckt->chain = pte->chain;
938	else
939		lpte->chain = pte->chain;
940	ia64_mf();
941
942	bckt->length--;
943	mtx_unlock_spin(&bckt->mutex);
944	return (0);
945}
946
947/*
948 * Find the ia64_lpte for the given va, if any.
949 */
950static struct ia64_lpte *
951pmap_find_vhpt(vm_offset_t va)
952{
953	struct ia64_bucket *bckt;
954	struct ia64_lpte *pte;
955	uint64_t chain, tag;
956
957	tag = ia64_ttag(va);
958	pte = (struct ia64_lpte *)ia64_thash(va);
959	bckt = (struct ia64_bucket *)pte->chain;
960
961	mtx_lock_spin(&bckt->mutex);
962	chain = bckt->chain;
963	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
964	while (chain != 0 && pte->tag != tag) {
965		chain = pte->chain;
966		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
967	}
968	mtx_unlock_spin(&bckt->mutex);
969	return ((chain != 0) ? pte : NULL);
970}
971
972/*
973 * Remove an entry from the list of managed mappings.
974 */
975static int
976pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
977{
978	if (!pv) {
979		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
980			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
981				if (pmap == pv->pv_pmap && va == pv->pv_va)
982					break;
983			}
984		} else {
985			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
986				if (va == pv->pv_va)
987					break;
988			}
989		}
990	}
991
992	if (pv) {
993		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
994		m->md.pv_list_count--;
995		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
996			vm_page_flag_clear(m, PG_WRITEABLE);
997
998		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
999		free_pv_entry(pv);
1000		return 0;
1001	} else {
1002		return ENOENT;
1003	}
1004}
1005
1006/*
1007 * Create a pv entry for page at pa for
1008 * (pmap, va).
1009 */
1010static void
1011pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1012{
1013	pv_entry_t pv;
1014
1015	pv = get_pv_entry(pmap);
1016	pv->pv_pmap = pmap;
1017	pv->pv_va = va;
1018
1019	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1020	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1021	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1022	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1023	m->md.pv_list_count++;
1024}
1025
1026/*
1027 *	Routine:	pmap_extract
1028 *	Function:
1029 *		Extract the physical page address associated
1030 *		with the given map/virtual_address pair.
1031 */
1032vm_paddr_t
1033pmap_extract(pmap_t pmap, vm_offset_t va)
1034{
1035	struct ia64_lpte *pte;
1036	pmap_t oldpmap;
1037	vm_paddr_t pa;
1038
1039	pa = 0;
1040	PMAP_LOCK(pmap);
1041	oldpmap = pmap_install(pmap);
1042	pte = pmap_find_vhpt(va);
1043	if (pte != NULL && pmap_present(pte))
1044		pa = pmap_ppn(pte);
1045	pmap_install(oldpmap);
1046	PMAP_UNLOCK(pmap);
1047	return (pa);
1048}
1049
1050/*
1051 *	Routine:	pmap_extract_and_hold
1052 *	Function:
1053 *		Atomically extract and hold the physical page
1054 *		with the given pmap and virtual address pair
1055 *		if that mapping permits the given protection.
1056 */
1057vm_page_t
1058pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1059{
1060	struct ia64_lpte *pte;
1061	pmap_t oldpmap;
1062	vm_page_t m;
1063
1064	m = NULL;
1065	vm_page_lock_queues();
1066	PMAP_LOCK(pmap);
1067	oldpmap = pmap_install(pmap);
1068	pte = pmap_find_vhpt(va);
1069	if (pte != NULL && pmap_present(pte) &&
1070	    (pmap_prot(pte) & prot) == prot) {
1071		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1072		vm_page_hold(m);
1073	}
1074	vm_page_unlock_queues();
1075	pmap_install(oldpmap);
1076	PMAP_UNLOCK(pmap);
1077	return (m);
1078}
1079
1080/***************************************************
1081 * Low level mapping routines.....
1082 ***************************************************/
1083
1084/*
1085 * Find the kernel lpte for mapping the given virtual address, which
1086 * must be in the part of region 5 which we can cover with our kernel
1087 * 'page tables'.
1088 */
1089static struct ia64_lpte *
1090pmap_find_kpte(vm_offset_t va)
1091{
1092	KASSERT((va >> 61) == 5,
1093		("kernel mapping 0x%lx not in region 5", va));
1094	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1095		("kernel mapping 0x%lx out of range", va));
1096	return (&ia64_kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)]);
1097}
1098
1099/*
1100 * Find a pte suitable for mapping a user-space address. If one exists
1101 * in the VHPT, that one will be returned, otherwise a new pte is
 * in the VHPT, that one will be returned; otherwise a new pte is
1103 */
1104static struct ia64_lpte *
1105pmap_find_pte(vm_offset_t va)
1106{
1107	struct ia64_lpte *pte;
1108
1109	if (va >= VM_MAXUSER_ADDRESS)
1110		return pmap_find_kpte(va);
1111
1112	pte = pmap_find_vhpt(va);
1113	if (pte == NULL) {
		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
		if (pte != NULL)	/* Callers cope with NULL by retrying. */
			pte->tag = 1UL << 63;
1116	}
1117	return (pte);
1118}
1119
1120/*
1121 * Free a pte which is now unused. This simply returns it to the zone
1122 * allocator if it is a user mapping. For kernel mappings, clear the
1123 * valid bit to make it clear that the mapping is not currently used.
1124 */
1125static void
1126pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1127{
1128	if (va < VM_MAXUSER_ADDRESS)
1129		uma_zfree(ptezone, pte);
1130	else
1131		pmap_clear_present(pte);
1132}
1133
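/*
 * Translate a vm_prot_t into the PTE protection, privilege-level and
 * access-rights fields.  For example, VM_PROT_READ|VM_PROT_WRITE is 0x3,
 * so (prot & VM_PROT_ALL) >> 1 indexes prot2ar[] at 1 and yields
 * PTE_AR_RW; kernel mappings and VM_PROT_NONE are always given
 * PTE_PL_KERN so that user code cannot touch them.
 */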
1134static PMAP_INLINE void
1135pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1136{
1137	static int prot2ar[4] = {
1138		PTE_AR_R,	/* VM_PROT_NONE */
1139		PTE_AR_RW,	/* VM_PROT_WRITE */
1140		PTE_AR_RX,	/* VM_PROT_EXECUTE */
1141		PTE_AR_RWX	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1142	};
1143
1144	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK);
1145	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1146	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1147	    ? PTE_PL_KERN : PTE_PL_USER;
1148	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1149}
1150
1151/*
1152 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it is assumed to already be in the
 * VHPT.
 * This function does not set the protection bits.  It's expected
1156 * that those have been set correctly prior to calling this function.
1157 */
1158static void
1159pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1160    boolean_t wired, boolean_t managed)
1161{
1162
1163	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK;
1164	pte->pte |= PTE_PRESENT | PTE_MA_WB;
1165	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1166	pte->pte |= (wired) ? PTE_WIRED : 0;
1167	pte->pte |= pa & PTE_PPN_MASK;
1168
1169	pte->itir = PAGE_SHIFT << 2;
1170
1171	pte->tag = ia64_ttag(va);
1172}
1173
1174/*
1175 * Remove the (possibly managed) mapping represented by pte from the
1176 * given pmap.
1177 */
1178static int
1179pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1180		pv_entry_t pv, int freepte)
1181{
1182	int error;
1183	vm_page_t m;
1184
1185	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1186		("removing pte for non-current pmap"));
1187
1188	/*
1189	 * First remove from the VHPT.
1190	 */
1191	error = pmap_remove_vhpt(va);
1192	if (error)
1193		return (error);
1194
1195	pmap_invalidate_page(pmap, va);
1196
1197	if (pmap_wired(pte))
1198		pmap->pm_stats.wired_count -= 1;
1199
1200	pmap->pm_stats.resident_count -= 1;
1201	if (pmap_managed(pte)) {
1202		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1203		if (pmap_dirty(pte))
1204			if (pmap_track_modified(va))
1205				vm_page_dirty(m);
1206		if (pmap_accessed(pte))
1207			vm_page_flag_set(m, PG_REFERENCED);
1208
1209		error = pmap_remove_entry(pmap, m, va, pv);
1210	}
1211	if (freepte)
1212		pmap_free_pte(pte, va);
1213
1214	return (error);
1215}
1216
1217/*
1218 * Extract the physical page address associated with a kernel
1219 * virtual address.
1220 */
1221vm_paddr_t
1222pmap_kextract(vm_offset_t va)
1223{
1224	struct ia64_lpte *pte;
1225	vm_offset_t gwpage;
1226
1227	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1228
1229	/* Regions 6 and 7 are direct mapped. */
1230	if (va >= IA64_RR_BASE(6))
1231		return (IA64_RR_MASK(va));
1232
1233	/* EPC gateway page? */
1234	gwpage = (vm_offset_t)ia64_get_k5();
1235	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1236		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1237
1238	/* Bail out if the virtual address is beyond our limits. */
1239	if (IA64_RR_MASK(va) >= nkpt * PAGE_SIZE * NKPTEPG)
1240		return (0);
1241
1242	pte = pmap_find_kpte(va);
1243	if (!pmap_present(pte))
1244		return (0);
1245	return (pmap_ppn(pte) | (va & PAGE_MASK));
1246}
1247
1248/*
 * Add a list of wired pages to the kva.  This routine is only used for
1250 * temporary kernel mappings that do not need to have page modification
1251 * or references recorded.  Note that old mappings are simply written
1252 * over.  The page is effectively wired, but it's customary to not have
1253 * the PTE reflect that, nor update statistics.
1254 */
1255void
1256pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1257{
1258	struct ia64_lpte *pte;
1259	int i;
1260
1261	for (i = 0; i < count; i++) {
1262		pte = pmap_find_kpte(va);
1263		if (pmap_present(pte))
1264			pmap_invalidate_page(kernel_pmap, va);
1265		else
1266			pmap_enter_vhpt(pte, va);
1267		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1268		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1269		va += PAGE_SIZE;
1270	}
1271}
1272
1273/*
1274 * this routine jerks page mappings from the
1275 * kernel -- it is meant only for temporary mappings.
1276 */
1277void
1278pmap_qremove(vm_offset_t va, int count)
1279{
1280	struct ia64_lpte *pte;
1281	int i;
1282
1283	for (i = 0; i < count; i++) {
1284		pte = pmap_find_kpte(va);
1285		if (pmap_present(pte)) {
1286			pmap_remove_vhpt(va);
1287			pmap_invalidate_page(kernel_pmap, va);
1288			pmap_clear_present(pte);
1289		}
1290		va += PAGE_SIZE;
1291	}
1292}
1293
1294/*
1295 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1296 * to not have the PTE reflect that, nor update statistics.
1297 */
1298void
1299pmap_kenter(vm_offset_t va, vm_offset_t pa)
1300{
1301	struct ia64_lpte *pte;
1302
1303	pte = pmap_find_kpte(va);
1304	if (pmap_present(pte))
1305		pmap_invalidate_page(kernel_pmap, va);
1306	else
1307		pmap_enter_vhpt(pte, va);
1308	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1309	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1310}
1311
1312/*
1313 * Remove a page from the kva
1314 */
1315void
1316pmap_kremove(vm_offset_t va)
1317{
1318	struct ia64_lpte *pte;
1319
1320	pte = pmap_find_kpte(va);
1321	if (pmap_present(pte)) {
1322		pmap_remove_vhpt(va);
1323		pmap_invalidate_page(kernel_pmap, va);
1324		pmap_clear_present(pte);
1325	}
1326}
1327
1328/*
1329 *	Used to map a range of physical addresses into kernel
1330 *	virtual address space.
1331 *
1332 *	The value passed in '*virt' is a suggested virtual address for
1333 *	the mapping. Architectures which can support a direct-mapped
1334 *	physical to virtual region can return the appropriate address
1335 *	within that region, leaving '*virt' unchanged. Other
1336 *	architectures should map the pages starting at '*virt' and
1337 *	update '*virt' with the first usable address after the mapped
1338 *	region.
1339 */
1340vm_offset_t
1341pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1342{
1343	return IA64_PHYS_TO_RR7(start);
1344}
1345
1346/*
1347 * Remove a single page from a process address space
1348 */
1349static void
1350pmap_remove_page(pmap_t pmap, vm_offset_t va)
1351{
1352	struct ia64_lpte *pte;
1353
1354	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1355		("removing page for non-current pmap"));
1356
1357	pte = pmap_find_vhpt(va);
1358	if (pte != NULL)
1359		pmap_remove_pte(pmap, pte, va, 0, 1);
1360	return;
1361}
1362
1363/*
1364 *	Remove the given range of addresses from the specified map.
1365 *
1366 *	It is assumed that the start and end are properly
1367 *	rounded to the page size.
1368 */
1369void
1370pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1371{
1372	pmap_t oldpmap;
1373	vm_offset_t va;
1374	pv_entry_t npv, pv;
1375	struct ia64_lpte *pte;
1376
1377	if (pmap->pm_stats.resident_count == 0)
1378		return;
1379
1380	vm_page_lock_queues();
1381	PMAP_LOCK(pmap);
1382	oldpmap = pmap_install(pmap);
1383
1384	/*
	 * Special handling for removing a single page: it is a very
	 * common operation and this lets us short-circuit some code.
1388	 */
1389	if (sva + PAGE_SIZE == eva) {
1390		pmap_remove_page(pmap, sva);
1391		goto out;
1392	}
1393
1394	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1395		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
1396			va = pv->pv_va;
1397			if (va >= sva && va < eva) {
1398				pte = pmap_find_vhpt(va);
1399				KASSERT(pte != NULL, ("pte"));
1400				pmap_remove_pte(pmap, pte, va, pv, 1);
1401			}
1402		}
1403
1404	} else {
		for (va = sva; va < eva; va += PAGE_SIZE) {
1406			pte = pmap_find_vhpt(va);
1407			if (pte != NULL)
1408				pmap_remove_pte(pmap, pte, va, 0, 1);
1409		}
1410	}
1411out:
1412	vm_page_unlock_queues();
1413	pmap_install(oldpmap);
1414	PMAP_UNLOCK(pmap);
1415}
1416
1417/*
1418 *	Routine:	pmap_remove_all
1419 *	Function:
1420 *		Removes this physical page from
1421 *		all physical maps in which it resides.
1422 *		Reflects back modify bits to the pager.
1423 *
1424 *	Notes:
1425 *		Original versions of this routine were very
1426 *		inefficient because they iteratively called
1427 *		pmap_remove (slow...)
1428 */
1429
1430void
1431pmap_remove_all(vm_page_t m)
1432{
1433	pmap_t oldpmap;
1434	pv_entry_t pv;
1435
1436#if defined(DIAGNOSTIC)
1437	/*
1438	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1439	 * pages!
1440	 */
1441	if (m->flags & PG_FICTITIOUS) {
1442		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1443	}
1444#endif
1445	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1446	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1447		struct ia64_lpte *pte;
1448		pmap_t pmap = pv->pv_pmap;
1449		vm_offset_t va = pv->pv_va;
1450
1451		PMAP_LOCK(pmap);
1452		oldpmap = pmap_install(pmap);
1453		pte = pmap_find_vhpt(va);
1454		KASSERT(pte != NULL, ("pte"));
1455		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1456			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1457		pmap_remove_pte(pmap, pte, va, pv, 1);
1458		pmap_install(oldpmap);
1459		PMAP_UNLOCK(pmap);
1460	}
1461	vm_page_flag_clear(m, PG_WRITEABLE);
1462}
1463
1464/*
1465 *	Set the physical protection on the
1466 *	specified range of this map as requested.
1467 */
1468void
1469pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1470{
1471	pmap_t oldpmap;
1472	struct ia64_lpte *pte;
1473
1474	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1475		pmap_remove(pmap, sva, eva);
1476		return;
1477	}
1478
1479	if (prot & VM_PROT_WRITE)
1480		return;
1481
1482	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1483		panic("pmap_protect: unaligned addresses");
1484
1485	vm_page_lock_queues();
1486	PMAP_LOCK(pmap);
1487	oldpmap = pmap_install(pmap);
1488	while (sva < eva) {
1489		/*
1490		 * If page is invalid, skip this page
1491		 */
1492		pte = pmap_find_vhpt(sva);
1493		if (pte == NULL) {
1494			sva += PAGE_SIZE;
1495			continue;
1496		}
1497
1498		if (pmap_prot(pte) != prot) {
1499			if (pmap_managed(pte)) {
1500				vm_offset_t pa = pmap_ppn(pte);
1501				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1502				if (pmap_dirty(pte)) {
1503					if (pmap_track_modified(sva))
1504						vm_page_dirty(m);
1505					pmap_clear_dirty(pte);
1506				}
1507				if (pmap_accessed(pte)) {
1508					vm_page_flag_set(m, PG_REFERENCED);
1509					pmap_clear_accessed(pte);
1510				}
1511			}
1512			pmap_pte_prot(pmap, pte, prot);
1513			pmap_invalidate_page(pmap, sva);
1514		}
1515
1516		sva += PAGE_SIZE;
1517	}
1518	vm_page_unlock_queues();
1519	pmap_install(oldpmap);
1520	PMAP_UNLOCK(pmap);
1521}
1522
1523/*
1524 *	Insert the given physical page (p) at
1525 *	the specified virtual address (v) in the
1526 *	target physical map with the protection requested.
1527 *
1528 *	If specified, the page will be wired down, meaning
1529 *	that the related pte can not be reclaimed.
1530 *
1531 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1532 *	or lose information.  That is, this routine must actually
1533 *	insert this page into the given map NOW.
1534 */
1535void
1536pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1537    boolean_t wired)
1538{
1539	pmap_t oldpmap;
1540	vm_offset_t pa;
1541	vm_offset_t opa;
1542	struct ia64_lpte origpte;
1543	struct ia64_lpte *pte;
1544	boolean_t managed;
1545
1546	vm_page_lock_queues();
1547	PMAP_LOCK(pmap);
1548	oldpmap = pmap_install(pmap);
1549
1550	va &= ~PAGE_MASK;
1551#ifdef DIAGNOSTIC
1552	if (va > VM_MAX_KERNEL_ADDRESS)
1553		panic("pmap_enter: toobig");
1554#endif
1555
1556	/*
1557	 * Find (or create) a pte for the given mapping.
1558	 */
1559	while ((pte = pmap_find_pte(va)) == NULL) {
1560		pmap_install(oldpmap);
1561		PMAP_UNLOCK(pmap);
1562		vm_page_unlock_queues();
1563		VM_WAIT;
1564		vm_page_lock_queues();
1565		PMAP_LOCK(pmap);
1566		oldpmap = pmap_install(pmap);
1567	}
1568	origpte = *pte;
1569	if (!pmap_present(pte)) {
1570		opa = ~0UL;
1571		pmap_enter_vhpt(pte, va);
1572	} else
1573		opa = pmap_ppn(pte);
1574	managed = FALSE;
1575	pa = VM_PAGE_TO_PHYS(m);
1576
1577	/*
1578	 * Mapping has not changed, must be protection or wiring change.
1579	 */
1580	if (opa == pa) {
1581		/*
1582		 * Wiring change, just update stats. We don't worry about
1583		 * wiring PT pages as they remain resident as long as there
1584		 * are valid mappings in them. Hence, if a user page is wired,
1585		 * the PT page will be also.
1586		 */
1587		if (wired && !pmap_wired(&origpte))
1588			pmap->pm_stats.wired_count++;
1589		else if (!wired && pmap_wired(&origpte))
1590			pmap->pm_stats.wired_count--;
1591
1592		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1593
1594		/*
1595		 * We might be turning off write access to the page,
1596		 * so we go ahead and sense modify status.
1597		 */
1598		if (managed && pmap_dirty(&origpte) &&
1599		    pmap_track_modified(va))
1600			vm_page_dirty(m);
1601
1602		pmap_invalidate_page(pmap, va);
1603		goto validate;
1604	}
1605
1606	/*
1607	 * Mapping has changed, invalidate old range and fall
1608	 * through to handle validating new mapping.
1609	 */
1610	if (opa != ~0UL) {
1611		pmap_remove_pte(pmap, pte, va, 0, 0);
1612		pmap_enter_vhpt(pte, va);
1613	}
1614
1615	/*
1616	 * Enter on the PV list if part of our managed memory.
1617	 */
1618	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1619		pmap_insert_entry(pmap, va, m);
1620		managed = TRUE;
1621	}
1622
1623	/*
1624	 * Increment counters
1625	 */
1626	pmap->pm_stats.resident_count++;
1627	if (wired)
1628		pmap->pm_stats.wired_count++;
1629
1630validate:
1631
1632	/*
1633	 * Now validate mapping with desired protection/wiring. This
1634	 * adds the pte to the VHPT if necessary.
1635	 */
1636	pmap_pte_prot(pmap, pte, prot);
1637	pmap_set_pte(pte, va, pa, wired, managed);
1638
1639	vm_page_unlock_queues();
1640	pmap_install(oldpmap);
1641	PMAP_UNLOCK(pmap);
1642}
1643
1644/*
1645 * this code makes some *MAJOR* assumptions:
1646 * 1. Current pmap & pmap exists.
1647 * 2. Not wired.
1648 * 3. Read access.
1649 * 4. No page table pages.
1650 * but is *MUCH* faster than pmap_enter...
1651 */
1652
1653vm_page_t
1654pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1655    vm_page_t mpte)
1656{
1657	struct ia64_lpte *pte;
1658	pmap_t oldpmap;
1659	boolean_t managed;
1660
1661	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1662	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1663	PMAP_LOCK(pmap);
1664	oldpmap = pmap_install(pmap);
1665
1666	while ((pte = pmap_find_pte(va)) == NULL) {
1667		pmap_install(oldpmap);
1668		PMAP_UNLOCK(pmap);
1669		vm_page_busy(m);
1670		vm_page_unlock_queues();
1671		VM_OBJECT_UNLOCK(m->object);
1672		VM_WAIT;
1673		VM_OBJECT_LOCK(m->object);
1674		vm_page_lock_queues();
1675		vm_page_wakeup(m);
1676		PMAP_LOCK(pmap);
1677		oldpmap = pmap_install(pmap);
1678	}
1679
1680	if (!pmap_present(pte)) {
		/* Enter on the PV list if it's managed. */
1682		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1683			pmap_insert_entry(pmap, va, m);
1684			managed = TRUE;
1685		} else
1686			managed = FALSE;
1687
1688		/* Increment counters. */
1689		pmap->pm_stats.resident_count++;
1690
1691		/* Initialise with R/O protection and enter into VHPT. */
1692		pmap_enter_vhpt(pte, va);
1693		pmap_pte_prot(pmap, pte,
1694		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1695		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1696	}
1697
1698	pmap_install(oldpmap);
1699	PMAP_UNLOCK(pmap);
1700	return (NULL);
1701}
1702
1703/*
1704 * pmap_object_init_pt preloads the ptes for a given object
1705 * into the specified pmap.  This eliminates the blast of soft
1706 * faults on process startup and immediately after an mmap.
1707 */
1708void
1709pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1710		    vm_object_t object, vm_pindex_t pindex,
1711		    vm_size_t size)
1712{
1713
1714	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1715	KASSERT(object->type == OBJT_DEVICE,
1716	    ("pmap_object_init_pt: non-device object"));
1717}
1718
1719/*
1720 *	Routine:	pmap_change_wiring
1721 *	Function:	Change the wiring attribute for a map/virtual-address
1722 *			pair.
1723 *	In/out conditions:
1724 *			The mapping must already exist in the pmap.
1725 */
1726void
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
{
1732	pmap_t oldpmap;
1733	struct ia64_lpte *pte;
1734
1735	PMAP_LOCK(pmap);
1736	oldpmap = pmap_install(pmap);
1737
1738	pte = pmap_find_vhpt(va);
1739	KASSERT(pte != NULL, ("pte"));
1740	if (wired && !pmap_wired(pte)) {
1741		pmap->pm_stats.wired_count++;
1742		pmap_set_wired(pte);
1743	} else if (!wired && pmap_wired(pte)) {
1744		pmap->pm_stats.wired_count--;
1745		pmap_clear_wired(pte);
1746	}
1747
1748	pmap_install(oldpmap);
1749	PMAP_UNLOCK(pmap);
1750}
1751
1752
1753
1754/*
1755 *	Copy the range specified by src_addr/len
1756 *	from the source map to the range dst_addr/len
1757 *	in the destination map.
1758 *
1759 *	This routine is only advisory and need not do anything.
1760 */
1761
1762void
1763pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1764	  vm_offset_t src_addr)
1765{
1766}
1767
1768
1769/*
1770 *	pmap_zero_page zeros the specified hardware page by
1771 *	mapping it into virtual memory and using bzero to clear
1772 *	its contents.
1773 */
1774
1775void
1776pmap_zero_page(vm_page_t m)
1777{
1778	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1779	bzero((caddr_t) va, PAGE_SIZE);
1780}
1781
1782
1783/*
1784 *	pmap_zero_page_area zeros the specified hardware page by
1785 *	mapping it into virtual memory and using bzero to clear
1786 *	its contents.
1787 *
1788 *	off and size must reside within a single page.
1789 */
1790
1791void
1792pmap_zero_page_area(vm_page_t m, int off, int size)
1793{
1794	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1795	bzero((char *)(caddr_t)va + off, size);
1796}
1797
1798
1799/*
1800 *	pmap_zero_page_idle zeros the specified hardware page by
1801 *	mapping it into virtual memory and using bzero to clear
1802 *	its contents.  This is for the vm_idlezero process.
1803 */
1804
1805void
1806pmap_zero_page_idle(vm_page_t m)
1807{
1808	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1809	bzero((caddr_t) va, PAGE_SIZE);
1810}
1811
1812
1813/*
1814 *	pmap_copy_page copies the specified (machine independent)
1815 *	page by mapping the page into virtual memory and using
1816 *	bcopy to copy the page, one machine dependent page at a
1817 *	time.
1818 */
1819void
1820pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1821{
1822	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
1823	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
1824	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
1825}
1826
1827/*
1828 * Returns true if the pmap's pv is one of the first
1829 * 16 pvs linked to from this page.  This count may
1830 * be changed upwards or downwards in the future; it
1831 * is only necessary that true be returned for a small
1832 * subset of pmaps for proper page aging.
1833 */
1834boolean_t
1835pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1836{
1837	pv_entry_t pv;
1838	int loops = 0;
1839
1840	if (m->flags & PG_FICTITIOUS)
1841		return FALSE;
1842
1843	/*
1844	 * Not found, check current mappings returning immediately if found.
1845	 */
1846	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1847	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1848		if (pv->pv_pmap == pmap) {
1849			return TRUE;
1850		}
1851		loops++;
1852		if (loops >= 16)
1853			break;
1854	}
1855	return (FALSE);
1856}
1857
1858/*
1859 * Remove all pages from specified address space
 * Remove all pages from the specified address space;
 * this aids process exit speeds.  Also, this code
1862 * can have the more generic (and slightly slower)
1863 * mode enabled.  This is much faster than pmap_remove
1864 * in the case of running down an entire address space.
1865 */
1866void
1867pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1868{
1869	pmap_t oldpmap;
1870	pv_entry_t pv, npv;
1871
1872	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
1873		printf("warning: pmap_remove_pages called with non-current pmap\n");
1874		return;
1875	}
1876
1877	vm_page_lock_queues();
1878	PMAP_LOCK(pmap);
1879	oldpmap = pmap_install(pmap);
1880
1881	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
1882		struct ia64_lpte *pte;
1883
1884		npv = TAILQ_NEXT(pv, pv_plist);
1885
1886		if (pv->pv_va >= eva || pv->pv_va < sva)
1887			continue;
1888
1889		pte = pmap_find_vhpt(pv->pv_va);
1890		KASSERT(pte != NULL, ("pte"));
1891		if (!pmap_wired(pte))
1892			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
1893	}
1894
1895	pmap_install(oldpmap);
1896	PMAP_UNLOCK(pmap);
1897	vm_page_unlock_queues();
1898}
1899
1900/*
 *	pmap_page_protect:
 *
 *	Lower the permission for all mappings to a given page.
1904 */
1905void
1906pmap_page_protect(vm_page_t m, vm_prot_t prot)
1907{
1908	struct ia64_lpte *pte;
1909	pmap_t oldpmap, pmap;
1910	pv_entry_t pv;
1911
1912	if ((prot & VM_PROT_WRITE) != 0)
1913		return;
1914	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
1915		if ((m->flags & PG_WRITEABLE) == 0)
1916			return;
1917		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1918			pmap = pv->pv_pmap;
1919			PMAP_LOCK(pmap);
1920			oldpmap = pmap_install(pmap);
1921			pte = pmap_find_vhpt(pv->pv_va);
1922			KASSERT(pte != NULL, ("pte"));
1923			pmap_pte_prot(pmap, pte, prot);
1924			pmap_invalidate_page(pmap, pv->pv_va);
1925			pmap_install(oldpmap);
1926			PMAP_UNLOCK(pmap);
1927		}
1928		vm_page_flag_clear(m, PG_WRITEABLE);
1929	} else {
1930		pmap_remove_all(m);
1931	}
1932}
1933
1934/*
1935 *	pmap_ts_referenced:
1936 *
1937 *	Return a count of reference bits for a page, clearing those bits.
1938 *	It is not necessary for every reference bit to be cleared, but it
1939 *	is necessary that 0 only be returned when there are truly no
1940 *	reference bits set.
1941 *
1942 *	XXX: The exact number of bits to check and clear is a matter that
1943 *	should be tested and standardized at some point in the future for
1944 *	optimal aging of shared pages.
1945 */
1946int
1947pmap_ts_referenced(vm_page_t m)
1948{
1949	struct ia64_lpte *pte;
1950	pmap_t oldpmap;
1951	pv_entry_t pv;
1952	int count = 0;
1953
1954	if (m->flags & PG_FICTITIOUS)
1955		return 0;
1956
1957	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1958		PMAP_LOCK(pv->pv_pmap);
1959		oldpmap = pmap_install(pv->pv_pmap);
1960		pte = pmap_find_vhpt(pv->pv_va);
1961		KASSERT(pte != NULL, ("pte"));
1962		if (pmap_accessed(pte)) {
1963			count++;
1964			pmap_clear_accessed(pte);
1965			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1966		}
1967		pmap_install(oldpmap);
1968		PMAP_UNLOCK(pv->pv_pmap);
1969	}
1970
1971	return count;
1972}
1973
1974/*
1975 *	pmap_is_modified:
1976 *
1977 *	Return whether or not the specified physical page was modified
1978 *	in any physical maps.
1979 */
1980boolean_t
1981pmap_is_modified(vm_page_t m)
1982{
1983	struct ia64_lpte *pte;
1984	pmap_t oldpmap;
1985	pv_entry_t pv;
1986	boolean_t rv;
1987
1988	rv = FALSE;
1989	if (m->flags & PG_FICTITIOUS)
1990		return (rv);
1991
1992	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1993		PMAP_LOCK(pv->pv_pmap);
1994		oldpmap = pmap_install(pv->pv_pmap);
1995		pte = pmap_find_vhpt(pv->pv_va);
1996		pmap_install(oldpmap);
1997		KASSERT(pte != NULL, ("pte"));
1998		rv = pmap_dirty(pte) ? TRUE : FALSE;
1999		PMAP_UNLOCK(pv->pv_pmap);
2000		if (rv)
2001			break;
2002	}
2003
2004	return (rv);
2005}
2006
2007/*
2008 *	pmap_is_prefaultable:
2009 *
2010 *	Return whether or not the specified virtual address is elgible
 *	Return whether or not the specified virtual address is eligible
2012 */
2013boolean_t
2014pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2015{
2016	struct ia64_lpte *pte;
2017
2018	pte = pmap_find_vhpt(addr);
2019	if (pte != NULL && pmap_present(pte))
2020		return (FALSE);
2021	return (TRUE);
2022}
2023
2024/*
2025 *	Clear the modify bits on the specified physical page.
2026 */
2027void
2028pmap_clear_modify(vm_page_t m)
2029{
2030	struct ia64_lpte *pte;
2031	pmap_t oldpmap;
2032	pv_entry_t pv;
2033
2034	if (m->flags & PG_FICTITIOUS)
2035		return;
2036
2037	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2038		PMAP_LOCK(pv->pv_pmap);
2039		oldpmap = pmap_install(pv->pv_pmap);
2040		pte = pmap_find_vhpt(pv->pv_va);
2041		KASSERT(pte != NULL, ("pmap_clear_modify: pte not found"));
2042		if (pmap_dirty(pte)) {
2043			pmap_clear_dirty(pte);
2044			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2045		}
2046		pmap_install(oldpmap);
2047		PMAP_UNLOCK(pv->pv_pmap);
2048	}
2049}
2050
2051/*
2052 *	pmap_clear_reference:
2053 *
2054 *	Clear the reference bit on the specified physical page.
2055 */
2056void
2057pmap_clear_reference(vm_page_t m)
2058{
2059	struct ia64_lpte *pte;
2060	pmap_t oldpmap;
2061	pv_entry_t pv;
2062
2063	if (m->flags & PG_FICTITIOUS)
2064		return;
2065
2066	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2067		PMAP_LOCK(pv->pv_pmap);
2068		oldpmap = pmap_install(pv->pv_pmap);
2069		pte = pmap_find_vhpt(pv->pv_va);
2070		KASSERT(pte != NULL, ("pmap_clear_reference: pte not found"));
2071		if (pmap_accessed(pte)) {
2072			pmap_clear_accessed(pte);
2073			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2074		}
2075		pmap_install(oldpmap);
2076		PMAP_UNLOCK(pv->pv_pmap);
2077	}
2078}
2079
2080/*
2081 * Map a set of physical memory pages into the kernel virtual
2082 * address space. Return a pointer to where it is mapped. This
2083 * routine is intended to be used for mapping device memory,
2084 * NOT real memory.
2085 */
2086void *
2087pmap_mapdev(vm_offset_t pa, vm_size_t size)
2088{
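	/*
	 * Region 6 is an uncacheable, direct-mapped view of physical
	 * memory, so device registers can be addressed without allocating
	 * any kernel virtual address space.
	 */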
2089	return (void *)IA64_PHYS_TO_RR6(pa);
2090}
2091
2092/*
2093 * 'Unmap' a range mapped by pmap_mapdev().
2094 */
2095void
2096pmap_unmapdev(vm_offset_t va, vm_size_t size)
2097{
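	/*
	 * Nothing to do: pmap_mapdev() returns region 6 direct-mapped
	 * addresses, so no kernel virtual address space was allocated.
	 */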
2098	return;
2099}
2100
2101/*
2102 * Perform the pmap work for the mincore(2) system call.
2103 */
2104int
2105pmap_mincore(pmap_t pmap, vm_offset_t addr)
2106{
2107	pmap_t oldpmap;
2108	struct ia64_lpte *pte, tpte;
2109	int val = 0;
2110
2111	PMAP_LOCK(pmap);
2112	oldpmap = pmap_install(pmap);
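	/*
	 * Look up the PTE and copy it into a local buffer so that it can
	 * be examined after the locks have been dropped.
	 */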
2113	pte = pmap_find_vhpt(addr);
2114	if (pte != NULL) {
2115		tpte = *pte;
2116		pte = &tpte;
2117	}
2118	pmap_install(oldpmap);
2119	PMAP_UNLOCK(pmap);
2120
2121	if (pte == NULL)
2122		return 0;
2123
2124	if (pmap_present(pte)) {
2125		vm_page_t m;
2126		vm_offset_t pa;
2127
2128		val = MINCORE_INCORE;
2129		if (!pmap_managed(pte))
2130			return val;
2131
2132		pa = pmap_ppn(pte);
2133
2134		m = PHYS_TO_VM_PAGE(pa);
2135
2136		/*
2137		 * Modified by us
2138		 */
2139		if (pmap_dirty(pte))
2140			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2141		else {
2142			/*
2143			 * Modified by someone
2144			 */
2145			vm_page_lock_queues();
2146			if (pmap_is_modified(m))
2147				val |= MINCORE_MODIFIED_OTHER;
2148			vm_page_unlock_queues();
2149		}
2150		/*
2151		 * Referenced by us
2152		 */
2153		if (pmap_accessed(pte))
2154			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2155		else {
2156			/*
2157			 * Referenced by someone
2158			 */
2159			vm_page_lock_queues();
2160			if (pmap_ts_referenced(m)) {
2161				val |= MINCORE_REFERENCED_OTHER;
2162				vm_page_flag_set(m, PG_REFERENCED);
2163			}
2164			vm_page_unlock_queues();
2165		}
2166	}
2167	return val;
2168}
2169
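/*
 *	pmap_activate:
 *
 *	Install the pmap of the given thread's process on the current CPU.
 */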
2170void
2171pmap_activate(struct thread *td)
2172{
2173	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2174}
2175
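/*
 *	pmap_switch:
 *
 *	Make the given pmap current on this CPU: clear the previous pmap's
 *	bit in pm_active, load region registers 0-4 with the new pmap's
 *	RIDs (or placeholder RIDs when pm is NULL), set the new pmap's
 *	pm_active bit and record it as the per-CPU current_pmap.  Each
 *	region register holds the RID in bits 8 and up, the page size in
 *	bits 2-7 and the VHPT walker enable in bit 0; the final srlz.d
 *	makes the new values visible.  The caller must hold sched_lock.
 *	Returns the previously installed pmap.
 */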
2176pmap_t
2177pmap_switch(pmap_t pm)
2178{
2179	pmap_t prevpm;
2180	int i;
2181
2182	mtx_assert(&sched_lock, MA_OWNED);
2183
2184	prevpm = PCPU_GET(current_pmap);
2185	if (prevpm == pm)
2186		return (prevpm);
2187	if (prevpm != NULL)
2188		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
2189	if (pm == NULL) {
2190		for (i = 0; i < 5; i++) {
2191			ia64_set_rr(IA64_RR_BASE(i),
2192			    (i << 8)|(PAGE_SHIFT << 2)|1);
2193		}
2194	} else {
2195		for (i = 0; i < 5; i++) {
2196			ia64_set_rr(IA64_RR_BASE(i),
2197			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2198		}
2199		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
2200	}
2201	PCPU_SET(current_pmap, pm);
2202	__asm __volatile("srlz.d");
2203	return (prevpm);
2204}
2205
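/*
 *	pmap_install:
 *
 *	Wrapper around pmap_switch() that acquires sched_lock for the
 *	duration of the switch and returns the previously installed pmap.
 */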
2206static pmap_t
2207pmap_install(pmap_t pm)
2208{
2209	pmap_t prevpm;
2210
2211	mtx_lock_spin(&sched_lock);
2212	prevpm = pmap_switch(pm);
2213	mtx_unlock_spin(&sched_lock);
2214	return (prevpm);
2215}
2216
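/*
 *	pmap_addr_hint:
 *
 *	No machine-specific address preference is applied; the requested
 *	address is returned unchanged.
 */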
2217vm_offset_t
2218pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2219{
2220
2221	return addr;
2222}
2223
2224#include "opt_ddb.h"
2225
2226#ifdef DDB
2227
2228#include <ddb/ddb.h>
2229
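/*
 * DDB commands for inspecting MMU state: the instruction and data
 * translation registers (itr, dtr), the region registers (rr), the VHPT
 * hash and tag for an address (thash, ttag) and kernel page table
 * entries (kpte).
 */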
2230static const char	*psnames[] = {
2231	"1B",	"2B",	"4B",	"8B",
2232	"16B",	"32B",	"64B",	"128B",
2233	"256B",	"512B",	"1K",	"2K",
2234	"4K",	"8K",	"16K",	"32K",
2235	"64K",	"128K",	"256K",	"512K",
2236	"1M",	"2M",	"4M",	"8M",
2237	"16M",	"32M",	"64M",	"128M",
2238	"256M",	"512M",	"1G",	"2G"
2239};
2240
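/*
 * Dump the instruction (type 0) or data (type 1) translation registers.
 * PAL_VM_SUMMARY yields the index of the highest implemented register
 * and PAL_VM_TR_READ reads each entry; fields that PAL reports as
 * invalid are masked before printing.
 */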
2241static void
2242print_trs(int type)
2243{
2244	struct ia64_pal_result res;
2245	int i, maxtr;
2246	struct {
2247		pt_entry_t	pte;
2248		uint64_t	itir;
2249		uint64_t	ifa;
2250		struct ia64_rr	rr;
2251	} buf;
2252	static const char *manames[] = {
2253		"WB",	"bad",	"bad",	"bad",
2254		"UC",	"UCE",	"WC",	"NaT",
2255	};
2256
2257	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2258	if (res.pal_status != 0) {
2259		db_printf("Can't get VM summary\n");
2260		return;
2261	}
2262
2263	if (type == 0)
2264		maxtr = (res.pal_result[0] >> 40) & 0xff;
2265	else
2266		maxtr = (res.pal_result[0] >> 32) & 0xff;
2267
2268	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2269	for (i = 0; i <= maxtr; i++) {
2270		bzero(&buf, sizeof(buf));
2271		res = ia64_call_pal_stacked_physical
2272			(PAL_VM_TR_READ, i, type, ia64_tpa((uint64_t) &buf));
2273		if (!(res.pal_result[0] & 1))
2274			buf.pte &= ~PTE_AR_MASK;
2275		if (!(res.pal_result[0] & 2))
2276			buf.pte &= ~PTE_PL_MASK;
2277		if (!(res.pal_result[0] & 4))
2278			pmap_clear_dirty(&buf);
2279		if (!(res.pal_result[0] & 8))
2280			buf.pte &= ~PTE_MA_MASK;
2281		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2282		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2283		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2284		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2285		    (buf.pte & PTE_ED) ? 1 : 0,
2286		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2287		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2288		    (pmap_dirty(&buf)) ? 1 : 0,
2289		    (pmap_accessed(&buf)) ? 1 : 0,
2290		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2291		    (pmap_present(&buf)) ? 1 : 0,
2292		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2293	}
2294}
2295
2296DB_COMMAND(itr, db_itr)
2297{
2298	print_trs(0);
2299}
2300
2301DB_COMMAND(dtr, db_dtr)
2302{
2303	print_trs(1);
2304}
2305
2306DB_COMMAND(rr, db_rr)
2307{
2308	int i;
2309	uint64_t t;
2310	struct ia64_rr rr;
2311
2312	db_printf("RR RID    PgSz VE\n");
2313	for (i = 0; i < 8; i++) {
2314		__asm __volatile ("mov %0=rr[%1]"
2315				  : "=r"(t)
2316				  : "r"(IA64_RR_BASE(i)));
2317		*(uint64_t *) &rr = t;
2318		db_printf("%d  %06x %4s %d\n",
2319		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2320	}
2321}
2322
2323DB_COMMAND(thash, db_thash)
2324{
2325	if (!have_addr)
2326		return;
2327
2328	db_printf("%p\n", (void *) ia64_thash(addr));
2329}
2330
2331DB_COMMAND(ttag, db_ttag)
2332{
2333	if (!have_addr)
2334		return;
2335
2336	db_printf("0x%lx\n", ia64_ttag(addr));
2337}
2338
2339DB_COMMAND(kpte, db_kpte)
2340{
2341	struct ia64_lpte *pte;
2342
2343	if (!have_addr) {
2344		db_printf("usage: kpte <kva>\n");
2345		return;
2346	}
2347	if (addr < VM_MIN_KERNEL_ADDRESS) {
2348		db_printf("kpte: error: invalid <kva>\n");
2349		return;
2350	}
2351	pte = &ia64_kptdir[KPTE_DIR_INDEX(addr)][KPTE_PTE_INDEX(addr)];
2352	db_printf("kpte at %p:\n", pte);
2353	db_printf("  pte  =%016lx\n", pte->pte);
2354	db_printf("  itir =%016lx\n", pte->itir);
2355	db_printf("  tag  =%016lx\n", pte->tag);
2356	db_printf("  chain=%016lx\n", pte->chain);
2357}
2358
2359#endif
2360