1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 96019 2002-05-04 08:04:28Z marcel $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
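/*
 * As used throughout this file, a region register value packs the
 * region ID above bit 8, the preferred page size in bits 2..7 and the
 * VHPT-walker enable in bit 0, which is why the code below installs
 * user regions with
 *
 *	ia64_set_rr(IA64_RR_BASE(rr), (rid << 8) | (PAGE_SHIFT << 2) | 1);
 *
 * while the direct-mapped regions 6 and 7 get a 256M (2^28) page size
 * with the walker disabled.
 */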
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120
121#include <sys/user.h>
122
123#include <machine/pal.h>
124#include <machine/md_var.h>
125
126MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
127
128#ifndef PMAP_SHPGPERPROC
129#define PMAP_SHPGPERPROC 200
130#endif
131
132#if defined(DIAGNOSTIC)
133#define PMAP_DIAGNOSTIC
134#endif
135
136#define MINPV 2048
137
138#if 0
139#define PMAP_DIAGNOSTIC
140#define PMAP_DEBUG
141#endif
142
143#if !defined(PMAP_DIAGNOSTIC)
144#define PMAP_INLINE __inline
145#else
146#define PMAP_INLINE
147#endif
148
149/*
150 * Get PDEs and PTEs for user/kernel address space
151 */
152#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
153#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
154#define pmap_pte_v(pte)		((pte)->pte_p)
155#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
156#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
157
158#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
159				:((pte)->pte_ig &= ~PTE_IG_WIRED))
160#define pmap_pte_set_prot(pte, v) do {		\
161    (pte)->pte_ar = v >> 2;			\
162    (pte)->pte_pl = v & 3;			\
163} while (0)
164
165/*
166 * Given a map and a machine independent protection code,
167 * convert to an ia64 protection code.
168 */
169#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
170#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
171#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
172int	protection_codes[2][8];
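/*
 * protection_codes[][] is indexed by [kernel(0)/user(1)][VM_PROT_*]
 * and each entry packs an access-rights value and privilege level as
 * (ar << 2) | pl, matching pte_prot_ar()/pte_prot_pl() above.  It is
 * filled in by ia64_protection_init() during pmap_bootstrap().
 */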
173
174/*
175 * Return non-zero if this pmap is currently active
176 */
177#define pmap_isactive(pmap)	(pmap->pm_active)
178
179/*
180 * Statically allocated kernel pmap
181 */
182struct pmap kernel_pmap_store;
183
184vm_offset_t avail_start;	/* PA of first available physical page */
185vm_offset_t avail_end;		/* PA of last available physical page */
186vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
187vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
188static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
189
190vm_offset_t vhpt_base, vhpt_size;
191
192/*
193 * We use an object to own the kernel's 'page tables'. For simplicity,
194 * we use one page directory to index a set of pages containing
195 * ia64_lptes. This gives us up to 2Gb of kernel virtual space.
196 */
197static vm_object_t kptobj;
198static int nkpt;
199static struct ia64_lpte **kptdir;
200#define KPTE_DIR_INDEX(va) \
201	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
202#define KPTE_PTE_INDEX(va) \
203	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
204#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
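/*
 * Worked example, assuming 8K pages (PAGE_SHIFT == 13) and a 32-byte
 * struct ia64_lpte: KPTE_DIR_INDEX() selects one of 1024 directory
 * slots (a page of pointers) and KPTE_PTE_INDEX() one of
 * NKPTEPG == 256 ia64_lptes within the page it points to, giving
 * 1024 * 256 * 8K == 2G of kernel virtual space, as noted above.
 */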
205
206vm_offset_t kernel_vm_end;
207
208/*
209 * Values for ptc.e. XXX values for SKI.
210 */
211static u_int64_t pmap_ptc_e_base = 0x100000000;
212static u_int64_t pmap_ptc_e_count1 = 3;
213static u_int64_t pmap_ptc_e_count2 = 2;
214static u_int64_t pmap_ptc_e_stride1 = 0x2000;
215static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
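/*
 * These four values describe the nested loop executed by
 * pmap_invalidate_all_1(): count1 passes separated by stride1, each
 * issuing count2 ptc.e operations separated by stride2.  The defaults
 * above are only placeholders for SKI; the real values are read from
 * PAL_PTCE_INFO in pmap_bootstrap().
 */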
216
217/*
218 * Data for the RID allocator
219 */
220static u_int64_t *pmap_ridbusy;
221static int pmap_ridmax, pmap_ridcount;
222struct mtx pmap_ridmutex;
223
224/*
225 * Data for the pv entry allocation mechanism
226 */
227static uma_zone_t pvzone;
228static struct vm_object pvzone_obj;
229static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
230static int pmap_pagedaemon_waken = 0;
231static struct pv_entry *pvbootentries;
232static int pvbootnext, pvbootmax;
233
234/*
235 * Data for allocating PTEs for user processes.
236 */
237static uma_zone_t ptezone;
238static struct vm_object ptezone_obj;
239#if 0
240static struct ia64_lpte *pteinit;
241#endif
242
243/*
244 * VHPT instrumentation.
245 */
246static int pmap_vhpt_inserts;
247static int pmap_vhpt_collisions;
248static int pmap_vhpt_resident;
249SYSCTL_DECL(_vm_stats);
250SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
251SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
252	   &pmap_vhpt_inserts, 0, "");
253SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
254	   &pmap_vhpt_collisions, 0, "");
255SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
256	   &pmap_vhpt_resident, 0, "");
257
258static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
259static pv_entry_t get_pv_entry(void);
260static void	ia64_protection_init(void);
261
262static void	pmap_invalidate_all(pmap_t pmap);
263static void	pmap_remove_all(vm_page_t m);
264static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
265static void	*pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
266
267vm_offset_t
268pmap_steal_memory(vm_size_t size)
269{
270	vm_size_t bank_size;
271	vm_offset_t pa, va;
272
273	size = round_page(size);
274
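	/*
	 * phys_avail[] is a zero-terminated list of (start, end)
	 * physical address pairs.  If the first bank is too small for
	 * the request, the loop below drops it by shifting the
	 * remaining banks down and tries again.
	 */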
275	bank_size = phys_avail[1] - phys_avail[0];
276	while (size > bank_size) {
277		int i;
278		for (i = 0; phys_avail[i+2]; i+= 2) {
279			phys_avail[i] = phys_avail[i+2];
280			phys_avail[i+1] = phys_avail[i+3];
281		}
282		phys_avail[i] = 0;
283		phys_avail[i+1] = 0;
284		if (!phys_avail[0])
285			panic("pmap_steal_memory: out of memory");
286		bank_size = phys_avail[1] - phys_avail[0];
287	}
288
289	pa = phys_avail[0];
290	phys_avail[0] += size;
291
292	va = IA64_PHYS_TO_RR7(pa);
293	bzero((caddr_t) va, size);
294	return va;
295}
296
297/*
298 *	Bootstrap the system enough to run with virtual memory.
299 */
300void
301pmap_bootstrap()
302{
303	int i, j, count, ridbits;
304	struct ia64_pal_result res;
305
306	/*
307	 * Query the PAL Code to find the loop parameters for the
308	 * ptc.e instruction.
309	 */
310	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
311	if (res.pal_status != 0)
312		panic("Can't configure ptc.e parameters");
313	pmap_ptc_e_base = res.pal_result[0];
314	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
315	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
316	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
317	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
318	if (bootverbose)
319		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
320		       "stride1=0x%lx, stride2=0x%lx\n",
321		       pmap_ptc_e_base,
322		       pmap_ptc_e_count1,
323		       pmap_ptc_e_count2,
324		       pmap_ptc_e_stride1,
325		       pmap_ptc_e_stride2);
326
327	/*
328	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
329	 */
330	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
331	if (res.pal_status != 0) {
332		if (bootverbose)
333			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
334		ridbits = 18; /* guaranteed minimum */
335	} else {
336		ridbits = (res.pal_result[1] >> 8) & 0xff;
337		if (bootverbose)
338			printf("Processor supports %d Region ID bits\n",
339			       ridbits);
340	}
341	pmap_ridmax = (1 << ridbits);
342	pmap_ridcount = 8;
343	pmap_ridbusy = (u_int64_t *)
344		pmap_steal_memory(pmap_ridmax / 8);
345	bzero(pmap_ridbusy, pmap_ridmax / 8);
346	pmap_ridbusy[0] |= 0xff;
347	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
348
349	/*
350	 * Allocate some memory for initial kernel 'page tables'.
351	 */
352	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
353	for (i = 0; i < NKPT; i++) {
354		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
355	}
356	nkpt = NKPT;
357
358	avail_start = phys_avail[0];
359	for (i = 0; phys_avail[i+2]; i+= 2) ;
360	avail_end = phys_avail[i+1];
361	count = i+2;
362
363	/*
364	 * Figure out a useful size for the VHPT, based on the size of
365	 * physical memory and try to locate a region which is large
366	 * enough to contain the VHPT (which must be a power of two in
367	 * size and aligned to a natural boundary).
368	 */
369	vhpt_size = 15;
370	while ((1<<vhpt_size) < ia64_btop(avail_end - avail_start) * 32)
371		vhpt_size++;
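	/*
	 * The loop above sizes the VHPT at roughly 32 bytes (one
	 * long-format entry) per physical page, with a floor of 2^15
	 * bytes; the search below then looks for a naturally aligned
	 * hole in phys_avail[] large enough to hold it.
	 */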
372
373	vhpt_base = 0;
374	while (!vhpt_base) {
375		vm_offset_t mask;
376		if (bootverbose)
377			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
378		mask = (1L << vhpt_size) - 1;
379		for (i = 0; i < count; i += 2) {
380			vm_offset_t base, limit;
381			base = (phys_avail[i] + mask) & ~mask;
382			limit = base + (1L << vhpt_size);
383			if (limit <= phys_avail[i+1])
384				/*
385				 * VHPT can fit in this region
386				 */
387				break;
388		}
389		if (!phys_avail[i]) {
390			/*
391			 * Can't fit, try next smaller size.
392			 */
393			vhpt_size--;
394		} else {
395			vhpt_base = (phys_avail[i] + mask) & ~mask;
396		}
397	}
398	if (vhpt_size < 15)
399		panic("Can't find space for VHPT");
400
401	if (bootverbose)
402		printf("Putting VHPT at %p\n", (void *) vhpt_base);
403	if (vhpt_base != phys_avail[i]) {
404		/*
405		 * Split this region.
406		 */
407		if (bootverbose)
408			printf("Splitting [%p-%p]\n",
409			       (void *) phys_avail[i],
410			       (void *) phys_avail[i+1]);
411		for (j = count; j > i; j -= 2) {
412			phys_avail[j] = phys_avail[j-2];
413			phys_avail[j+1] = phys_avail[j-2+1];
414		}
415		phys_avail[count+2] = 0;
416		phys_avail[count+3] = 0;
417		phys_avail[i+1] = vhpt_base;
418		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
419	} else {
420		phys_avail[i] = vhpt_base + (1L << vhpt_size);
421	}
422
423	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
424	bzero((void *) vhpt_base, (1L << vhpt_size));
425	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
426			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
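	/*
	 * Informally (per the architected cr.pta layout): the value
	 * above combines the VHPT base address with the size field in
	 * bits 2..7, the long-format bit at bit 8 and the enable bit
	 * at bit 0, i.e. base | (vhpt_size << 2) | (1 << 8) | 1.
	 */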
427
428	virtual_avail = IA64_RR_BASE(5);
429	virtual_end = IA64_RR_BASE(6)-1;
430
431	/*
432	 * Initialize protection array.
433	 */
434	ia64_protection_init();
435
436	/*
437	 * Initialize the kernel pmap (which is statically allocated).
438	 */
439	for (i = 0; i < 5; i++)
440		kernel_pmap->pm_rid[i] = 0;
441	kernel_pmap->pm_active = 1;
442	TAILQ_INIT(&kernel_pmap->pm_pvlist);
443	PCPU_SET(current_pmap, kernel_pmap);
444
445	/*
446	 * Region 5 is mapped via the vhpt.
447	 */
448	ia64_set_rr(IA64_RR_BASE(5),
449		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
450
451	/*
452	 * Region 6 is direct mapped UC and region 7 is direct mapped
453 * WB. The details of this are controlled by the Alt {I,D}TLB
454	 * handlers. Here we just make sure that they have the largest
455	 * possible page size to minimise TLB usage.
456	 */
457	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
458	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
459
460	/*
461	 * Set up proc0's PCB.
462	 */
463#if 0
464	thread0.td_pcb->pcb_hw.apcb_asn = 0;
465#endif
466
467	/*
468	 * Reserve some memory for allocating pvs while bootstrapping
469	 * the pv allocator. We need to have enough to cover mapping
470	 * the kmem_alloc region used to allocate the initial_pvs in
471	 * pmap_init. In general, the size of this region is
472	 * approximately (# physical pages) * (size of pv entry).
473	 */
474	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
475	pvbootentries = (struct pv_entry *)
476		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
477	pvbootnext = 0;
478
479	/*
480	 * Clear out any random TLB entries left over from booting.
481	 */
482	pmap_invalidate_all(kernel_pmap);
483}
484
485static void *
486pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
487{
488	*flags = UMA_SLAB_PRIV;
489	return (void *)kmem_alloc(kernel_map, bytes);
490}
491
492/*
493 *	Initialize the pmap module.
494 *	Called by vm_init, to initialize any structures that the pmap
495 *	system needs to map virtual memory.
496 *	pmap_init has been enhanced to support discontiguous physical
497 *	memory in a fairly consistent way.
498 */
499void
500pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
501{
502	int i;
503	int initial_pvs;
504
505	/*
506	 * Allocate memory for random pmap data structures.  Includes the
507	 * pv_head_table.
508	 */
509
510	for(i = 0; i < vm_page_array_size; i++) {
511		vm_page_t m;
512
513		m = &vm_page_array[i];
514		TAILQ_INIT(&m->md.pv_list);
515		m->md.pv_list_count = 0;
516 	}
517
518	/*
519	 * Init the pv free list and the PTE free list.
520	 */
521	initial_pvs = vm_page_array_size;
522	if (initial_pvs < MINPV)
523		initial_pvs = MINPV;
524	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
525	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
526	uma_zone_set_allocf(pvzone, pmap_allocf);
527	uma_prealloc(pvzone, initial_pvs);
528
529	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
530	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
531	uma_zone_set_allocf(ptezone, pmap_allocf);
532	uma_prealloc(ptezone, initial_pvs);
533
534	/*
535	 * Create the object for the kernel's page tables.
536	 */
537	kptobj = vm_object_allocate(OBJT_DEFAULT, MAXKPT);
538
539	/*
540	 * Now it is safe to enable pv_table recording.
541	 */
542	pmap_initialized = TRUE;
543}
544
545/*
546 * Initialize the address space (zone) for the pv_entries.  Set a
547 * high water mark so that the system can recover from excessive
548 * numbers of pv entries.
549 */
550void
551pmap_init2()
552{
553	int shpgperproc = PMAP_SHPGPERPROC;
554
555	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
556	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
557	pv_entry_high_water = 9 * (pv_entry_max / 10);
558	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
559	uma_zone_set_obj(ptezone, &ptezone_obj, pv_entry_max);
560}
561
562
563/***************************************************
564 * Manipulate TLBs for a pmap
565 ***************************************************/
566
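/*
 * Single-page invalidation uses ptc.g which, architecturally, is a
 * global purge visible to other processors, so no IPI is needed there;
 * only the full flush in pmap_invalidate_all() has to rendezvous the
 * other CPUs to run ptc.e locally.
 */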
567static void
568pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
569{
570	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
571		("invalidating TLB for non-current pmap"));
572	ia64_ptc_g(va, PAGE_SHIFT << 2);
573}
574
575static void
576pmap_invalidate_all_1(void *arg)
577{
578	u_int64_t addr;
579	int i, j;
580	register_t psr;
581
582	psr = intr_disable();
583	addr = pmap_ptc_e_base;
584	for (i = 0; i < pmap_ptc_e_count1; i++) {
585		for (j = 0; j < pmap_ptc_e_count2; j++) {
586			ia64_ptc_e(addr);
587			addr += pmap_ptc_e_stride2;
588		}
589		addr += pmap_ptc_e_stride1;
590	}
591	intr_restore(psr);
592}
593
594static void
595pmap_invalidate_all(pmap_t pmap)
596{
597	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
598		("invalidating TLB for non-current pmap"));
599
600
601#ifdef SMP
602	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
603#else
604	pmap_invalidate_all_1(0);
605#endif
606}
607
608static u_int32_t
609pmap_allocate_rid(void)
610{
611	int rid;
612
613	if (pmap_ridcount == pmap_ridmax)
614		panic("pmap_allocate_rid: All Region IDs used");
615
616	do {
617		rid = arc4random() & (pmap_ridmax - 1);
618	} while (pmap_ridbusy[rid / 64] & (1L << (rid & 63)));
619	pmap_ridbusy[rid / 64] |= (1L << (rid & 63));
620	pmap_ridcount++;
621
622	return rid;
623}
624
625static void
626pmap_free_rid(u_int32_t rid)
627{
628	mtx_lock(&pmap_ridmutex);
629	pmap_ridbusy[rid / 64] &= ~(1L << (rid & 63));
630	pmap_ridcount--;
631	mtx_unlock(&pmap_ridmutex);
632}
633
634static void
635pmap_ensure_rid(pmap_t pmap, vm_offset_t va)
636{
637	int rr;
638
639	rr = va >> 61;
640
641	/*
642	 * We get called for virtual addresses that may just as well be
643	 * kernel addresses (ie region 5, 6 or 7). Since the pm_rid field
644	 * only holds region IDs for user regions, we have to make sure
645	 * the region is within bounds.
646	 */
647	if (rr >= 5)
648		return;
649
650	if (pmap->pm_rid[rr])
651		return;
652
653	mtx_lock(&pmap_ridmutex);
654	pmap->pm_rid[rr] = pmap_allocate_rid();
655	if (pmap == PCPU_GET(current_pmap))
656		ia64_set_rr(IA64_RR_BASE(rr),
657			    (pmap->pm_rid[rr] << 8)|(PAGE_SHIFT << 2)|1);
658	mtx_unlock(&pmap_ridmutex);
659}
660
661/***************************************************
662 * Low level helper routines.....
663 ***************************************************/
664
665/*
666 * Install a pte into the VHPT
667 */
668static PMAP_INLINE void
669pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
670{
671	u_int64_t *vhp, *p;
672
673	/* invalidate the pte */
674	atomic_set_64(&vhpte->pte_tag, 1L << 63);
675	ia64_mf();			/* make sure everyone sees */
676
677	vhp = (u_int64_t *) vhpte;
678	p = (u_int64_t *) pte;
679
680	vhp[0] = p[0];
681	vhp[1] = p[1];
682	vhp[2] = p[2];			/* sets ti to one */
683
684	ia64_mf();
685}
686
687/*
688 * Compare essential parts of pte.
689 */
690static PMAP_INLINE int
691pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
692{
693	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
694}
695
696/*
697 * this routine defines the region(s) of memory that should
698 * not be tested for the modified bit.
699 */
700static PMAP_INLINE int
701pmap_track_modified(vm_offset_t va)
702{
703	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
704		return 1;
705	else
706		return 0;
707}
708
709/*
710 * Create the U area for a new process.
711 * This routine directly affects the fork perf for a process.
712 */
713void
714pmap_new_proc(struct proc *p)
715{
716	struct user *up;
717
718	/*
719	 * Use contigmalloc for the user area so that we can use a region
720	 * 7 address for it, which makes it impossible to accidentally
721	 * lose it when recording a trapframe.
722	 */
723	up = contigmalloc(UAREA_PAGES * PAGE_SIZE, M_PMAP,
724			  M_WAITOK,
725			  0ul,
726			  256*1024*1024 - 1,
727			  PAGE_SIZE,
728			  256*1024*1024);
729
730	p->p_md.md_uservirt = up;
731	p->p_uarea = (struct user *)
732		IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t) up));
733}
734
735/*
736 * Dispose the U area for a process that has exited.
737 * This routine directly impacts the exit perf of a process.
738 */
739void
740pmap_dispose_proc(struct proc *p)
741{
742	contigfree(p->p_md.md_uservirt, UAREA_PAGES * PAGE_SIZE, M_PMAP);
743	p->p_md.md_uservirt = 0;
744	p->p_uarea = 0;
745}
746
747/*
748 * Allow the U area for a process to be prejudicially paged out.
749 */
750void
751pmap_swapout_proc(struct proc *p)
752{
753}
754
755/*
756 * Bring the U area for a specified process back in.
757 */
758void
759pmap_swapin_proc(struct proc *p)
760{
761}
762
763/*
764 * Create the KSTACK for a new thread.
765 * This routine directly affects the fork perf for a process/thread.
766 */
767void
768pmap_new_thread(struct thread *td)
769{
770	vm_offset_t *ks;
771
772	/*
773	 * Use contigmalloc for the kernel stack so that we can use a
774	 * region 7 address for it, which makes it impossible to
775	 * accidentally lose it when recording a trapframe.
776	 */
777	ks = contigmalloc(KSTACK_PAGES * PAGE_SIZE, M_PMAP,
778			  M_WAITOK,
779			  0ul,
780			  256*1024*1024 - 1,
781			  PAGE_SIZE,
782			  256*1024*1024);
783
784	td->td_md.md_kstackvirt = ks;
785	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
786}
787
788/*
789 * Dispose the KSTACK for a thread that has exited.
790 * This routine directly impacts the exit perf of a process/thread.
791 */
792void
793pmap_dispose_thread(struct thread *td)
794{
795	contigfree(td->td_md.md_kstackvirt, KSTACK_PAGES * PAGE_SIZE, M_PMAP);
796	td->td_md.md_kstackvirt = 0;
797	td->td_kstack = 0;
798}
799
800/*
801 * Allow the KSTACK for a thread to be prejudicially paged out.
802 */
803void
804pmap_swapout_thread(struct thread *td)
805{
806}
807
808/*
809 * Bring the KSTACK for a specified thread back in.
810 */
811void
812pmap_swapin_thread(struct thread *td)
813{
814}
815
816/***************************************************
817 * Page table page management routines.....
818 ***************************************************/
819
820void
821pmap_pinit0(struct pmap *pmap)
822{
823	/* kernel_pmap is the same as any other pmap. */
824	pmap_pinit(pmap);
825}
826
827/*
828 * Initialize a preallocated and zeroed pmap structure,
829 * such as one in a vmspace structure.
830 */
831void
832pmap_pinit(struct pmap *pmap)
833{
834	int i;
835
836	pmap->pm_flags = 0;
837	for (i = 0; i < 5; i++)
838		pmap->pm_rid[i] = 0;
839	pmap->pm_ptphint = NULL;
840	pmap->pm_active = 0;
841	TAILQ_INIT(&pmap->pm_pvlist);
842	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
843}
844
845/*
846 * Wire in kernel global address entries.  To avoid a race condition
847 * between pmap initialization and pmap_growkernel, this procedure
848 * should be called after the vmspace is attached to the process
849 * but before this pmap is activated.
850 */
851void
852pmap_pinit2(struct pmap *pmap)
853{
854}
855
856/***************************************************
857 * Pmap allocation/deallocation routines.
858 ***************************************************/
859
860/*
861 * Release any resources held by the given physical map.
862 * Called when a pmap initialized by pmap_pinit is being released.
863 * Should only be called if the map contains no valid mappings.
864 */
865void
866pmap_release(pmap_t pmap)
867{
868	int i;
869
870	for (i = 0; i < 5; i++)
871		if (pmap->pm_rid[i])
872			pmap_free_rid(pmap->pm_rid[i]);
873}
874
875/*
876 * grow the number of kernel page table entries, if needed
877 */
878void
879pmap_growkernel(vm_offset_t addr)
880{
881	struct ia64_lpte *ptepage;
882	vm_page_t nkpg;
883
884	if (kernel_vm_end == 0) {
885		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
886			+ IA64_RR_BASE(5);
887	}
888	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
889	while (kernel_vm_end < addr) {
890		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
891			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
892				& ~(PAGE_SIZE * NKPTEPG - 1);
893			continue;
894		}
895
896		/*
897		 * We could handle more by increasing the size of kptdir.
898		 */
899		if (nkpt == MAXKPT)
900			panic("pmap_growkernel: out of kernel address space");
901
902		/*
903		 * This index is bogus, but out of the way
904		 */
905		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
906		if (!nkpg)
907			panic("pmap_growkernel: no memory to grow kernel");
908
909		nkpt++;
910
911		vm_page_wire(nkpg);
912		ptepage = (struct ia64_lpte *)
913			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
914		bzero(ptepage, PAGE_SIZE);
915		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
916
917		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
918	}
919}
920
921/***************************************************
922 * page management routines.
923 ***************************************************/
924
925/*
926 * free the pv_entry back to the free list
927 */
928static PMAP_INLINE void
929free_pv_entry(pv_entry_t pv)
930{
931	pv_entry_count--;
932	uma_zfree(pvzone, pv);
933}
934
935/*
936 * get a new pv_entry, allocating a block from the system
937 * when needed.
938 * the memory allocation is performed bypassing the malloc code
939 * because of the possibility of allocations at interrupt time.
940 */
941static pv_entry_t
942get_pv_entry(void)
943{
944	pv_entry_count++;
945	if (pv_entry_high_water &&
946		(pv_entry_count > pv_entry_high_water) &&
947		(pmap_pagedaemon_waken == 0)) {
948		pmap_pagedaemon_waken = 1;
949		wakeup (&vm_pages_needed);
950	}
951	return uma_zalloc(pvzone, M_WAITOK);
952}
953
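/*
 * The VHPT is used in long format: each hash bucket, located with
 * ia64_thash(), is itself a struct ia64_lpte whose pte_chain field
 * holds the physical address of a singly-linked collision chain of
 * ia64_lptes hashing to that bucket.  Entries on the chain are
 * distinguished by the per-VA tag computed with ia64_ttag().  The
 * routines below insert into, update and unlink from that chain.
 */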
954/*
955 * Add an ia64_lpte to the VHPT.
956 */
957static void
958pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
959{
960	struct ia64_lpte *vhpte;
961
962	pmap_vhpt_inserts++;
963	pmap_vhpt_resident++;
964
965	vhpte = (struct ia64_lpte *) ia64_thash(va);
966
967	if (vhpte->pte_chain)
968		pmap_vhpt_collisions++;
969
970	pte->pte_chain = vhpte->pte_chain;
971	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
972
973	if (!vhpte->pte_p && pte->pte_p)
974		pmap_install_pte(vhpte, pte);
975	else
976		ia64_mf();
977}
978
979/*
980 * Update VHPT after a pte has changed.
981 */
982static void
983pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
984{
985	struct ia64_lpte *vhpte;
986
987	vhpte = (struct ia64_lpte *) ia64_thash(va);
988
989	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
990	    && pte->pte_p)
991		pmap_install_pte(vhpte, pte);
992}
993
994/*
995 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
996 * worked or an appropriate error code otherwise.
997 */
998static int
999pmap_remove_vhpt(vm_offset_t va)
1000{
1001	struct ia64_lpte *pte;
1002	struct ia64_lpte *lpte;
1003	struct ia64_lpte *vhpte;
1004	u_int64_t tag;
1005	int error = ENOENT;
1006
1007	vhpte = (struct ia64_lpte *) ia64_thash(va);
1008
1009	/*
1010	 * If the VHPTE is invalid, there can't be a collision chain.
1011	 */
1012	if (!vhpte->pte_p) {
1013		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1014		printf("can't remove vhpt entry for 0x%lx\n", va);
1015		goto done;
1016	}
1017
1018	lpte = vhpte;
1019	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1020	tag = ia64_ttag(va);
1021
1022	while (pte->pte_tag != tag) {
1023		lpte = pte;
1024		if (pte->pte_chain)
1025			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1026		else {
1027			printf("can't remove vhpt entry for 0x%lx\n", va);
1028			goto done;
1029		}
1030	}
1031
1032	/*
1033	 * Snip this pte out of the collision chain.
1034	 */
1035	lpte->pte_chain = pte->pte_chain;
1036
1037	/*
1038	 * If the VHPTE matches as well, change it to map the first
1039	 * element from the chain if there is one.
1040	 */
1041	if (vhpte->pte_tag == tag) {
1042		if (vhpte->pte_chain) {
1043			pte = (struct ia64_lpte *)
1044				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1045			pmap_install_pte(vhpte, pte);
1046		} else {
1047			vhpte->pte_p = 0;
1048			ia64_mf();
1049		}
1050	}
1051
1052	pmap_vhpt_resident--;
1053	error = 0;
1054 done:
1055	return error;
1056}
1057
1058/*
1059 * Find the ia64_lpte for the given va, if any.
1060 */
1061static struct ia64_lpte *
1062pmap_find_vhpt(vm_offset_t va)
1063{
1064	struct ia64_lpte *pte;
1065	u_int64_t tag;
1066
1067	pte = (struct ia64_lpte *) ia64_thash(va);
1068	if (!pte->pte_chain) {
1069		pte = 0;
1070		goto done;
1071	}
1072
1073	tag = ia64_ttag(va);
1074	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1075
1076	while (pte->pte_tag != tag) {
1077		if (pte->pte_chain) {
1078			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1079		} else {
1080			pte = 0;
1081			break;
1082		}
1083	}
1084
1085 done:
1086	return pte;
1087}
1088
1089/*
1090 * Remove an entry from the list of managed mappings.
1091 */
1092static int
1093pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1094{
1095	if (!pv) {
1096		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1097			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1098				if (pmap == pv->pv_pmap && va == pv->pv_va)
1099					break;
1100			}
1101		} else {
1102			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1103				if (va == pv->pv_va)
1104					break;
1105			}
1106		}
1107	}
1108
1109	if (pv) {
1110		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1111		m->md.pv_list_count--;
1112		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1113			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1114
1115		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1116		free_pv_entry(pv);
1117		return 0;
1118	} else {
1119		return ENOENT;
1120	}
1121}
1122
1123/*
1124 * Create a pv entry for page at pa for
1125 * (pmap, va).
1126 */
1127static void
1128pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1129{
1130	pv_entry_t pv;
1131
1132	pv = get_pv_entry();
1133	pv->pv_pmap = pmap;
1134	pv->pv_va = va;
1135
1136	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1137	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1138	m->md.pv_list_count++;
1139}
1140
1141/*
1142 *	Routine:	pmap_extract
1143 *	Function:
1144 *		Extract the physical page address associated
1145 *		with the given map/virtual_address pair.
1146 */
1147vm_offset_t
1148pmap_extract(pmap, va)
1149	register pmap_t pmap;
1150	vm_offset_t va;
1151{
1152	pmap_t oldpmap;
1153	vm_offset_t pa;
1154
1155	oldpmap = pmap_install(pmap);
1156	pa = ia64_tpa(va);
1157	pmap_install(oldpmap);
1158	return pa;
1159}
1160
1161/***************************************************
1162 * Low level mapping routines.....
1163 ***************************************************/
1164
1165/*
1166 * Find the kernel lpte for mapping the given virtual address, which
1167 * must be in the part of region 5 which we can cover with our kernel
1168 * 'page tables'.
1169 */
1170static struct ia64_lpte *
1171pmap_find_kpte(vm_offset_t va)
1172{
1173	KASSERT((va >> 61) == 5,
1174		("kernel mapping 0x%lx not in region 5", va));
1175	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1176		("kernel mapping 0x%lx out of range", va));
1177	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1178}
1179
1180/*
1181 * Find a pte suitable for mapping a user-space address. If one exists
1182 * in the VHPT, that one will be returned, otherwise a new pte is
1183 * allocated.
1184 */
1185static struct ia64_lpte *
1186pmap_find_pte(vm_offset_t va)
1187{
1188	struct ia64_lpte *pte;
1189
1190	if (va >= VM_MAXUSER_ADDRESS)
1191		return pmap_find_kpte(va);
1192
1193	pte = pmap_find_vhpt(va);
1194	if (!pte) {
1195		pte = uma_zalloc(ptezone, M_WAITOK);
1196		pte->pte_p = 0;
1197	}
1198	return pte;
1199}
1200
1201/*
1202 * Free a pte which is now unused. This simply returns it to the zone
1203 * allocator if it is a user mapping. For kernel mappings, clear the
1204 * valid bit to make it clear that the mapping is not currently used.
1205 */
1206static void
1207pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1208{
1209	if (va < VM_MAXUSER_ADDRESS)
1210		uma_zfree(ptezone, pte);
1211	else
1212		pte->pte_p = 0;
1213}
1214
1215/*
1216 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1217 * the pte was originally valid, then it is assumed to already be in the
1218 * VHPT.
1219 */
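/*
 * Managed mappings are entered with the accessed and dirty bits clear
 * so that the first reference or write is visible to the pmap module;
 * pmap_remove_pte() and pmap_protect() harvest pte_a and pte_d into
 * the vm_page's referenced and dirty state.  Unmanaged mappings get
 * a = d = 1 up front to avoid the extra faults.
 */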
1220static void
1221pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1222	     int ig, int pl, int ar)
1223{
1224	int wasvalid = pte->pte_p;
1225
1226	pte->pte_p = 1;
1227	pte->pte_ma = PTE_MA_WB;
1228	if (ig & PTE_IG_MANAGED) {
1229		pte->pte_a = 0;
1230		pte->pte_d = 0;
1231	} else {
1232		pte->pte_a = 1;
1233		pte->pte_d = 1;
1234	}
1235	pte->pte_pl = pl;
1236	pte->pte_ar = ar;
1237	pte->pte_ppn = pa >> 12;
1238	pte->pte_ed = 0;
1239	pte->pte_ig = ig;
1240
1241	pte->pte_ps = PAGE_SHIFT;
1242	pte->pte_key = 0;
1243
1244	pte->pte_tag = ia64_ttag(va);
1245
1246	if (wasvalid) {
1247		pmap_update_vhpt(pte, va);
1248	} else {
1249		pmap_enter_vhpt(pte, va);
1250	}
1251}
1252
1253/*
1254 * If a pte contains a valid mapping, clear it and update the VHPT.
1255 */
1256static void
1257pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1258{
1259	if (pte->pte_p) {
1260		pmap_remove_vhpt(va);
1261		ia64_ptc_g(va, PAGE_SHIFT << 2);
1262		pte->pte_p = 0;
1263	}
1264}
1265
1266/*
1267 * Remove the (possibly managed) mapping represented by pte from the
1268 * given pmap.
1269 */
1270static int
1271pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1272		pv_entry_t pv, int freepte)
1273{
1274	int error;
1275	vm_page_t m;
1276
1277	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1278		("removing pte for non-current pmap"));
1279
1280	/*
1281	 * First remove from the VHPT.
1282	 */
1283	error = pmap_remove_vhpt(va);
1284	if (error)
1285		return error;
1286
1287	/*
1288	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1289	 */
1290	pte->pte_p = 0;
1291
1292	if (pte->pte_ig & PTE_IG_WIRED)
1293		pmap->pm_stats.wired_count -= 1;
1294
1295	pmap->pm_stats.resident_count -= 1;
1296	if (pte->pte_ig & PTE_IG_MANAGED) {
1297		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1298		if (pte->pte_d)
1299			if (pmap_track_modified(va))
1300				vm_page_dirty(m);
1301		if (pte->pte_a)
1302			vm_page_flag_set(m, PG_REFERENCED);
1303
1304		if (freepte)
1305			pmap_free_pte(pte, va);
1306		return pmap_remove_entry(pmap, m, va, pv);
1307	} else {
1308		if (freepte)
1309			pmap_free_pte(pte, va);
1310		return 0;
1311	}
1312}
1313
1314/*
1315 * Add a list of wired pages to the kva.
1316 * This routine is only used for temporary
1317 * kernel mappings that do not need to have
1318 * page modification or references recorded.
1319 * Note that old mappings are simply written
1320 * over.  The page *must* be wired.
1321 */
1322void
1323pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1324{
1325	int i;
1326	struct ia64_lpte *pte;
1327
1328	for (i = 0; i < count; i++) {
1329		vm_offset_t tva = va + i * PAGE_SIZE;
1330		int wasvalid;
1331		pte = pmap_find_kpte(tva);
1332		wasvalid = pte->pte_p;
1333		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1334			     0, PTE_PL_KERN, PTE_AR_RWX);
1335		if (wasvalid)
1336			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1337	}
1338}
1339
1340/*
1341 * this routine jerks page mappings from the
1342 * kernel -- it is meant only for temporary mappings.
1343 */
1344void
1345pmap_qremove(vm_offset_t va, int count)
1346{
1347	int i;
1348	struct ia64_lpte *pte;
1349
1350	for (i = 0; i < count; i++) {
1351		pte = pmap_find_kpte(va);
1352		pmap_clear_pte(pte, va);
1353		va += PAGE_SIZE;
1354	}
1355}
1356
1357/*
1358 * Add a wired page to the kva.
1359 */
1360void
1361pmap_kenter(vm_offset_t va, vm_offset_t pa)
1362{
1363	struct ia64_lpte *pte;
1364	int wasvalid;
1365
1366	pte = pmap_find_kpte(va);
1367	wasvalid = pte->pte_p;
1368	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1369	if (wasvalid)
1370		ia64_ptc_g(va, PAGE_SHIFT << 2);
1371}
1372
1373/*
1374 * Remove a page from the kva
1375 */
1376void
1377pmap_kremove(vm_offset_t va)
1378{
1379	struct ia64_lpte *pte;
1380
1381	pte = pmap_find_kpte(va);
1382	pmap_clear_pte(pte, va);
1383}
1384
1385/*
1386 *	Used to map a range of physical addresses into kernel
1387 *	virtual address space.
1388 *
1389 *	The value passed in '*virt' is a suggested virtual address for
1390 *	the mapping. Architectures which can support a direct-mapped
1391 *	physical to virtual region can return the appropriate address
1392 *	within that region, leaving '*virt' unchanged. Other
1393 *	architectures should map the pages starting at '*virt' and
1394 *	update '*virt' with the first usable address after the mapped
1395 *	region.
1396 */
1397vm_offset_t
1398pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1399{
1400	return IA64_PHYS_TO_RR7(start);
1401}
1402
1403/*
1404 * This routine is very drastic, but can save the system
1405 * in a pinch.
1406 */
1407void
1408pmap_collect()
1409{
1410	int i;
1411	vm_page_t m;
1412	static int warningdone = 0;
1413
1414	if (pmap_pagedaemon_waken == 0)
1415		return;
1416
1417	if (warningdone < 5) {
1418		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1419		warningdone++;
1420	}
1421
1422	for(i = 0; i < vm_page_array_size; i++) {
1423		m = &vm_page_array[i];
1424		if (m->wire_count || m->hold_count || m->busy ||
1425		    (m->flags & (PG_BUSY | PG_UNMANAGED)))
1426			continue;
1427		pmap_remove_all(m);
1428	}
1429	pmap_pagedaemon_waken = 0;
1430}
1431
1432/*
1433 * Remove a single page from a process address space
1434 */
1435static void
1436pmap_remove_page(pmap_t pmap, vm_offset_t va)
1437{
1438	struct ia64_lpte *pte;
1439
1440	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1441		("removing page for non-current pmap"));
1442
1443	pte = pmap_find_vhpt(va);
1444	if (pte) {
1445		pmap_remove_pte(pmap, pte, va, 0, 1);
1446		pmap_invalidate_page(pmap, va);
1447	}
1448	return;
1449}
1450
1451/*
1452 *	Remove the given range of addresses from the specified map.
1453 *
1454 *	It is assumed that the start and end are properly
1455 *	rounded to the page size.
1456 */
1457void
1458pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1459{
1460	pmap_t oldpmap;
1461	vm_offset_t va;
1462	pv_entry_t pv;
1463	struct ia64_lpte *pte;
1464
1465	if (pmap == NULL)
1466		return;
1467
1468	if (pmap->pm_stats.resident_count == 0)
1469		return;
1470
1471	oldpmap = pmap_install(pmap);
1472
1473	/*
1474	 * special handling of removing one page.  a very
1475	 * common operation and easy to short circuit some
1476	 * code.
1477	 */
1478	if (sva + PAGE_SIZE == eva) {
1479		pmap_remove_page(pmap, sva);
1480		pmap_install(oldpmap);
1481		return;
1482	}
1483
1484	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1485		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1486			va = pv->pv_va;
1487			if (va >= sva && va < eva) {
1488				pte = pmap_find_vhpt(va);
1489				pmap_remove_pte(pmap, pte, va, pv, 1);
1490				pmap_invalidate_page(pmap, va);
1491			}
1492		}
1493
1494	} else {
1495		for (va = sva; va < eva; va += PAGE_SIZE) {
1496			pte = pmap_find_vhpt(va);
1497			if (pte) {
1498				pmap_remove_pte(pmap, pte, va, 0, 1);
1499				pmap_invalidate_page(pmap, va);
1500			}
1501		}
1502	}
1503
1504	pmap_install(oldpmap);
1505}
1506
1507/*
1508 *	Routine:	pmap_remove_all
1509 *	Function:
1510 *		Removes this physical page from
1511 *		all physical maps in which it resides.
1512 *		Reflects back modify bits to the pager.
1513 *
1514 *	Notes:
1515 *		Original versions of this routine were very
1516 *		inefficient because they iteratively called
1517 *		pmap_remove (slow...)
1518 */
1519
1520static void
1521pmap_remove_all(vm_page_t m)
1522{
1523	pmap_t oldpmap;
1524	pv_entry_t pv;
1525	int nmodify;
1526	int s;
1527
1528	nmodify = 0;
1529#if defined(PMAP_DIAGNOSTIC)
1530	/*
1531	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1532	 * pages!
1533	 */
1534	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1535		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1536	}
1537#endif
1538
1539	s = splvm();
1540
1541	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1542		struct ia64_lpte *pte;
1543		pmap_t pmap = pv->pv_pmap;
1544		vm_offset_t va = pv->pv_va;
1545
1546		oldpmap = pmap_install(pmap);
1547		pte = pmap_find_vhpt(va);
1548		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1549			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1550		pmap_remove_pte(pmap, pte, va, pv, 1);
1551		pmap_invalidate_page(pmap, va);
1552		pmap_install(oldpmap);
1553	}
1554
1555	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1556
1557	splx(s);
1558	return;
1559}
1560
1561/*
1562 *	Set the physical protection on the
1563 *	specified range of this map as requested.
1564 */
1565void
1566pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1567{
1568	pmap_t oldpmap;
1569	struct ia64_lpte *pte;
1570	int newprot;
1571
1572	if (pmap == NULL)
1573		return;
1574
1575	oldpmap = pmap_install(pmap);
1576
1577	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1578		pmap_remove(pmap, sva, eva);
1579		pmap_install(oldpmap);
1580		return;
1581	}
1582
1583	if (prot & VM_PROT_WRITE) {
1584		pmap_install(oldpmap);
1585		return;
1586	}
1587
1588	newprot = pte_prot(pmap, prot);
1589
1590	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1591		panic("pmap_protect: unaligned addresses");
1592
1593	while (sva < eva) {
1594		/*
1595		 * If page is invalid, skip this page
1596		 */
1597		pte = pmap_find_vhpt(sva);
1598		if (!pte) {
1599			sva += PAGE_SIZE;
1600			continue;
1601		}
1602
1603		if (pmap_pte_prot(pte) != newprot) {
1604			if (pte->pte_ig & PTE_IG_MANAGED) {
1605				vm_offset_t pa = pmap_pte_pa(pte);
1606				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1607				if (pte->pte_d) {
1608					if (pmap_track_modified(sva))
1609						vm_page_dirty(m);
1610					pte->pte_d = 0;
1611				}
1612				if (pte->pte_a) {
1613					vm_page_flag_set(m, PG_REFERENCED);
1614					pte->pte_a = 0;
1615				}
1616			}
1617			pmap_pte_set_prot(pte, newprot);
1618			pmap_update_vhpt(pte, sva);
1619			pmap_invalidate_page(pmap, sva);
1620		}
1621
1622		sva += PAGE_SIZE;
1623	}
1624	pmap_install(oldpmap);
1625}
1626
1627/*
1628 *	Insert the given physical page (p) at
1629 *	the specified virtual address (v) in the
1630 *	target physical map with the protection requested.
1631 *
1632 *	If specified, the page will be wired down, meaning
1633 *	that the related pte can not be reclaimed.
1634 *
1635 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1636 *	or lose information.  That is, this routine must actually
1637 *	insert this page into the given map NOW.
1638 */
1639void
1640pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1641	   boolean_t wired)
1642{
1643	pmap_t oldpmap;
1644	vm_offset_t pa;
1645	vm_offset_t opa;
1646	struct ia64_lpte origpte;
1647	struct ia64_lpte *pte;
1648	int managed;
1649
1650	if (pmap == NULL)
1651		return;
1652
1653	pmap_ensure_rid(pmap, va);
1654
1655	oldpmap = pmap_install(pmap);
1656
1657	va &= ~PAGE_MASK;
1658#ifdef PMAP_DIAGNOSTIC
1659	if (va > VM_MAX_KERNEL_ADDRESS)
1660		panic("pmap_enter: toobig");
1661#endif
1662
1663	/*
1664	 * Find (or create) a pte for the given mapping.
1665	 */
1666	pte = pmap_find_pte(va);
1667	origpte = *pte;
1668
1669	if (origpte.pte_p)
1670		opa = pmap_pte_pa(&origpte);
1671	else
1672		opa = 0;
1673	managed = 0;
1674
1675	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1676
1677	/*
1678	 * Mapping has not changed, must be protection or wiring change.
1679	 */
1680	if (origpte.pte_p && (opa == pa)) {
1681		/*
1682		 * Wiring change, just update stats. We don't worry about
1683		 * wiring PT pages as they remain resident as long as there
1684		 * are valid mappings in them. Hence, if a user page is wired,
1685		 * the PT page will be also.
1686		 */
1687		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1688			pmap->pm_stats.wired_count++;
1689		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1690			pmap->pm_stats.wired_count--;
1691
1692		/*
1693		 * We might be turning off write access to the page,
1694		 * so we go ahead and sense modify status.
1695		 */
1696		if (origpte.pte_ig & PTE_IG_MANAGED) {
1697			if (origpte.pte_d && pmap_track_modified(va)) {
1698				vm_page_t om;
1699				om = PHYS_TO_VM_PAGE(opa);
1700				vm_page_dirty(om);
1701			}
1702		}
1703
1704		managed = origpte.pte_ig & PTE_IG_MANAGED;
1705		goto validate;
1706	}
1707	/*
1708	 * Mapping has changed, invalidate old range and fall
1709	 * through to handle validating new mapping.
1710	 */
1711	if (opa) {
1712		int error;
1713		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1714		if (error)
1715			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1716	}
1717
1718	/*
1719	 * Enter on the PV list if part of our managed memory.
1720	 */
1721	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1722		pmap_insert_entry(pmap, va, m);
1723		managed |= PTE_IG_MANAGED;
1724	}
1725
1726	/*
1727	 * Increment counters
1728	 */
1729	pmap->pm_stats.resident_count++;
1730	if (wired)
1731		pmap->pm_stats.wired_count++;
1732
1733validate:
1734
1735	/*
1736	 * Now validate mapping with desired protection/wiring. This
1737	 * adds the pte to the VHPT if necessary.
1738	 */
1739	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1740		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1741
1742	/*
1743	 * if the mapping or permission bits are different, we need
1744	 * to invalidate the page.
1745	 */
1746	if (!pmap_equal_pte(&origpte, pte))
1747		pmap_invalidate_page(pmap, va);
1748
1749	pmap_install(oldpmap);
1750}
1751
1752/*
1753 * this code makes some *MAJOR* assumptions:
1754 * 1. Current pmap & pmap exists.
1755 * 2. Not wired.
1756 * 3. Read access.
1757 * 4. No page table pages.
1758 * 5. Tlbflush is deferred to calling procedure.
1759 * 6. Page IS managed.
1760 * but is *MUCH* faster than pmap_enter...
1761 */
1762
1763static void
1764pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1765{
1766	struct ia64_lpte *pte;
1767	pmap_t oldpmap;
1768
1769	pmap_ensure_rid(pmap, va);
1770
1771	oldpmap = pmap_install(pmap);
1772
1773	pte = pmap_find_pte(va);
1774	if (pte->pte_p)
1775		return;
1776
1777	/*
1778	 * Enter on the PV list since its part of our managed memory.
1779	 * Enter on the PV list since it is part of our managed memory.
1780	pmap_insert_entry(pmap, va, m);
1781
1782	/*
1783	 * Increment counters
1784	 */
1785	pmap->pm_stats.resident_count++;
1786
1787	/*
1788	 * Initialise PTE with read-only protection and enter into VHPT.
1789	 */
1790	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1791		     PTE_IG_MANAGED,
1792		     PTE_PL_USER, PTE_AR_R);
1793
1794	pmap_install(oldpmap);
1795}
1796
1797/*
1798 * Make temporary mapping for a physical address. This is called
1799 * during dump.
1800 */
1801void *
1802pmap_kenter_temporary(vm_offset_t pa, int i)
1803{
1804	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1805}
1806
1807#define MAX_INIT_PT (96)
1808/*
1809 * pmap_object_init_pt preloads the ptes for a given object
1810 * into the specified pmap.  This eliminates the blast of soft
1811 * faults on process startup and immediately after an mmap.
1812 */
1813void
1814pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1815		    vm_object_t object, vm_pindex_t pindex,
1816		    vm_size_t size, int limit)
1817{
1818	pmap_t oldpmap;
1819	vm_offset_t tmpidx;
1820	int psize;
1821	vm_page_t p;
1822	int objpgs;
1823
1824	if (pmap == NULL || object == NULL)
1825		return;
1826
1827	oldpmap = pmap_install(pmap);
1828
1829	psize = ia64_btop(size);
1830
1831	if ((object->type != OBJT_VNODE) ||
1832		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1833			(object->resident_page_count > MAX_INIT_PT))) {
1834		pmap_install(oldpmap);
1835		return;
1836	}
1837
1838	if (psize + pindex > object->size) {
1839		if (object->size < pindex)
1840			return;
1841		psize = object->size - pindex;
1842	}
1843
1844	/*
1845	 * if we are processing a major portion of the object, then scan the
1846	 * entire thing.
1847	 */
1848	if (psize > (object->resident_page_count >> 2)) {
1849		objpgs = psize;
1850
1851		for (p = TAILQ_FIRST(&object->memq);
1852		    ((objpgs > 0) && (p != NULL));
1853		    p = TAILQ_NEXT(p, listq)) {
1854
1855			tmpidx = p->pindex;
1856			if (tmpidx < pindex) {
1857				continue;
1858			}
1859			tmpidx -= pindex;
1860			if (tmpidx >= psize) {
1861				continue;
1862			}
1863			/*
1864			 * don't allow an madvise to blow away our really
1865			 * free pages by allocating pv entries.
1866			 */
1867			if ((limit & MAP_PREFAULT_MADVISE) &&
1868			    cnt.v_free_count < cnt.v_free_reserved) {
1869				break;
1870			}
1871			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1872				(p->busy == 0) &&
1873			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1874				if ((p->queue - p->pc) == PQ_CACHE)
1875					vm_page_deactivate(p);
1876				vm_page_busy(p);
1877				pmap_enter_quick(pmap,
1878						 addr + ia64_ptob(tmpidx), p);
1879				vm_page_flag_set(p, PG_MAPPED);
1880				vm_page_wakeup(p);
1881			}
1882			objpgs -= 1;
1883		}
1884	} else {
1885		/*
1886		 * else lookup the pages one-by-one.
1887		 */
1888		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1889			/*
1890			 * don't allow an madvise to blow away our really
1891			 * free pages by allocating pv entries.
1892			 */
1893			if ((limit & MAP_PREFAULT_MADVISE) &&
1894			    cnt.v_free_count < cnt.v_free_reserved) {
1895				break;
1896			}
1897			p = vm_page_lookup(object, tmpidx + pindex);
1898			if (p &&
1899			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1900				(p->busy == 0) &&
1901			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1902				if ((p->queue - p->pc) == PQ_CACHE)
1903					vm_page_deactivate(p);
1904				vm_page_busy(p);
1905				pmap_enter_quick(pmap,
1906						 addr + ia64_ptob(tmpidx), p);
1907				vm_page_flag_set(p, PG_MAPPED);
1908				vm_page_wakeup(p);
1909			}
1910		}
1911	}
1912	pmap_install(oldpmap);
1913	return;
1914}
1915
1916/*
1917 * pmap_prefault provides a quick way of clustering
1918 * page faults into a process's address space.  It is a "cousin"
1919 * of pmap_object_init_pt, except it runs at page fault time instead
1920 * of mmap time.
1921 */
1922#define PFBAK 4
1923#define PFFOR 4
1924#define PAGEORDER_SIZE (PFBAK+PFFOR)
1925
1926static int pmap_prefault_pageorder[] = {
1927	-PAGE_SIZE, PAGE_SIZE,
1928	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1929	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1930	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1931};
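/*
 * The order above walks outward from the faulting address, alternating
 * one page behind and one page ahead, out to PFBAK/PFFOR pages.
 */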
1932
1933void
1934pmap_prefault(pmap, addra, entry)
1935	pmap_t pmap;
1936	vm_offset_t addra;
1937	vm_map_entry_t entry;
1938{
1939	int i;
1940	vm_offset_t starta;
1941	vm_offset_t addr;
1942	vm_pindex_t pindex;
1943	vm_page_t m, mpte;
1944	vm_object_t object;
1945
1946	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1947		return;
1948
1949	object = entry->object.vm_object;
1950
1951	starta = addra - PFBAK * PAGE_SIZE;
1952	if (starta < entry->start) {
1953		starta = entry->start;
1954	} else if (starta > addra) {
1955		starta = 0;
1956	}
1957
1958	mpte = NULL;
1959	for (i = 0; i < PAGEORDER_SIZE; i++) {
1960		vm_object_t lobject;
1961		struct ia64_lpte *pte;
1962
1963		addr = addra + pmap_prefault_pageorder[i];
1964		if (addr > addra + (PFFOR * PAGE_SIZE))
1965			addr = 0;
1966
1967		if (addr < starta || addr >= entry->end)
1968			continue;
1969
1970		pte = pmap_find_vhpt(addr);
1971		if (pte && pte->pte_p)
1972			continue;
1973
1974		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
1975		lobject = object;
1976		for (m = vm_page_lookup(lobject, pindex);
1977		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
1978		    lobject = lobject->backing_object) {
1979			if (lobject->backing_object_offset & PAGE_MASK)
1980				break;
1981			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
1982			m = vm_page_lookup(lobject->backing_object, pindex);
1983		}
1984
1985		/*
1986		 * give up when a page is not in memory
1987		 */
1988		if (m == NULL)
1989			break;
1990
1991		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1992			(m->busy == 0) &&
1993		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1994
1995			if ((m->queue - m->pc) == PQ_CACHE) {
1996				vm_page_deactivate(m);
1997			}
1998			vm_page_busy(m);
1999			pmap_enter_quick(pmap, addr, m);
2000			vm_page_flag_set(m, PG_MAPPED);
2001			vm_page_wakeup(m);
2002		}
2003	}
2004}
2005
2006/*
2007 *	Routine:	pmap_change_wiring
2008 *	Function:	Change the wiring attribute for a map/virtual-address
2009 *			pair.
2010 *	In/out conditions:
2011 *			The mapping must already exist in the pmap.
2012 */
2013void
2014pmap_change_wiring(pmap, va, wired)
2015	register pmap_t pmap;
2016	vm_offset_t va;
2017	boolean_t wired;
2018{
2019	pmap_t oldpmap;
2020	struct ia64_lpte *pte;
2021
2022	if (pmap == NULL)
2023		return;
2024
2025	oldpmap = pmap_install(pmap);
2026
2027	pte = pmap_find_vhpt(va);
2028
2029	if (wired && !pmap_pte_w(pte))
2030		pmap->pm_stats.wired_count++;
2031	else if (!wired && pmap_pte_w(pte))
2032		pmap->pm_stats.wired_count--;
2033
2034	/*
2035	 * Wiring is not a hardware characteristic so there is no need to
2036	 * invalidate TLB.
2037	 */
2038	pmap_pte_set_w(pte, wired);
2039
2040	pmap_install(oldpmap);
2041}
2042
2043
2044
2045/*
2046 *	Copy the range specified by src_addr/len
2047 *	from the source map to the range dst_addr/len
2048 *	in the destination map.
2049 *
2050 *	This routine is only advisory and need not do anything.
2051 */
2052
2053void
2054pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2055	  vm_offset_t src_addr)
2056{
2057}
2058
2059
2060/*
2061 *	pmap_zero_page zeros the specified hardware page by
2062 *	mapping it into virtual memory and using bzero to clear
2063 *	its contents.
2064 */
2065
2066void
2067pmap_zero_page(vm_page_t m)
2068{
2069	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2070	bzero((caddr_t) va, PAGE_SIZE);
2071}
2072
2073
2074/*
2075 *	pmap_zero_page_area zeros the specified hardware page by
2076 *	mapping it into virtual memory and using bzero to clear
2077 *	its contents.
2078 *
2079 *	off and size must reside within a single page.
2080 */
2081
2082void
2083pmap_zero_page_area(vm_page_t m, int off, int size)
2084{
2085	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2086	bzero((char *)(caddr_t)va + off, size);
2087}
2088
2089/*
2090 *	pmap_copy_page copies the specified (machine independent)
2091 *	page by mapping the page into virtual memory and using
2092 *	bcopy to copy the page, one machine dependent page at a
2093 *	time.
2094 */
2095void
2096pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2097{
2098	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2099	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2100	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2101}
2102
2103
2104/*
2105 *	Routine:	pmap_pageable
2106 *	Function:
2107 *		Make the specified pages (by pmap, offset)
2108 *		pageable (or not) as requested.
2109 *
2110 *		A page which is not pageable may not take
2111 *		a fault; therefore, its page table entry
2112 *		must remain valid for the duration.
2113 *
2114 *		This routine is merely advisory; pmap_enter
2115 *		will specify that these pages are to be wired
2116 *		down (or not) as appropriate.
2117 */
2118void
2119pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
2120	      boolean_t pageable)
2121{
2122}
2123
2124/*
2125 * Returns true if the pmap's pv is one of the first
2126 * 16 pvs linked to from this page.  This count may
2127 * be changed upwards or downwards in the future; it
2128 * is only necessary that true be returned for a small
2129 * subset of pmaps for proper page aging.
2130 */
2131boolean_t
2132pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2133{
2134	pv_entry_t pv;
2135	int loops = 0;
2136	int s;
2137
2138	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2139		return FALSE;
2140
2141	s = splvm();
2142
2143	/*
2144	 * Check the current mappings, returning immediately if one is found.
2145	 */
2146	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2147		if (pv->pv_pmap == pmap) {
2148			splx(s);
2149			return TRUE;
2150		}
2151		loops++;
2152		if (loops >= 16)
2153			break;
2154	}
2155	splx(s);
2156	return (FALSE);
2157}
2158
2159#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2160/*
2161 * Remove all pages from the specified address space;
2162 * this aids process exit speed.  This code is special
2163 * cased for the current process only, but the more
2164 * generic (and slightly slower) mode can be enabled.
2165 * This is much faster than pmap_remove in the case of
2166 * running down an entire address space.
2167 */
2168void
2169pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2170{
2171	pv_entry_t pv, npv;
2172	int s;
2173
2174#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2175	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2176		printf("warning: pmap_remove_pages called with non-current pmap\n");
2177		return;
2178	}
2179#endif
2180
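	/*
	 * Walk this pmap's own pv list rather than scanning the address
	 * range; every managed mapping in the pmap has a pv entry on
	 * pm_pvlist.
	 */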
2181	s = splvm();
2182	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2183		pv;
2184		pv = npv) {
2185		struct ia64_lpte *pte;
2186
2187		npv = TAILQ_NEXT(pv, pv_plist);
2188
2189		if (pv->pv_va >= eva || pv->pv_va < sva) {
2190			continue;
2191		}
2192
2193		pte = pmap_find_vhpt(pv->pv_va);
2194		if (!pte)
2195			panic("pmap_remove_pages: page on pm_pvlist has no pte");
2196
2197		/*
2198		 * We cannot remove wired pages from a process' mapping
2199		 * at this time.
2200		 */
2201		if (pte->pte_ig & PTE_IG_WIRED) {
2202			continue;
2203		}
2204
2205		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2206	}
2207	splx(s);
2208
2209	pmap_invalidate_all(pmap);
2210}
2211
2212/*
2213 *      pmap_page_protect:
2214 *
2215 *      Lower the permission for all mappings to a given page.
2216 */
2217void
2218pmap_page_protect(vm_page_t m, vm_prot_t prot)
2219{
2220	pv_entry_t pv;
2221
2222	if ((prot & VM_PROT_WRITE) != 0)
2223		return;
2224	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
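		/*
		 * Downgrade the access rights of every existing mapping of
		 * the page and purge the stale translation from the VHPT
		 * and TLB.
		 */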
2225		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2226			int newprot = pte_prot(pv->pv_pmap, prot);
2227			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2228			struct ia64_lpte *pte;
2229			pte = pmap_find_vhpt(pv->pv_va);
2230			pmap_pte_set_prot(pte, newprot);
2231			pmap_update_vhpt(pte, pv->pv_va);
2232			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2233			pmap_install(oldpmap);
2234		}
2235	} else {
2236		pmap_remove_all(m);
2237	}
2238}
2239
2240vm_offset_t
2241pmap_phys_address(int ppn)
2242{
2243	return (ia64_ptob(ppn));
2244}
2245
2246/*
2247 *	pmap_ts_referenced:
2248 *
2249 *	Return a count of reference bits for a page, clearing those bits.
2250 *	It is not necessary for every reference bit to be cleared, but it
2251 *	is necessary that 0 only be returned when there are truly no
2252 *	reference bits set.
2253 *
2254 *	XXX: The exact number of bits to check and clear is a matter that
2255 *	should be tested and standardized at some point in the future for
2256 *	optimal aging of shared pages.
2257 */
2258int
2259pmap_ts_referenced(vm_page_t m)
2260{
2261	pv_entry_t pv;
2262	int count = 0;
2263
2264	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2265		return 0;
2266
2267	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2268		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2269		struct ia64_lpte *pte;
2270		pte = pmap_find_vhpt(pv->pv_va);
2271		if (pte->pte_a) {
2272			count++;
2273			pte->pte_a = 0;
2274			pmap_update_vhpt(pte, pv->pv_va);
2275			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2276		}
2277		pmap_install(oldpmap);
2278	}
2279
2280	return count;
2281}
2282
2283#if 0
2284/*
2285 *	pmap_is_referenced:
2286 *
2287 *	Return whether or not the specified physical page was referenced
2288 *	in any physical maps.
2289 */
2290static boolean_t
2291pmap_is_referenced(vm_page_t m)
2292{
2293	pv_entry_t pv;
2294
2295	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2296		return FALSE;
2297
2298	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2299		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2300		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2301		pmap_install(oldpmap);
2302		if (pte->pte_a)
2303			return 1;
2304	}
2305
2306	return 0;
2307}
2308#endif
2309
2310/*
2311 *	pmap_is_modified:
2312 *
2313 *	Return whether or not the specified physical page was modified
2314 *	in any physical maps.
2315 */
2316boolean_t
2317pmap_is_modified(vm_page_t m)
2318{
2319	pv_entry_t pv;
2320
2321	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2322		return FALSE;
2323
2324	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2325		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2326		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2327		pmap_install(oldpmap);
2328		if (pte->pte_d)
2329			return 1;
2330	}
2331
2332	return 0;
2333}
2334
2335/*
2336 *	Clear the modify bits on the specified physical page.
2337 */
2338void
2339pmap_clear_modify(vm_page_t m)
2340{
2341	pv_entry_t pv;
2342
2343	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2344		return;
2345
2346	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2347		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2348		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2349		if (pte->pte_d) {
2350			pte->pte_d = 0;
2351			pmap_update_vhpt(pte, pv->pv_va);
2352			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2353		}
2354		pmap_install(oldpmap);
2355	}
2356}
2357
2358/*
2359 *	pmap_clear_reference:
2360 *
2361 *	Clear the reference bit on the specified physical page.
2362 */
2363void
2364pmap_clear_reference(vm_page_t m)
2365{
2366	pv_entry_t pv;
2367
2368	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2369		return;
2370
2371	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2372		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2373		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2374		if (pte->pte_a) {
2375			pte->pte_a = 0;
2376			pmap_update_vhpt(pte, pv->pv_va);
2377			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2378		}
2379		pmap_install(oldpmap);
2380	}
2381}
2382
2383/*
2384 * Miscellaneous support routines follow
2385 */
2386
2387static void
2388ia64_protection_init()
2389{
2390	int prot, *kp, *up;
2391
2392	kp = protection_codes[0];
2393	up = protection_codes[1];
2394
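	/*
	 * Build the table mapping each VM_PROT_READ/WRITE/EXECUTE
	 * combination to an ia64 access-rights/privilege-level code;
	 * protection_codes[0] holds the codes used for kernel mappings,
	 * protection_codes[1] those used for user mappings.
	 */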
2395	for (prot = 0; prot < 8; prot++) {
2396		switch (prot) {
2397		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2398			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2399			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2400			break;
2401
2402		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2403			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2404			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2405			break;
2406
2407		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2408			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2409			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2410			break;
2411
2412		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2413			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2414			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2415			break;
2416
2417		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2418			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2419			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2420			break;
2421
2422		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2423			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2424			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2425			break;
2426
2427		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2428			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2429			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2430			break;
2431
2432		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2433			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2434			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2435			break;
2436		}
2437	}
2438}
2439
2440/*
2441 * Map a set of physical memory pages into the kernel virtual
2442 * address space. Return a pointer to where it is mapped. This
2443 * routine is intended to be used for mapping device memory,
2444 * NOT real memory.
2445 */
2446void *
2447pmap_mapdev(vm_offset_t pa, vm_size_t size)
2448{
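	/*
	 * Region 6 is the uncacheable direct mapping of the physical
	 * address space, so device memory can be accessed through it
	 * without allocating kernel virtual address space.
	 */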
2449	return (void*) IA64_PHYS_TO_RR6(pa);
2450}
2451
2452/*
2453 * 'Unmap' a range mapped by pmap_mapdev().
2454 */
2455void
2456pmap_unmapdev(vm_offset_t va, vm_size_t size)
2457{
2458	return;
2459}
2460
2461/*
2462 * perform the pmap work for mincore
2463 */
2464int
2465pmap_mincore(pmap_t pmap, vm_offset_t addr)
2466{
2467	pmap_t oldpmap;
2468	struct ia64_lpte *pte;
2469	int val = 0;
2470
2471	oldpmap = pmap_install(pmap);
2472	pte = pmap_find_vhpt(addr);
2473	pmap_install(oldpmap);
2474
2475	if (!pte)
2476		return 0;
2477
2478	if (pmap_pte_v(pte)) {
2479		vm_page_t m;
2480		vm_offset_t pa;
2481
2482		val = MINCORE_INCORE;
2483		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2484			return val;
2485
2486		pa = pmap_pte_pa(pte);
2487
2488		m = PHYS_TO_VM_PAGE(pa);
2489
2490		/*
2491		 * Modified by us
2492		 */
2493		if (pte->pte_d)
2494			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2495		/*
2496		 * Modified by someone
2497		 */
2498		else if (pmap_is_modified(m))
2499			val |= MINCORE_MODIFIED_OTHER;
2500		/*
2501		 * Referenced by us
2502		 */
2503		if (pte->pte_a)
2504			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2505
2506		/*
2507		 * Referenced by someone
2508		 */
2509		else if (pmap_ts_referenced(m)) {
2510			val |= MINCORE_REFERENCED_OTHER;
2511			vm_page_flag_set(m, PG_REFERENCED);
2512		}
2513	}
2514	return val;
2515}
2516
2517void
2518pmap_activate(struct thread *td)
2519{
2520	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2521}
2522
2523pmap_t
2524pmap_install(pmap_t pmap)
2525{
2526	pmap_t oldpmap;
2527	int i;
2528
2529	critical_enter();
2530
2531	oldpmap = PCPU_GET(current_pmap);
2532
2533	if (pmap == oldpmap || pmap == kernel_pmap) {
2534		critical_exit();
2535		return pmap;
2536	}
2537
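	/* This CPU is switching away from the old pmap. */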
2538	if (oldpmap) {
2539		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2540	}
2541
2542	PCPU_SET(current_pmap, pmap);
2543	if (!pmap) {
2544		/*
2545		 * RIDs 0..4 are reserved and have no mappings, so any
2546		 * access through them will generate a page fault.
2547		 */
2548		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2549		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2550		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2551		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2552		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2553		critical_exit();
2554		return oldpmap;
2555	}
2556
2557	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2558
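	/*
	 * Program region registers 0..4 with this pmap's region ids.
	 * Each value encodes the rid in bits 8..31, the preferred page
	 * size (PAGE_SHIFT) in bits 2..7 and sets the VHPT walker
	 * enable bit.
	 */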
2559	for (i = 0; i < 5; i++)
2560		ia64_set_rr(IA64_RR_BASE(i),
2561			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2562
2563	critical_exit();
2564	return oldpmap;
2565}
2566
2567vm_offset_t
2568pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2569{
2570
2571	return addr;
2572}
2573
2574#include "opt_ddb.h"
2575
2576#ifdef DDB
2577
2578#include <ddb/ddb.h>
2579
2580static const char*	psnames[] = {
2581	"1B",	"2B",	"4B",	"8B",
2582	"16B",	"32B",	"64B",	"128B",
2583	"256B",	"512B",	"1K",	"2K",
2584	"4K",	"8K",	"16K",	"32K",
2585	"64K",	"128K",	"256K",	"512K",
2586	"1M",	"2M",	"4M",	"8M",
2587	"16M",	"32M",	"64M",	"128M",
2588	"256M",	"512M",	"1G",	"2G"
2589};
2590
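/*
 * Dump the instruction (type 0) or data (type 1) translation registers
 * using PAL_VM_TR_READ.  The low bits of pal_result[0] indicate which of
 * the ar, pl, d and ma fields were returned; fields that were not are
 * zeroed before printing.
 */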
2591static void
2592print_trs(int type)
2593{
2594	struct ia64_pal_result	res;
2595	int			i, maxtr;
2596	struct {
2597		struct ia64_pte	pte;
2598		struct ia64_itir itir;
2599		struct ia64_ifa ifa;
2600		struct ia64_rr	rr;
2601	}			buf;
2602	static const char*	manames[] = {
2603		"WB",	"bad",	"bad",	"bad",
2604		"UC",	"UCE",	"WC",	"NaT",
2606	};
2607
2608	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2609	if (res.pal_status != 0) {
2610		db_printf("Can't get VM summary\n");
2611		return;
2612	}
2613
2614	if (type == 0)
2615		maxtr = (res.pal_result[0] >> 40) & 0xff;
2616	else
2617		maxtr = (res.pal_result[0] >> 32) & 0xff;
2618
2619	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2620	for (i = 0; i <= maxtr; i++) {
2621		bzero(&buf, sizeof(buf));
2622		res = ia64_call_pal_stacked_physical
2623			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2624		if (!(res.pal_result[0] & 1))
2625			buf.pte.pte_ar = 0;
2626		if (!(res.pal_result[0] & 2))
2627			buf.pte.pte_pl = 0;
2628		if (!(res.pal_result[0] & 4))
2629			buf.pte.pte_d = 0;
2630		if (!(res.pal_result[0] & 8))
2631			buf.pte.pte_ma = 0;
2632		db_printf(
2633			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2634			buf.ifa.ifa_ig & 1,
2635			buf.rr.rr_rid,
2636			buf.ifa.ifa_vpn,
2637			buf.pte.pte_ppn,
2638			psnames[buf.itir.itir_ps],
2639			buf.pte.pte_ed,
2640			buf.pte.pte_ar,
2641			buf.pte.pte_pl,
2642			buf.pte.pte_d,
2643			buf.pte.pte_a,
2644			manames[buf.pte.pte_ma],
2645			buf.pte.pte_p,
2646			buf.itir.itir_key);
2647	}
2648}
2649
2650DB_COMMAND(itr, db_itr)
2651{
2652	print_trs(0);
2653}
2654
2655DB_COMMAND(dtr, db_dtr)
2656{
2657	print_trs(1);
2658}
2659
2660DB_COMMAND(rr, db_rr)
2661{
2662	int i;
2663	u_int64_t t;
2664	struct ia64_rr rr;
2665
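	/* Read and decode each of the eight region registers. */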
2666	printf("RR RID    PgSz VE\n");
2667	for (i = 0; i < 8; i++) {
2668		__asm __volatile ("mov %0=rr[%1]"
2669				  : "=r"(t)
2670				  : "r"(IA64_RR_BASE(i)));
2671		*(u_int64_t *) &rr = t;
2672		printf("%d  %06x %4s %d\n",
2673		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2674	}
2675}
2676
2677DB_COMMAND(thash, db_thash)
2678{
2679	if (!have_addr)
2680		return;
2681
2682	db_printf("%p\n", (void *) ia64_thash(addr));
2683}
2684
2685DB_COMMAND(ttag, db_ttag)
2686{
2687	if (!have_addr)
2688		return;
2689
2690	db_printf("0x%lx\n", ia64_ttag(addr));
2691}
2692
2693#endif
2694