pmap.c revision 126728
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 126728 2004-03-07 21:06:48Z alc $");
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/lock.h>
54#include <sys/mman.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/smp.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60
61#include <vm/vm.h>
62#include <vm/vm_page.h>
63#include <vm/vm_map.h>
64#include <vm/vm_object.h>
65#include <vm/vm_pageout.h>
66#include <vm/uma.h>
67
68#include <machine/md_var.h>
69#include <machine/pal.h>
70
71/*
72 *	Manages physical address maps.
73 *
74 *	In addition to hardware address maps, this
75 *	module is called upon to provide software-use-only
76 *	maps which may or may not be stored in the same
77 *	form as hardware maps.  These pseudo-maps are
78 *	used to store intermediate results from copy
79 *	operations to and from address spaces.
80 *
81 *	Since the information managed by this module is
82 *	also stored by the logical address mapping module,
83 *	this module may throw away valid virtual-to-physical
84 *	mappings at almost any time.  However, invalidations
85 *	of virtual-to-physical mappings must be done as
86 *	requested.
87 *
88 *	In order to cope with hardware architectures which
89 *	make virtual-to-physical map invalidates expensive,
90 * this module may delay invalidation or reduced-protection
91 *	operations until such time as they are actually
92 *	necessary.  This module is given full information as
93 *	to which processors are currently using which maps,
94 *	and to when physical maps must be made correct.
95 */
96
97/*
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
101 *
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
104 *
105 * Regions 0..4
106 *	User virtually mapped
107 *
108 * Region 5
109 *	Kernel virtually mapped
110 *
111 * Region 6
112 *	Kernel physically mapped uncacheable
113 *
114 * Region 7
115 *	Kernel physically mapped cacheable
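 *
 * (Illustrative note: the region number is the top 3 bits of a
 * 64-bit virtual address, so region 5 starts at 0xa000000000000000
 * and region 7 at 0xe000000000000000.)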
116 */
117
118/* XXX move to a header. */
119extern u_int64_t ia64_gateway_page[];
120
121MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
122
123#ifndef PMAP_SHPGPERPROC
124#define PMAP_SHPGPERPROC 200
125#endif
126
127#if defined(DIAGNOSTIC)
128#define PMAP_DIAGNOSTIC
129#endif
130
131#define MINPV 2048	/* Preallocate at least this many */
132#define MAXPV 20480	/* But no more than this */
133
134#if 0
135#define PMAP_DIAGNOSTIC
136#define PMAP_DEBUG
137#endif
138
139#if !defined(PMAP_DIAGNOSTIC)
140#define PMAP_INLINE __inline
141#else
142#define PMAP_INLINE
143#endif
144
145/*
146 * Get PDEs and PTEs for user/kernel address space
147 */
148#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
149#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
150#define pmap_pte_v(pte)		((pte)->pte_p)
151#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
152#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
153
154#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
155				:((pte)->pte_ig &= ~PTE_IG_WIRED))
156#define pmap_pte_set_prot(pte, v) do {		\
157    (pte)->pte_ar = v >> 2;			\
158    (pte)->pte_pl = v & 3;			\
159} while (0)
160
161/*
162 * Given a map and a machine independent protection code,
163 * convert to an ia64 protection code.
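 *
 * protection_codes[0][] is the kernel table and [1][] the user
 * table, indexed by the VM_PROT_* bits; each entry packs the ia64
 * access-rights field in bits 2..4 and the privilege level in bits
 * 0..1, as filled in by ia64_protection_init().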
164 */
165#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
166#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
167#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
168int	protection_codes[2][8];
169
170/*
171 * Return non-zero if this pmap is currently active
172 */
173#define pmap_isactive(pmap)	(pmap->pm_active)
174
175/*
176 * Statically allocated kernel pmap
177 */
178struct pmap kernel_pmap_store;
179
180vm_offset_t avail_start;	/* PA of first available physical page */
181vm_offset_t avail_end;		/* PA of last available physical page */
182vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
183vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
184static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
185
186vm_offset_t vhpt_base, vhpt_size;
187struct mtx pmap_vhptmutex;
188
189/*
190 * We use an object to own the kernel's 'page tables'. For simplicity,
191 * we use one page directory to index a set of pages containing
192 * ia64_lptes. This gives us up to 2GB of kernel virtual space.
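 *
 * For example (assuming 8KB pages and 32-byte ia64_lptes): the
 * directory page holds 1024 pointers, each leaf page holds 256
 * lptes mapping 2MB, giving 1024 * 2MB = 2GB in total.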
193 */
194static int nkpt;
195struct ia64_lpte **ia64_kptdir;
196#define KPTE_DIR_INDEX(va) \
197	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
198#define KPTE_PTE_INDEX(va) \
199	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
200#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
201
202vm_offset_t kernel_vm_end;
203
204/* Values for ptc.e. XXX values for SKI. */
205static u_int64_t pmap_ptc_e_base = 0x100000000;
206static u_int64_t pmap_ptc_e_count1 = 3;
207static u_int64_t pmap_ptc_e_count2 = 2;
208static u_int64_t pmap_ptc_e_stride1 = 0x2000;
209static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
210
211/*
212 * Data for the RID allocator
213 */
214static int pmap_ridcount;
215static int pmap_rididx;
216static int pmap_ridmapsz;
217static int pmap_ridmax;
218static u_int64_t *pmap_ridmap;
219struct mtx pmap_ridmutex;
220
221/*
222 * Data for the pv entry allocation mechanism
223 */
224static uma_zone_t pvzone;
225static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
226int pmap_pagedaemon_waken;
227static struct pv_entry *pvbootentries;
228static int pvbootnext, pvbootmax;
229
230/*
231 * Data for allocating PTEs for user processes.
232 */
233static uma_zone_t ptezone;
234
235/*
236 * VHPT instrumentation.
237 */
238static int pmap_vhpt_inserts;
239static int pmap_vhpt_collisions;
240static int pmap_vhpt_resident;
241SYSCTL_DECL(_vm_stats);
242SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
243SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
244	   &pmap_vhpt_inserts, 0, "");
245SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
246	   &pmap_vhpt_collisions, 0, "");
247SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
248	   &pmap_vhpt_resident, 0, "");
249
250static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
251static pv_entry_t get_pv_entry(void);
252static void	ia64_protection_init(void);
253
254static pmap_t	pmap_install(pmap_t);
255static void	pmap_invalidate_all(pmap_t pmap);
256
257vm_offset_t
258pmap_steal_memory(vm_size_t size)
259{
260	vm_size_t bank_size;
261	vm_offset_t pa, va;
262
263	size = round_page(size);
264
265	bank_size = phys_avail[1] - phys_avail[0];
266	while (size > bank_size) {
267		int i;
268		for (i = 0; phys_avail[i+2]; i+= 2) {
269			phys_avail[i] = phys_avail[i+2];
270			phys_avail[i+1] = phys_avail[i+3];
271		}
272		phys_avail[i] = 0;
273		phys_avail[i+1] = 0;
274		if (!phys_avail[0])
275			panic("pmap_steal_memory: out of memory");
276		bank_size = phys_avail[1] - phys_avail[0];
277	}
278
279	pa = phys_avail[0];
280	phys_avail[0] += size;
281
282	va = IA64_PHYS_TO_RR7(pa);
283	bzero((caddr_t) va, size);
284	return va;
285}
286
287/*
288 *	Bootstrap the system enough to run with virtual memory.
289 */
290void
291pmap_bootstrap()
292{
293	int i, j, count, ridbits;
294	struct ia64_pal_result res;
295
296	/*
297	 * Query the PAL Code to find the loop parameters for the
298	 * ptc.e instruction.
299	 */
300	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
301	if (res.pal_status != 0)
302		panic("Can't configure ptc.e parameters");
303	pmap_ptc_e_base = res.pal_result[0];
304	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
305	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
306	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
307	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
308	if (bootverbose)
309		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
310		       "stride1=0x%lx, stride2=0x%lx\n",
311		       pmap_ptc_e_base,
312		       pmap_ptc_e_count1,
313		       pmap_ptc_e_count2,
314		       pmap_ptc_e_stride1,
315		       pmap_ptc_e_stride2);
316
317	/*
318	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
319	 *
320	 * We currently need at least 19 bits in the RID because PID_MAX
321	 * can only be encoded in 17 bits and we need RIDs for 5 regions
322	 * per process. With PID_MAX equalling 99999 this means that we
323	 * need to be able to encode 499995 (=5*PID_MAX).
324	 * The Itanium processor only has 18 bits and the architected
325	 * minimum is exactly that. So, we cannot use a PID based scheme
326	 * in those cases. Enter pmap_ridmap...
327	 * We should avoid the map when running on a processor that has
328	 * implemented enough bits. This means that we should pass the
329	 * process/thread ID to pmap. This we currently don't do, so we
330	 * use the map anyway. However, we don't want to allocate a map
331	 * that is large enough to cover the range dictated by the number
332	 * of bits in the RID, because that may result in a RID map of
333	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
334 * The bottom line: we create a 32KB map when the processor only
335	 * implements 18 bits (or when we can't figure it out). Otherwise
336	 * we create a 64KB map.
337	 */
338	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
339	if (res.pal_status != 0) {
340		if (bootverbose)
341			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
342		ridbits = 18; /* guaranteed minimum */
343	} else {
344		ridbits = (res.pal_result[1] >> 8) & 0xff;
345		if (bootverbose)
346			printf("Processor supports %d Region ID bits\n",
347			    ridbits);
348	}
349	if (ridbits > 19)
350		ridbits = 19;
351
352	pmap_ridmax = (1 << ridbits);
353	pmap_ridmapsz = pmap_ridmax / 64;
354	pmap_ridmap = (u_int64_t *)pmap_steal_memory(pmap_ridmax / 8);
355	pmap_ridmap[0] |= 0xff;
356	pmap_rididx = 0;
357	pmap_ridcount = 8;
358	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
359
360	/*
361	 * Allocate some memory for initial kernel 'page tables'.
362	 */
363	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
364	for (i = 0; i < NKPT; i++) {
365		ia64_kptdir[i] = (void*)pmap_steal_memory(PAGE_SIZE);
366	}
367	nkpt = NKPT;
368	kernel_vm_end = NKPT * PAGE_SIZE * NKPTEPG + VM_MIN_KERNEL_ADDRESS -
369	    VM_GATEWAY_SIZE;
370
371	avail_start = phys_avail[0];
372	for (i = 0; phys_avail[i+2]; i+= 2) ;
373	avail_end = phys_avail[i+1];
374	count = i+2;
375
376	/*
377	 * Figure out a useful size for the VHPT, based on the size of
378	 * physical memory and try to locate a region which is large
379	 * enough to contain the VHPT (which must be a power of two in
380	 * size and aligned to a natural boundary).
381	 * Don't use the difference between avail_start and avail_end
382	 * as a measure of memory size. The address space is often
383	 * sparse enough to make us (try to) create a huge VHPT.
384	 */
385	vhpt_size = 15;
386	while ((1<<vhpt_size) < Maxmem * 32)
387		vhpt_size++;
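	/*
	 * This sizes the VHPT at roughly one 32-byte long-format entry
	 * per physical page (Maxmem is in pages). For example, 1GB of
	 * memory with 8KB pages gives Maxmem = 131072, so the loop
	 * settles on vhpt_size = 22, i.e. a 4MB VHPT.
	 */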
388
389	vhpt_base = 0;
390	while (!vhpt_base) {
391		vm_offset_t mask;
392		if (bootverbose)
393			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
394		mask = (1L << vhpt_size) - 1;
395		for (i = 0; i < count; i += 2) {
396			vm_offset_t base, limit;
397			base = (phys_avail[i] + mask) & ~mask;
398			limit = base + (1L << vhpt_size);
399			if (limit <= phys_avail[i+1])
400				/*
401				 * VHPT can fit in this region
402				 */
403				break;
404		}
405		if (!phys_avail[i]) {
406			/*
407			 * Can't fit, try next smaller size.
408			 */
409			vhpt_size--;
410		} else {
411			vhpt_base = (phys_avail[i] + mask) & ~mask;
412		}
413	}
414	if (vhpt_size < 15)
415		panic("Can't find space for VHPT");
416
417	if (bootverbose)
418		printf("Putting VHPT at %p\n", (void *) vhpt_base);
419	if (vhpt_base != phys_avail[i]) {
420		/*
421		 * Split this region.
422		 */
423		if (bootverbose)
424			printf("Splitting [%p-%p]\n",
425			       (void *) phys_avail[i],
426			       (void *) phys_avail[i+1]);
427		for (j = count; j > i; j -= 2) {
428			phys_avail[j] = phys_avail[j-2];
429			phys_avail[j+1] = phys_avail[j-2+1];
430		}
431		phys_avail[count+2] = 0;
432		phys_avail[count+3] = 0;
433		phys_avail[i+1] = vhpt_base;
434		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
435	} else {
436		phys_avail[i] = vhpt_base + (1L << vhpt_size);
437	}
438
439	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
440	bzero((void *) vhpt_base, (1L << vhpt_size));
441
442	mtx_init(&pmap_vhptmutex, "VHPT collision chain lock", NULL, MTX_DEF);
443
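	/*
	 * Program the PTA register: the upper bits hold the VHPT base,
	 * PTA.size (bits 2..7) the log2 of the VHPT size, PTA.vf (bit 8)
	 * selects the long format and PTA.ve (bit 0) enables the VHPT
	 * walker (field layout per the Itanium architecture; descriptive
	 * note only).
	 */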
444	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
445			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
446
447	virtual_avail = VM_MIN_KERNEL_ADDRESS;
448	virtual_end = VM_MAX_KERNEL_ADDRESS;
449
450	/*
451	 * Initialize protection array.
452	 */
453	ia64_protection_init();
454
455	/*
456	 * Initialize the kernel pmap (which is statically allocated).
457	 */
458	for (i = 0; i < 5; i++)
459		kernel_pmap->pm_rid[i] = 0;
460	kernel_pmap->pm_active = 1;
461	TAILQ_INIT(&kernel_pmap->pm_pvlist);
462	PCPU_SET(current_pmap, kernel_pmap);
463
464	/*
465	 * Region 5 is mapped via the vhpt.
466	 */
467	ia64_set_rr(IA64_RR_BASE(5),
468		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
469
470	/*
471	 * Region 6 is direct mapped UC and region 7 is direct mapped
472	 * WB. The details of this are controlled by the Alt {I,D}TLB
473	 * handlers. Here we just make sure that they have the largest
474	 * possible page size to minimize TLB usage.
475	 */
476	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
477	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));
478
479	/*
480	 * Reserve some memory for allocating pvs while bootstrapping
481	 * the pv allocator. We need to have enough to cover mapping
482	 * the kmem_alloc region used to allocate the initial_pvs in
483	 * pmap_init. In general, the size of this region is
484	 * approximately (# physical pages) * (size of pv entry).
485	 */
486	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
487	pvbootentries = (struct pv_entry *)
488		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
489	pvbootnext = 0;
490
491	/*
492	 * Clear out any random TLB entries left over from booting.
493	 */
494	pmap_invalidate_all(kernel_pmap);
495
496	map_gateway_page();
497}
498
499/*
500 *	Initialize the pmap module.
501 *	Called by vm_init, to initialize any structures that the pmap
502 *	system needs to map virtual memory.
503 *	pmap_init has been enhanced to support, in a fairly consistent
504 *	way, discontiguous physical memory.
505 */
506void
507pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
508{
509	int i;
510	int initial_pvs;
511
512	/*
513	 * Allocate memory for random pmap data structures.  Includes the
514	 * pv_head_table.
515	 */
516
517	for(i = 0; i < vm_page_array_size; i++) {
518		vm_page_t m;
519
520		m = &vm_page_array[i];
521		TAILQ_INIT(&m->md.pv_list);
522		m->md.pv_list_count = 0;
523 	}
524
525	/*
526	 * Init the pv free list and the PTE free list.
527	 */
528	initial_pvs = vm_page_array_size;
529	if (initial_pvs < MINPV)
530		initial_pvs = MINPV;
531	if (initial_pvs > MAXPV)
532		initial_pvs = MAXPV;
533	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
534	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
535	uma_prealloc(pvzone, initial_pvs);
536
537	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
538	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
539	uma_prealloc(ptezone, initial_pvs);
540
541	/*
542	 * Now it is safe to enable pv_table recording.
543	 */
544	pmap_initialized = TRUE;
545}
546
547/*
548 * Initialize the address space (zone) for the pv_entries.  Set a
549 * high water mark so that the system can recover from excessive
550 * numbers of pv entries.
551 */
552void
553pmap_init2()
554{
555	int shpgperproc = PMAP_SHPGPERPROC;
556
557	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
558	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
559	pv_entry_high_water = 9 * (pv_entry_max / 10);
560}
561
562
563/***************************************************
564 * Manipulate TLBs for a pmap
565 ***************************************************/
566
567static void
568pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
569{
570	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
571		("invalidating TLB for non-current pmap"));
572	ia64_ptc_g(va, PAGE_SHIFT << 2);
573}
574
575static void
576pmap_invalidate_all_1(void *arg)
577{
578	u_int64_t addr;
579	int i, j;
580	register_t psr;
581
582	psr = intr_disable();
583	addr = pmap_ptc_e_base;
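	/*
	 * Walk the two-dimensional base/count/stride pattern returned
	 * by PAL_PTCE_INFO; issuing ptc.e at each point purges the
	 * entire local translation cache.
	 */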
584	for (i = 0; i < pmap_ptc_e_count1; i++) {
585		for (j = 0; j < pmap_ptc_e_count2; j++) {
586			ia64_ptc_e(addr);
587			addr += pmap_ptc_e_stride2;
588		}
589		addr += pmap_ptc_e_stride1;
590	}
591	intr_restore(psr);
592}
593
594static void
595pmap_invalidate_all(pmap_t pmap)
596{
597	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
598		("invalidating TLB for non-current pmap"));
599
600
601#ifdef SMP
602	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
603#else
604	pmap_invalidate_all_1(0);
605#endif
606}
607
608static u_int32_t
609pmap_allocate_rid(void)
610{
611	uint64_t bit, bits;
612	int rid;
613
614	mtx_lock(&pmap_ridmutex);
615	if (pmap_ridcount == pmap_ridmax)
616		panic("pmap_allocate_rid: All Region IDs used");
617
618	/* Find an index with a free bit. */
619	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
620		pmap_rididx++;
621		if (pmap_rididx == pmap_ridmapsz)
622			pmap_rididx = 0;
623	}
624	rid = pmap_rididx * 64;
625
626	/* Find a free bit. */
627	bit = 1UL;
628	while (bits & bit) {
629		rid++;
630		bit <<= 1;
631	}
632
633	pmap_ridmap[pmap_rididx] |= bit;
634	pmap_ridcount++;
635	mtx_unlock(&pmap_ridmutex);
636
637	return rid;
638}
639
640static void
641pmap_free_rid(u_int32_t rid)
642{
643	uint64_t bit;
644	int idx;
645
646	idx = rid / 64;
647	bit = ~(1UL << (rid & 63));
648
649	mtx_lock(&pmap_ridmutex);
650	pmap_ridmap[idx] &= bit;
651	pmap_ridcount--;
652	mtx_unlock(&pmap_ridmutex);
653}
654
655/***************************************************
656 * Low level helper routines.....
657 ***************************************************/
658
659/*
660 * Install a pte into the VHPT
661 */
662static PMAP_INLINE void
663pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
664{
665	u_int64_t *vhp, *p;
666
667	vhp = (u_int64_t *)vhpte;
668	p = (u_int64_t *)pte;
669
670	critical_enter();
671
672	/* Invalidate the tag so the VHPT walker will not match this entry. */
673	vhp[2] = 1UL << 63;
674	ia64_mf();
675
676	vhp[0] = p[0];
677	vhp[1] = p[1];
678	ia64_mf();
679
680	/* Install a proper tag now that we're done. */
681	vhp[2] = p[2];
682	ia64_mf();
683
684	critical_exit();
685}
686
687/*
688 * Compare essential parts of pte.
689 */
690static PMAP_INLINE int
691pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
692{
693	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
694}
695
696/*
697 * this routine defines the region(s) of memory that should
698 * not be tested for the modified bit.
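 * Addresses inside [kmi.clean_sva, kmi.clean_eva) (the kernel's
 * clean submap) are excluded from dirty tracking.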
699 */
700static PMAP_INLINE int
701pmap_track_modified(vm_offset_t va)
702{
703	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
704		return 1;
705	else
706		return 0;
707}
708
709/***************************************************
710 * Page table page management routines.....
711 ***************************************************/
712
713void
714pmap_pinit0(struct pmap *pmap)
715{
716	/* kernel_pmap is the same as any other pmap. */
717	pmap_pinit(pmap);
718}
719
720/*
721 * Initialize a preallocated and zeroed pmap structure,
722 * such as one in a vmspace structure.
723 */
724void
725pmap_pinit(struct pmap *pmap)
726{
727	int i;
728
729	pmap->pm_flags = 0;
730	for (i = 0; i < 5; i++)
731		pmap->pm_rid[i] = pmap_allocate_rid();
732	pmap->pm_ptphint = NULL;
733	pmap->pm_active = 0;
734	TAILQ_INIT(&pmap->pm_pvlist);
735	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
736}
737
738/***************************************************
739 * Pmap allocation/deallocation routines.
740 ***************************************************/
741
742/*
743 * Release any resources held by the given physical map.
744 * Called when a pmap initialized by pmap_pinit is being released.
745 * Should only be called if the map contains no valid mappings.
746 */
747void
748pmap_release(pmap_t pmap)
749{
750	int i;
751
752	for (i = 0; i < 5; i++)
753		if (pmap->pm_rid[i])
754			pmap_free_rid(pmap->pm_rid[i]);
755}
756
757/*
758 * grow the number of kernel page table entries, if needed
759 */
760void
761pmap_growkernel(vm_offset_t addr)
762{
763	struct ia64_lpte *ptepage;
764	vm_page_t nkpg;
765
766	if (kernel_vm_end >= addr)
767		return;
768
769	critical_enter();
770
771	while (kernel_vm_end < addr) {
772		/* We could handle more by increasing the size of kptdir. */
773		if (nkpt == MAXKPT)
774			panic("pmap_growkernel: out of kernel address space");
775
776		nkpg = vm_page_alloc(NULL, nkpt,
777		    VM_ALLOC_NOOBJ | VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
778		if (!nkpg)
779			panic("pmap_growkernel: no memory to grow kernel");
780
781		ptepage = (struct ia64_lpte *)
782		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
783		bzero(ptepage, PAGE_SIZE);
784		ia64_kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
785
786		nkpt++;
787		kernel_vm_end += PAGE_SIZE * NKPTEPG;
788	}
789
790	critical_exit();
791}
792
793/***************************************************
794 * page management routines.
795 ***************************************************/
796
797/*
798 * free the pv_entry back to the free list
799 */
800static PMAP_INLINE void
801free_pv_entry(pv_entry_t pv)
802{
803	pv_entry_count--;
804	uma_zfree(pvzone, pv);
805}
806
807/*
808 * get a new pv_entry, allocating a block from the system
809 * when needed.
810 * the memory allocation is performed bypassing the malloc code
811 * because of the possibility of allocations at interrupt time.
812 */
813static pv_entry_t
814get_pv_entry(void)
815{
816	pv_entry_count++;
817	if (pv_entry_high_water &&
818		(pv_entry_count > pv_entry_high_water) &&
819		(pmap_pagedaemon_waken == 0)) {
820		pmap_pagedaemon_waken = 1;
821		wakeup (&vm_pages_needed);
822	}
823	return uma_zalloc(pvzone, M_NOWAIT);
824}
825
826/*
827 * Add an ia64_lpte to the VHPT.
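 *
 * ia64_thash(va) yields the VHPT slot the hardware walker probes
 * for va and ia64_ttag(va) the tag it matches against; colliding
 * entries hang off that slot via pte_chain, which holds physical
 * addresses (hence IA64_PHYS_TO_RR7() when walking the chain).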
828 */
829static void
830pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
831{
832	struct ia64_lpte *vhpte;
833
834	pmap_vhpt_inserts++;
835	pmap_vhpt_resident++;
836
837	vhpte = (struct ia64_lpte *) ia64_thash(va);
838
839	if (vhpte->pte_chain)
840		pmap_vhpt_collisions++;
841
842	mtx_lock(&pmap_vhptmutex);
843
844	pte->pte_chain = vhpte->pte_chain;
845	ia64_mf();
846	vhpte->pte_chain = ia64_tpa((vm_offset_t)pte);
847	ia64_mf();
848
849	if (!vhpte->pte_p && pte->pte_p)
850		pmap_install_pte(vhpte, pte);
851
852	mtx_unlock(&pmap_vhptmutex);
853}
854
855/*
856 * Update VHPT after a pte has changed.
857 */
858static void
859pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
860{
861	struct ia64_lpte *vhpte;
862
863	vhpte = (struct ia64_lpte *)ia64_thash(va);
864
865	mtx_lock(&pmap_vhptmutex);
866
867	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag) && pte->pte_p)
868		pmap_install_pte(vhpte, pte);
869
870	mtx_unlock(&pmap_vhptmutex);
871}
872
873/*
874 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
875 * worked or an appropriate error code otherwise.
876 */
877static int
878pmap_remove_vhpt(vm_offset_t va)
879{
880	struct ia64_lpte *pte;
881	struct ia64_lpte *lpte;
882	struct ia64_lpte *vhpte;
883	u_int64_t tag;
884
885	vhpte = (struct ia64_lpte *)ia64_thash(va);
886
887	/*
888	 * If the VHPTE is invalid, there can't be a collision chain.
889	 */
890	if (!vhpte->pte_p) {
891		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
892		return (ENOENT);
893	}
894
895	lpte = vhpte;
896	tag = ia64_ttag(va);
897
898	mtx_lock(&pmap_vhptmutex);
899
900	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(vhpte->pte_chain);
901	KASSERT(pte != NULL, ("foo"));
902
903	while (pte->pte_tag != tag) {
904		lpte = pte;
905		if (pte->pte_chain == 0) {
906			mtx_unlock(&pmap_vhptmutex);
907			return (ENOENT);
908		}
909		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
910	}
911
912	/* Snip this pte out of the collision chain. */
913	lpte->pte_chain = pte->pte_chain;
914	ia64_mf();
915
916	/*
917	 * If the VHPTE matches as well, change it to map the first
918	 * element from the chain if there is one.
919	 */
920	if (vhpte->pte_tag == tag) {
921		if (vhpte->pte_chain) {
922			pte = (void*)IA64_PHYS_TO_RR7(vhpte->pte_chain);
923			pmap_install_pte(vhpte, pte);
924		} else
925			vhpte->pte_p = 0;
926	}
927
928	mtx_unlock(&pmap_vhptmutex);
929	pmap_vhpt_resident--;
930	return (0);
931}
932
933/*
934 * Find the ia64_lpte for the given va, if any.
935 */
936static struct ia64_lpte *
937pmap_find_vhpt(vm_offset_t va)
938{
939	struct ia64_lpte *pte;
940	u_int64_t tag;
941
942	tag = ia64_ttag(va);
943	pte = (struct ia64_lpte *)ia64_thash(va);
944	if (pte->pte_chain == 0)
945		return (NULL);
946	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
947	while (pte->pte_tag != tag) {
948		if (pte->pte_chain == 0)
949			return (NULL);
950		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
951	}
952	return (pte);
953}
954
955/*
956 * Remove an entry from the list of managed mappings.
957 */
958static int
959pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
960{
961	if (!pv) {
962		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
963			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
964				if (pmap == pv->pv_pmap && va == pv->pv_va)
965					break;
966			}
967		} else {
968			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
969				if (va == pv->pv_va)
970					break;
971			}
972		}
973	}
974
975	if (pv) {
976		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
977		m->md.pv_list_count--;
978		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
979			vm_page_flag_clear(m, PG_WRITEABLE);
980
981		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
982		free_pv_entry(pv);
983		return 0;
984	} else {
985		return ENOENT;
986	}
987}
988
989/*
990 * Create a pv entry for page at pa for
991 * (pmap, va).
992 */
993static void
994pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
995{
996	pv_entry_t pv;
997
998	pv = get_pv_entry();
999	pv->pv_pmap = pmap;
1000	pv->pv_va = va;
1001
1002	vm_page_lock_queues();
1003	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1004	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1005	m->md.pv_list_count++;
1006	vm_page_unlock_queues();
1007}
1008
1009/*
1010 *	Routine:	pmap_extract
1011 *	Function:
1012 *		Extract the physical page address associated
1013 *		with the given map/virtual_address pair.
1014 */
1015vm_offset_t
1016pmap_extract(pmap, va)
1017	register pmap_t pmap;
1018	vm_offset_t va;
1019{
1020	struct ia64_lpte *pte;
1021	pmap_t oldpmap;
1022
1023	if (!pmap)
1024		return 0;
1025
1026	oldpmap = pmap_install(pmap);
1027	pte = pmap_find_vhpt(va);
1028	pmap_install(oldpmap);
1029
1030	if (!pte)
1031		return 0;
1032
1033	return pmap_pte_pa(pte);
1034}
1035
1036/*
1037 *	Routine:	pmap_extract_and_hold
1038 *	Function:
1039 *		Atomically extract and hold the physical page
1040 *		with the given pmap and virtual address pair
1041 *		if that mapping permits the given protection.
1042 */
1043vm_page_t
1044pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1045{
1046	vm_paddr_t pa;
1047	vm_page_t m;
1048
1049	m = NULL;
1050	mtx_lock(&Giant);
1051	if ((pa = pmap_extract(pmap, va)) != 0) {
1052		m = PHYS_TO_VM_PAGE(pa);
1053		vm_page_lock_queues();
1054		vm_page_hold(m);
1055		vm_page_unlock_queues();
1056	}
1057	mtx_unlock(&Giant);
1058	return (m);
1059}
1060
1061/***************************************************
1062 * Low level mapping routines.....
1063 ***************************************************/
1064
1065/*
1066 * Find the kernel lpte for mapping the given virtual address, which
1067 * must be in the part of region 5 which we can cover with our kernel
1068 * 'page tables'.
1069 */
1070static struct ia64_lpte *
1071pmap_find_kpte(vm_offset_t va)
1072{
1073	KASSERT((va >> 61) == 5,
1074		("kernel mapping 0x%lx not in region 5", va));
1075	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1076		("kernel mapping 0x%lx out of range", va));
1077	return (&ia64_kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)]);
1078}
1079
1080/*
1081 * Find a pte suitable for mapping a user-space address. If one exists
1082 * in the VHPT, that one will be returned, otherwise a new pte is
1083 * allocated.
1084 */
1085static struct ia64_lpte *
1086pmap_find_pte(vm_offset_t va)
1087{
1088	struct ia64_lpte *pte;
1089
1090	if (va >= VM_MAXUSER_ADDRESS)
1091		return pmap_find_kpte(va);
1092
1093	pte = pmap_find_vhpt(va);
1094	if (!pte) {
1095		pte = uma_zalloc(ptezone, M_WAITOK);
1096		pte->pte_p = 0;
1097	}
1098	return pte;
1099}
1100
1101/*
1102 * Free a pte which is now unused. This simply returns it to the zone
1103 * allocator if it is a user mapping. For kernel mappings, clear the
1104 * valid bit to make it clear that the mapping is not currently used.
1105 */
1106static void
1107pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1108{
1109	if (va < VM_MAXUSER_ADDRESS)
1110		uma_zfree(ptezone, pte);
1111	else
1112		pte->pte_p = 0;
1113}
1114
1115/*
1116 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1117 * the pte was originally valid, then it's assumed to already be in the
1118 * VHPT.
1119 */
1120static void
1121pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1122	     int ig, int pl, int ar)
1123{
1124	int wasvalid = pte->pte_p;
1125
1126	pte->pte_p = 1;
1127	pte->pte_ma = PTE_MA_WB;
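	/*
	 * Managed mappings start with the accessed and dirty bits clear
	 * so that the first reference or modification faults and can be
	 * noted; unmanaged mappings pre-set both bits to avoid those
	 * faults.
	 */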
1128	if (ig & PTE_IG_MANAGED) {
1129		pte->pte_a = 0;
1130		pte->pte_d = 0;
1131	} else {
1132		pte->pte_a = 1;
1133		pte->pte_d = 1;
1134	}
1135	pte->pte_pl = pl;
1136	pte->pte_ar = ar;
1137	pte->pte_ppn = pa >> 12;
1138	pte->pte_ed = 0;
1139	pte->pte_ig = ig;
1140
1141	pte->pte_ps = PAGE_SHIFT;
1142	pte->pte_key = 0;
1143
1144	pte->pte_tag = ia64_ttag(va);
1145
1146	if (wasvalid) {
1147		pmap_update_vhpt(pte, va);
1148	} else {
1149		pmap_enter_vhpt(pte, va);
1150	}
1151}
1152
1153/*
1154 * If a pte contains a valid mapping, clear it and update the VHPT.
1155 */
1156static void
1157pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1158{
1159	if (pte->pte_p) {
1160		pmap_remove_vhpt(va);
1161		ia64_ptc_g(va, PAGE_SHIFT << 2);
1162		pte->pte_p = 0;
1163	}
1164}
1165
1166/*
1167 * Remove the (possibly managed) mapping represented by pte from the
1168 * given pmap.
1169 */
1170static int
1171pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1172		pv_entry_t pv, int freepte)
1173{
1174	int error;
1175	vm_page_t m;
1176
1177	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1178		("removing pte for non-current pmap"));
1179
1180	/*
1181	 * First remove from the VHPT.
1182	 */
1183	error = pmap_remove_vhpt(va);
1184	if (error)
1185		return error;
1186
1187	/*
1188	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1189	 */
1190	pte->pte_p = 0;
1191
1192	if (pte->pte_ig & PTE_IG_WIRED)
1193		pmap->pm_stats.wired_count -= 1;
1194
1195	pmap->pm_stats.resident_count -= 1;
1196	if (pte->pte_ig & PTE_IG_MANAGED) {
1197		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1198		if (pte->pte_d)
1199			if (pmap_track_modified(va))
1200				vm_page_dirty(m);
1201		if (pte->pte_a)
1202			vm_page_flag_set(m, PG_REFERENCED);
1203
1204		if (freepte)
1205			pmap_free_pte(pte, va);
1206		return pmap_remove_entry(pmap, m, va, pv);
1207	} else {
1208		if (freepte)
1209			pmap_free_pte(pte, va);
1210		return 0;
1211	}
1212}
1213
1214/*
1215 * Extract the physical page address associated with a kernel
1216 * virtual address.
1217 */
1218vm_paddr_t
1219pmap_kextract(vm_offset_t va)
1220{
1221	struct ia64_lpte *pte;
1222	vm_offset_t gwpage;
1223
1224	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1225
1226	/* Regions 6 and 7 are direct mapped. */
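	/* IA64_RR_MASK() strips the region bits, recovering the PA. */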
1227	if (va >= IA64_RR_BASE(6))
1228		return (IA64_RR_MASK(va));
1229
1230	/* EPC gateway page? */
1231	gwpage = (vm_offset_t)ia64_get_k5();
1232	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1233		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1234
1235	/* Bail out if the virtual address is beyond our limits. */
1236	if (IA64_RR_MASK(va) >= nkpt * PAGE_SIZE * NKPTEPG)
1237		return (0);
1238
1239	pte = pmap_find_kpte(va);
1240	if (!pte->pte_p)
1241		return (0);
1242	return ((pte->pte_ppn << 12) | (va & PAGE_MASK));
1243}
1244
1245/*
1246 * Add a list of wired pages to the kva.
1247 * This routine is only used for temporary
1248 * kernel mappings that do not need to have
1249 * page modification or references recorded.
1250 * Note that old mappings are simply written
1251 * over.  The page *must* be wired.
1252 */
1253void
1254pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1255{
1256	int i;
1257	struct ia64_lpte *pte;
1258
1259	for (i = 0; i < count; i++) {
1260		vm_offset_t tva = va + i * PAGE_SIZE;
1261		int wasvalid;
1262		pte = pmap_find_kpte(tva);
1263		wasvalid = pte->pte_p;
1264		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1265			     0, PTE_PL_KERN, PTE_AR_RWX);
1266		if (wasvalid)
1267			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1268	}
1269}
1270
1271/*
1272 * this routine jerks page mappings from the
1273 * kernel -- it is meant only for temporary mappings.
1274 */
1275void
1276pmap_qremove(vm_offset_t va, int count)
1277{
1278	int i;
1279	struct ia64_lpte *pte;
1280
1281	for (i = 0; i < count; i++) {
1282		pte = pmap_find_kpte(va);
1283		pmap_clear_pte(pte, va);
1284		va += PAGE_SIZE;
1285	}
1286}
1287
1288/*
1289 * Add a wired page to the kva.
1290 */
1291void
1292pmap_kenter(vm_offset_t va, vm_offset_t pa)
1293{
1294	struct ia64_lpte *pte;
1295	int wasvalid;
1296
1297	pte = pmap_find_kpte(va);
1298	wasvalid = pte->pte_p;
1299	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1300	if (wasvalid)
1301		ia64_ptc_g(va, PAGE_SHIFT << 2);
1302}
1303
1304/*
1305 * Remove a page from the kva
1306 */
1307void
1308pmap_kremove(vm_offset_t va)
1309{
1310	struct ia64_lpte *pte;
1311
1312	pte = pmap_find_kpte(va);
1313	pmap_clear_pte(pte, va);
1314}
1315
1316/*
1317 *	Used to map a range of physical addresses into kernel
1318 *	virtual address space.
1319 *
1320 *	The value passed in '*virt' is a suggested virtual address for
1321 *	the mapping. Architectures which can support a direct-mapped
1322 *	physical to virtual region can return the appropriate address
1323 *	within that region, leaving '*virt' unchanged. Other
1324 *	architectures should map the pages starting at '*virt' and
1325 *	update '*virt' with the first usable address after the mapped
1326 *	region.
1327 */
1328vm_offset_t
1329pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1330{
1331	return IA64_PHYS_TO_RR7(start);
1332}
1333
1334/*
1335 * Remove a single page from a process address space
1336 */
1337static void
1338pmap_remove_page(pmap_t pmap, vm_offset_t va)
1339{
1340	struct ia64_lpte *pte;
1341
1342	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1343		("removing page for non-current pmap"));
1344
1345	pte = pmap_find_vhpt(va);
1346	if (pte) {
1347		pmap_remove_pte(pmap, pte, va, 0, 1);
1348		pmap_invalidate_page(pmap, va);
1349	}
1350	return;
1351}
1352
1353/*
1354 *	Remove the given range of addresses from the specified map.
1355 *
1356 *	It is assumed that the start and end are properly
1357 *	rounded to the page size.
1358 */
1359void
1360pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1361{
1362	pmap_t oldpmap;
1363	vm_offset_t va;
1364	pv_entry_t pv;
1365	struct ia64_lpte *pte;
1366
1367	if (pmap == NULL)
1368		return;
1369
1370	if (pmap->pm_stats.resident_count == 0)
1371		return;
1372
1373	oldpmap = pmap_install(pmap);
1374
1375	/*
1376	 * Special handling of removing one page: it is a very
1377	 * common operation and lets us short-circuit some
1378	 * code.
1379	 */
1380	if (sva + PAGE_SIZE == eva) {
1381		pmap_remove_page(pmap, sva);
1382		pmap_install(oldpmap);
1383		return;
1384	}
1385
1386	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1387		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1388			va = pv->pv_va;
1389			if (va >= sva && va < eva) {
1390				pte = pmap_find_vhpt(va);
1391				KASSERT(pte != NULL, ("pte"));
1392				pmap_remove_pte(pmap, pte, va, pv, 1);
1393				pmap_invalidate_page(pmap, va);
1394			}
1395		}
1396
1397	} else {
1398		for (va = sva; va < eva; va += PAGE_SIZE) {
1399			pte = pmap_find_vhpt(va);
1400			if (pte) {
1401				pmap_remove_pte(pmap, pte, va, 0, 1);
1402				pmap_invalidate_page(pmap, va);
1403			}
1404		}
1405	}
1406
1407	pmap_install(oldpmap);
1408}
1409
1410/*
1411 *	Routine:	pmap_remove_all
1412 *	Function:
1413 *		Removes this physical page from
1414 *		all physical maps in which it resides.
1415 *		Reflects back modify bits to the pager.
1416 *
1417 *	Notes:
1418 *		Original versions of this routine were very
1419 *		inefficient because they iteratively called
1420 *		pmap_remove (slow...)
1421 */
1422
1423void
1424pmap_remove_all(vm_page_t m)
1425{
1426	pmap_t oldpmap;
1427	pv_entry_t pv;
1428	int s;
1429
1430#if defined(PMAP_DIAGNOSTIC)
1431	/*
1432	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1433	 * pages!
1434	 */
1435	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1436		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1437	}
1438#endif
1439
1440	s = splvm();
1441
1442	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1443		struct ia64_lpte *pte;
1444		pmap_t pmap = pv->pv_pmap;
1445		vm_offset_t va = pv->pv_va;
1446
1447		oldpmap = pmap_install(pmap);
1448		pte = pmap_find_vhpt(va);
1449		KASSERT(pte != NULL, ("pte"));
1450		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1451			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1452		pmap_remove_pte(pmap, pte, va, pv, 1);
1453		pmap_invalidate_page(pmap, va);
1454		pmap_install(oldpmap);
1455	}
1456
1457	vm_page_flag_clear(m, PG_WRITEABLE);
1458
1459	splx(s);
1460	return;
1461}
1462
1463/*
1464 *	Set the physical protection on the
1465 *	specified range of this map as requested.
1466 */
1467void
1468pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1469{
1470	pmap_t oldpmap;
1471	struct ia64_lpte *pte;
1472	int newprot;
1473
1474	if (pmap == NULL)
1475		return;
1476
1477	oldpmap = pmap_install(pmap);
1478
1479	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1480		pmap_remove(pmap, sva, eva);
1481		pmap_install(oldpmap);
1482		return;
1483	}
1484
1485	if (prot & VM_PROT_WRITE) {
1486		pmap_install(oldpmap);
1487		return;
1488	}
1489
1490	newprot = pte_prot(pmap, prot);
1491
1492	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1493		panic("pmap_protect: unaligned addresses");
1494
1495	while (sva < eva) {
1496		/*
1497		 * If page is invalid, skip this page
1498		 */
1499		pte = pmap_find_vhpt(sva);
1500		if (!pte) {
1501			sva += PAGE_SIZE;
1502			continue;
1503		}
1504
1505		if (pmap_pte_prot(pte) != newprot) {
1506			if (pte->pte_ig & PTE_IG_MANAGED) {
1507				vm_offset_t pa = pmap_pte_pa(pte);
1508				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1509				if (pte->pte_d) {
1510					if (pmap_track_modified(sva))
1511						vm_page_dirty(m);
1512					pte->pte_d = 0;
1513				}
1514				if (pte->pte_a) {
1515					vm_page_flag_set(m, PG_REFERENCED);
1516					pte->pte_a = 0;
1517				}
1518			}
1519			pmap_pte_set_prot(pte, newprot);
1520			pmap_update_vhpt(pte, sva);
1521			pmap_invalidate_page(pmap, sva);
1522		}
1523
1524		sva += PAGE_SIZE;
1525	}
1526	pmap_install(oldpmap);
1527}
1528
1529/*
1530 *	Insert the given physical page (p) at
1531 *	the specified virtual address (v) in the
1532 *	target physical map with the protection requested.
1533 *
1534 *	If specified, the page will be wired down, meaning
1535 *	that the related pte can not be reclaimed.
1536 *
1537 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1538 *	or lose information.  That is, this routine must actually
1539 *	insert this page into the given map NOW.
1540 */
1541void
1542pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1543	   boolean_t wired)
1544{
1545	pmap_t oldpmap;
1546	vm_offset_t pa;
1547	vm_offset_t opa;
1548	struct ia64_lpte origpte;
1549	struct ia64_lpte *pte;
1550	int managed;
1551
1552	if (pmap == NULL)
1553		return;
1554
1555	oldpmap = pmap_install(pmap);
1556
1557	va &= ~PAGE_MASK;
1558#ifdef PMAP_DIAGNOSTIC
1559	if (va > VM_MAX_KERNEL_ADDRESS)
1560		panic("pmap_enter: toobig");
1561#endif
1562
1563	/*
1564	 * Find (or create) a pte for the given mapping.
1565	 */
1566	pte = pmap_find_pte(va);
1567	origpte = *pte;
1568
1569	if (origpte.pte_p)
1570		opa = pmap_pte_pa(&origpte);
1571	else
1572		opa = 0;
1573	managed = 0;
1574
1575	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1576
1577	/*
1578	 * Mapping has not changed, must be protection or wiring change.
1579	 */
1580	if (origpte.pte_p && (opa == pa)) {
1581		/*
1582		 * Wiring change, just update stats. We don't worry about
1583		 * wiring PT pages as they remain resident as long as there
1584		 * are valid mappings in them. Hence, if a user page is wired,
1585		 * the PT page will be also.
1586		 */
1587		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1588			pmap->pm_stats.wired_count++;
1589		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1590			pmap->pm_stats.wired_count--;
1591
1592		/*
1593		 * We might be turning off write access to the page,
1594		 * so we go ahead and sense modify status.
1595		 */
1596		if (origpte.pte_ig & PTE_IG_MANAGED) {
1597			if (origpte.pte_d && pmap_track_modified(va)) {
1598				vm_page_t om;
1599				om = PHYS_TO_VM_PAGE(opa);
1600				vm_page_dirty(om);
1601			}
1602		}
1603
1604		managed = origpte.pte_ig & PTE_IG_MANAGED;
1605		goto validate;
1606	}
1607	/*
1608	 * Mapping has changed, invalidate old range and fall
1609	 * through to handle validating new mapping.
1610	 */
1611	if (opa) {
1612		int error;
1613		vm_page_lock_queues();
1614		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1615		vm_page_unlock_queues();
1616		if (error)
1617			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1618	}
1619
1620	/*
1621	 * Enter on the PV list if part of our managed memory.
1622	 */
1623	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1624		pmap_insert_entry(pmap, va, m);
1625		managed |= PTE_IG_MANAGED;
1626	}
1627
1628	/*
1629	 * Increment counters
1630	 */
1631	pmap->pm_stats.resident_count++;
1632	if (wired)
1633		pmap->pm_stats.wired_count++;
1634
1635validate:
1636
1637	/*
1638	 * Now validate mapping with desired protection/wiring. This
1639	 * adds the pte to the VHPT if necessary.
1640	 */
1641	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1642		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1643
1644	/*
1645	 * if the mapping or permission bits are different, we need
1646	 * to invalidate the page.
1647	 */
1648	if (!pmap_equal_pte(&origpte, pte))
1649		pmap_invalidate_page(pmap, va);
1650
1651	pmap_install(oldpmap);
1652}
1653
1654/*
1655 * this code makes some *MAJOR* assumptions:
1656 * 1. Current pmap & pmap exists.
1657 * 2. Not wired.
1658 * 3. Read access.
1659 * 4. No page table pages.
1660 * 5. Tlbflush is deferred to calling procedure.
1661 * 6. Page IS managed.
1662 * but is *MUCH* faster than pmap_enter...
1663 */
1664
1665vm_page_t
1666pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
1667{
1668	struct ia64_lpte *pte;
1669	pmap_t oldpmap;
1670
1671	oldpmap = pmap_install(pmap);
1672
1673	pte = pmap_find_pte(va);
1674	if (pte->pte_p)
1675		goto reinstall;
1676
1677	/*
1678	 * Enter on the PV list since it's part of our managed memory.
1679	 */
1680	pmap_insert_entry(pmap, va, m);
1681
1682	/*
1683	 * Increment counters
1684	 */
1685	pmap->pm_stats.resident_count++;
1686
1687	/*
1688	 * Initialize the PTE with read-only protection and enter it into the VHPT.
1689	 */
1690	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1691		     PTE_IG_MANAGED,
1692		     PTE_PL_USER, PTE_AR_R);
1693reinstall:
1694	pmap_install(oldpmap);
1695	return (NULL);
1696}
1697
1698/*
1699 * Make temporary mapping for a physical address. This is called
1700 * during dump.
1701 */
1702void *
1703pmap_kenter_temporary(vm_offset_t pa, int i)
1704{
1705	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1706}
1707
1708/*
1709 * pmap_object_init_pt preloads the ptes for a given object
1710 * into the specified pmap.  This eliminates the blast of soft
1711 * faults on process startup and immediately after an mmap.
1712 */
1713void
1714pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1715		    vm_object_t object, vm_pindex_t pindex,
1716		    vm_size_t size)
1717{
1718
1719	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1720	KASSERT(object->type == OBJT_DEVICE,
1721	    ("pmap_object_init_pt: non-device object"));
1722}
1723
1724/*
1725 *	Routine:	pmap_change_wiring
1726 *	Function:	Change the wiring attribute for a map/virtual-address
1727 *			pair.
1728 *	In/out conditions:
1729 *			The mapping must already exist in the pmap.
1730 */
1731void
1732pmap_change_wiring(pmap, va, wired)
1733	register pmap_t pmap;
1734	vm_offset_t va;
1735	boolean_t wired;
1736{
1737	pmap_t oldpmap;
1738	struct ia64_lpte *pte;
1739
1740	if (pmap == NULL)
1741		return;
1742
1743	oldpmap = pmap_install(pmap);
1744
1745	pte = pmap_find_vhpt(va);
1746	KASSERT(pte != NULL, ("pte"));
1747	if (wired && !pmap_pte_w(pte))
1748		pmap->pm_stats.wired_count++;
1749	else if (!wired && pmap_pte_w(pte))
1750		pmap->pm_stats.wired_count--;
1751
1752	/*
1753	 * Wiring is not a hardware characteristic so there is no need to
1754	 * invalidate TLB.
1755	 */
1756	pmap_pte_set_w(pte, wired);
1757
1758	pmap_install(oldpmap);
1759}
1760
1761
1762
1763/*
1764 *	Copy the range specified by src_addr/len
1765 *	from the source map to the range dst_addr/len
1766 *	in the destination map.
1767 *
1768 *	This routine is only advisory and need not do anything.
1769 */
1770
1771void
1772pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1773	  vm_offset_t src_addr)
1774{
1775}
1776
1777
1778/*
1779 *	pmap_zero_page zeros the specified hardware page by
1780 *	mapping it into virtual memory and using bzero to clear
1781 *	its contents.
1782 */
1783
1784void
1785pmap_zero_page(vm_page_t m)
1786{
1787	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1788	bzero((caddr_t) va, PAGE_SIZE);
1789}
1790
1791
1792/*
1793 *	pmap_zero_page_area zeros the specified hardware page by
1794 *	mapping it into virtual memory and using bzero to clear
1795 *	its contents.
1796 *
1797 *	off and size must reside within a single page.
1798 */
1799
1800void
1801pmap_zero_page_area(vm_page_t m, int off, int size)
1802{
1803	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1804	bzero((char *)(caddr_t)va + off, size);
1805}
1806
1807
1808/*
1809 *	pmap_zero_page_idle zeros the specified hardware page by
1810 *	mapping it into virtual memory and using bzero to clear
1811 *	its contents.  This is for the vm_idlezero process.
1812 */
1813
1814void
1815pmap_zero_page_idle(vm_page_t m)
1816{
1817	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1818	bzero((caddr_t) va, PAGE_SIZE);
1819}
1820
1821
1822/*
1823 *	pmap_copy_page copies the specified (machine independent)
1824 *	page by mapping the page into virtual memory and using
1825 *	bcopy to copy the page, one machine dependent page at a
1826 *	time.
1827 */
1828void
1829pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1830{
1831	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
1832	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
1833	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
1834}
1835
1836/*
1837 * Returns true if the pmap's pv is one of the first
1838 * 16 pvs linked to from this page.  This count may
1839 * be changed upwards or downwards in the future; it
1840 * is only necessary that true be returned for a small
1841 * subset of pmaps for proper page aging.
1842 */
1843boolean_t
1844pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1845{
1846	pv_entry_t pv;
1847	int loops = 0;
1848	int s;
1849
1850	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
1851		return FALSE;
1852
1853	s = splvm();
1854
1855	 * Check the page's current mappings, returning immediately if this pmap is found.
1856	 * Not found, check current mappings returning immediately if found.
1857	 */
1858	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1859		if (pv->pv_pmap == pmap) {
1860			splx(s);
1861			return TRUE;
1862		}
1863		loops++;
1864		if (loops >= 16)
1865			break;
1866	}
1867	splx(s);
1868	return (FALSE);
1869}
1870
1871#define PMAP_REMOVE_PAGES_CURPROC_ONLY
1872/*
1873 * Remove all pages from the specified address space;
1874 * this aids process exit speeds.  Also, this code
1875 * is special cased for current process only, but
1876 * can have the more generic (and slightly slower)
1877 * mode enabled.  This is much faster than pmap_remove
1878 * in the case of running down an entire address space.
1879 */
1880void
1881pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1882{
1883	pv_entry_t pv, npv;
1884	int s;
1885
1886#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
1887	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
1888		printf("warning: pmap_remove_pages called with non-current pmap\n");
1889		return;
1890	}
1891#endif
1892
1893	s = splvm();
1894	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
1895		pv;
1896		pv = npv) {
1897		struct ia64_lpte *pte;
1898
1899		npv = TAILQ_NEXT(pv, pv_plist);
1900
1901		if (pv->pv_va >= eva || pv->pv_va < sva) {
1902			continue;
1903		}
1904
1905		pte = pmap_find_vhpt(pv->pv_va);
1906		KASSERT(pte != NULL, ("pte"));
1907		if (pte->pte_ig & PTE_IG_WIRED)
1908			continue;
1909
1910		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
1911	}
1912	splx(s);
1913
1914	pmap_invalidate_all(pmap);
1915}
1916
1917/*
1918 *      pmap_page_protect:
1919 *
1920 *      Lower the permission for all mappings to a given page.
1921 */
1922void
1923pmap_page_protect(vm_page_t m, vm_prot_t prot)
1924{
1925	pv_entry_t pv;
1926
1927	if ((prot & VM_PROT_WRITE) != 0)
1928		return;
1929	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
1930		if ((m->flags & PG_WRITEABLE) == 0)
1931			return;
1932		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1933			int newprot = pte_prot(pv->pv_pmap, prot);
1934			pmap_t oldpmap = pmap_install(pv->pv_pmap);
1935			struct ia64_lpte *pte;
1936			pte = pmap_find_vhpt(pv->pv_va);
1937			KASSERT(pte != NULL, ("pte"));
1938			pmap_pte_set_prot(pte, newprot);
1939			pmap_update_vhpt(pte, pv->pv_va);
1940			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1941			pmap_install(oldpmap);
1942		}
1943		vm_page_flag_clear(m, PG_WRITEABLE);
1944	} else {
1945		pmap_remove_all(m);
1946	}
1947}
1948
1949/*
1950 *	pmap_ts_referenced:
1951 *
1952 *	Return a count of reference bits for a page, clearing those bits.
1953 *	It is not necessary for every reference bit to be cleared, but it
1954 *	is necessary that 0 only be returned when there are truly no
1955 *	reference bits set.
1956 *
1957 *	XXX: The exact number of bits to check and clear is a matter that
1958 *	should be tested and standardized at some point in the future for
1959 *	optimal aging of shared pages.
1960 */
1961int
1962pmap_ts_referenced(vm_page_t m)
1963{
1964	pv_entry_t pv;
1965	int count = 0;
1966
1967	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
1968		return 0;
1969
1970	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1971		pmap_t oldpmap = pmap_install(pv->pv_pmap);
1972		struct ia64_lpte *pte;
1973		pte = pmap_find_vhpt(pv->pv_va);
1974		KASSERT(pte != NULL, ("pte"));
1975		if (pte->pte_a) {
1976			count++;
1977			pte->pte_a = 0;
1978			pmap_update_vhpt(pte, pv->pv_va);
1979			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1980		}
1981		pmap_install(oldpmap);
1982	}
1983
1984	return count;
1985}
1986
1987#if 0
1988/*
1989 *	pmap_is_referenced:
1990 *
1991 *	Return whether or not the specified physical page was referenced
1992 *	in any physical maps.
1993 */
1994static boolean_t
1995pmap_is_referenced(vm_page_t m)
1996{
1997	pv_entry_t pv;
1998
1999	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2000		return FALSE;
2001
2002	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2003		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2004		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2005		pmap_install(oldpmap);
2006		KASSERT(pte != NULL, ("pte"));
2007		if (pte->pte_a)
2008			return 1;
2009	}
2010
2011	return 0;
2012}
2013#endif
2014
2015/*
2016 *	pmap_is_modified:
2017 *
2018 *	Return whether or not the specified physical page was modified
2019 *	in any physical maps.
2020 */
2021boolean_t
2022pmap_is_modified(vm_page_t m)
2023{
2024	pv_entry_t pv;
2025
2026	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2027		return FALSE;
2028
2029	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2030		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2031		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2032		pmap_install(oldpmap);
2033		KASSERT(pte != NULL, ("pte"));
2034		if (pte->pte_d)
2035			return 1;
2036	}
2037
2038	return 0;
2039}
2040
2041/*
2042 *	pmap_is_prefaultable:
2043 *
2044 *	Return whether or not the specified virtual address is eligible
2045 *	for prefault.
2046 */
2047boolean_t
2048pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2049{
2050	struct ia64_lpte *pte;
2051
2052	pte = pmap_find_vhpt(addr);
2053	if (pte && pte->pte_p)
2054		return (FALSE);
2055	return (TRUE);
2056}
2057
2058/*
2059 *	Clear the modify bits on the specified physical page.
2060 */
2061void
2062pmap_clear_modify(vm_page_t m)
2063{
2064	pv_entry_t pv;
2065
2066	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2067		return;
2068
2069	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2070		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2071		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2072		KASSERT(pte != NULL, ("pte"));
2073		if (pte->pte_d) {
2074			pte->pte_d = 0;
2075			pmap_update_vhpt(pte, pv->pv_va);
2076			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2077		}
2078		pmap_install(oldpmap);
2079	}
2080}
2081
2082/*
2083 *	pmap_clear_reference:
2084 *
2085 *	Clear the reference bit on the specified physical page.
2086 */
2087void
2088pmap_clear_reference(vm_page_t m)
2089{
2090	pv_entry_t pv;
2091
2092	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2093		return;
2094
2095	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2096		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2097		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2098	KASSERT(pte != NULL, ("pmap_clear_reference: pte not found"));
2099		if (pte->pte_a) {
2100			pte->pte_a = 0;
2101			pmap_update_vhpt(pte, pv->pv_va);
2102			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2103		}
2104		pmap_install(oldpmap);
2105	}
2106}
2107
2108/*
2109 * Miscellaneous support routines follow
2110 */
2111
2112static void
2113ia64_protection_init(void)
2114{
2115	int prot, *kp, *up;
2116
2117	kp = protection_codes[0];
2118	up = protection_codes[1];
2119
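	/*
	 * Build the two protection-code tables, indexed by the eight
	 * possible VM_PROT_READ/WRITE/EXECUTE combinations: row 0 holds
	 * the ia64 access-rights/privilege-level bits used for kernel
	 * mappings, row 1 those used for user mappings.  Each case below
	 * spells out the READ, WRITE and EXECUTE slots explicitly.
	 */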
2120	for (prot = 0; prot < 8; prot++) {
2121		switch (prot) {
2122		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2123			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2124			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2125			break;
2126
2127		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2128			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2129			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2130			break;
2131
2132		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2133			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2134			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2135			break;
2136
2137		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2138			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2139			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2140			break;
2141
2142		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2143			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2144			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2145			break;
2146
2147		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2148			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2149			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2150			break;
2151
2152		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2153			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2154			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2155			break;
2156
2157		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2158			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2159			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2160			break;
2161		}
2162	}
2163}
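
/*
 * Hedged illustration (not compiled): the table built above is consulted
 * by indexing with a VM_PROT_* combination, e.g. for a user, read/write
 * mapping:
 *
 *	protection_codes[1][VM_PROT_READ | VM_PROT_WRITE]
 *		== (PTE_AR_RW << 2) | PTE_PL_USER
 */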
2164
2165/*
2166 * Map a set of physical memory pages into the kernel virtual
2167 * address space. Return a pointer to where it is mapped. This
2168 * routine is intended to be used for mapping device memory,
2169 * NOT real memory.
2170 */
2171void *
2172pmap_mapdev(vm_offset_t pa, vm_size_t size)
2173{
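	/*
	 * Device memory is reached through the region 6 direct-mapped,
	 * uncacheable window, so no page-table entries need to be
	 * created here and pmap_unmapdev() below has nothing to undo.
	 */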
2174	return (void*) IA64_PHYS_TO_RR6(pa);
2175}
2176
2177/*
2178 * 'Unmap' a range mapped by pmap_mapdev().
2179 */
2180void
2181pmap_unmapdev(vm_offset_t va, vm_size_t size)
2182{
2183	return;
2184}
2185
2186/*
2187 * Perform the pmap-level work for mincore(2).
2188 */
2189int
2190pmap_mincore(pmap_t pmap, vm_offset_t addr)
2191{
2192	pmap_t oldpmap;
2193	struct ia64_lpte *pte;
2194	int val = 0;
2195
2196	oldpmap = pmap_install(pmap);
2197	pte = pmap_find_vhpt(addr);
2198	pmap_install(oldpmap);
2199
2200	if (!pte)
2201		return 0;
2202
2203	if (pmap_pte_v(pte)) {
2204		vm_page_t m;
2205		vm_offset_t pa;
2206
2207		val = MINCORE_INCORE;
2208		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2209			return val;
2210
2211		pa = pmap_pte_pa(pte);
2212
2213		m = PHYS_TO_VM_PAGE(pa);
2214
2215		/*
2216		 * Modified through this pmap's mapping.
2217		 */
2218		if (pte->pte_d)
2219			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2220		else {
2221			/*
2222			 * Modified through some other mapping.
2223			 */
2224			vm_page_lock_queues();
2225			if (pmap_is_modified(m))
2226				val |= MINCORE_MODIFIED_OTHER;
2227			vm_page_unlock_queues();
2228		}
2229		/*
2230		 * Referenced through this pmap's mapping.
2231		 */
2232		if (pte->pte_a)
2233			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2234		else {
2235			/*
2236			 * Referenced through some other mapping.
2237			 */
2238			vm_page_lock_queues();
2239			if (pmap_ts_referenced(m)) {
2240				val |= MINCORE_REFERENCED_OTHER;
2241				vm_page_flag_set(m, PG_REFERENCED);
2242			}
2243			vm_page_unlock_queues();
2244		}
2245	}
2246	return val;
2247}
2248
2249void
2250pmap_activate(struct thread *td)
2251{
2252	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2253}
2254
2255pmap_t
2256pmap_switch(pmap_t pm)
2257{
2258	pmap_t prevpm;
2259	int i;
2260
2261	mtx_assert(&sched_lock, MA_OWNED);
2262
2263	prevpm = PCPU_GET(current_pmap);
2264	if (prevpm == pm)
2265		return (prevpm);
2266	if (prevpm != NULL)
2267		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
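	/*
	 * Load region registers 0-4 for the new address space.  Each
	 * value packs the region ID in bits 8-31, the base page size
	 * (PAGE_SHIFT) in bits 2-7 and the VHPT-walker enable bit in
	 * bit 0.  With no pmap installed, region i simply gets RID i.
	 */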
2268	if (pm == NULL) {
2269		for (i = 0; i < 5; i++) {
2270			ia64_set_rr(IA64_RR_BASE(i),
2271			    (i << 8)|(PAGE_SHIFT << 2)|1);
2272		}
2273	} else {
2274		for (i = 0; i < 5; i++) {
2275			ia64_set_rr(IA64_RR_BASE(i),
2276			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2277		}
2278		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
2279	}
2280	PCPU_SET(current_pmap, pm);
2281	__asm __volatile("srlz.d");
2282	return (prevpm);
2283}
2284
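/*
 * Like pmap_switch(), but for callers that do not already hold
 * sched_lock: acquire it around the switch.
 */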
2285static pmap_t
2286pmap_install(pmap_t pm)
2287{
2288	pmap_t prevpm;
2289
2290	mtx_lock_spin(&sched_lock);
2291	prevpm = pmap_switch(pm);
2292	mtx_unlock_spin(&sched_lock);
2293	return (prevpm);
2294}
2295
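/*
 * No ia64-specific alignment or superpage preference: return the
 * caller's hint unchanged.
 */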
2296vm_offset_t
2297pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2298{
2299
2300	return addr;
2301}
2302
2303#include "opt_ddb.h"
2304
2305#ifdef DDB
2306
2307#include <ddb/ddb.h>
2308
2309static const char*	psnames[] = {
2310	"1B",	"2B",	"4B",	"8B",
2311	"16B",	"32B",	"64B",	"128B",
2312	"256B",	"512B",	"1K",	"2K",
2313	"4K",	"8K",	"16K",	"32K",
2314	"64K",	"128K",	"256K",	"512K",
2315	"1M",	"2M",	"4M",	"8M",
2316	"16M",	"32M",	"64M",	"128M",
2317	"256M",	"512M",	"1G",	"2G"
2318};
2319
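/*
 * Dump the CPU's instruction (type 0) or data (type 1) translation
 * registers via PAL_VM_TR_READ.  The low bits of pal_result[0] say
 * which of the returned fields (access rights, privilege level, dirty
 * bit, memory attribute) are valid; invalid fields are zeroed before
 * printing.
 */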
2320static void
2321print_trs(int type)
2322{
2323	struct ia64_pal_result	res;
2324	int			i, maxtr;
2325	struct {
2326		struct ia64_pte	pte;
2327		struct ia64_itir itir;
2328		struct ia64_ifa ifa;
2329		struct ia64_rr	rr;
2330	}			buf;
2331	static const char*	manames[] = {
2332		"WB",	"bad",	"bad",	"bad",
2333		"UC",	"UCE",	"WC",	"NaT",
2335	};
2336
2337	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2338	if (res.pal_status != 0) {
2339		db_printf("Can't get VM summary\n");
2340		return;
2341	}
2342
2343	if (type == 0)
2344		maxtr = (res.pal_result[0] >> 40) & 0xff;
2345	else
2346		maxtr = (res.pal_result[0] >> 32) & 0xff;
2347
2348	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2349	for (i = 0; i <= maxtr; i++) {
2350		bzero(&buf, sizeof(buf));
2351		res = ia64_call_pal_stacked_physical
2352			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2353		if (!(res.pal_result[0] & 1))
2354			buf.pte.pte_ar = 0;
2355		if (!(res.pal_result[0] & 2))
2356			buf.pte.pte_pl = 0;
2357		if (!(res.pal_result[0] & 4))
2358			buf.pte.pte_d = 0;
2359		if (!(res.pal_result[0] & 8))
2360			buf.pte.pte_ma = 0;
2361		db_printf(
2362			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2363			buf.ifa.ifa_ig & 1,
2364			buf.rr.rr_rid,
2365			buf.ifa.ifa_vpn,
2366			buf.pte.pte_ppn,
2367			psnames[buf.itir.itir_ps],
2368			buf.pte.pte_ed,
2369			buf.pte.pte_ar,
2370			buf.pte.pte_pl,
2371			buf.pte.pte_d,
2372			buf.pte.pte_a,
2373			manames[buf.pte.pte_ma],
2374			buf.pte.pte_p,
2375			buf.itir.itir_key);
2376	}
2377}
2378
2379DB_COMMAND(itr, db_itr)
2380{
2381	print_trs(0);
2382}
2383
2384DB_COMMAND(dtr, db_dtr)
2385{
2386	print_trs(1);
2387}
2388
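/*
 * Dump all eight region registers, decoding the RID, page size and
 * VHPT-enable fields.
 */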
2389DB_COMMAND(rr, db_rr)
2390{
2391	int i;
2392	u_int64_t t;
2393	struct ia64_rr rr;
2394
2395	printf("RR RID    PgSz VE\n");
2396	for (i = 0; i < 8; i++) {
2397		__asm __volatile ("mov %0=rr[%1]"
2398				  : "=r"(t)
2399				  : "r"(IA64_RR_BASE(i)));
2400		*(u_int64_t *) &rr = t;
2401		printf("%d  %06x %4s %d\n",
2402		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2403	}
2404}
2405
2406DB_COMMAND(thash, db_thash)
2407{
2408	if (!have_addr)
2409		return;
2410
2411	db_printf("%p\n", (void *) ia64_thash(addr));
2412}
2413
2414DB_COMMAND(ttag, db_ttag)
2415{
2416	if (!have_addr)
2417		return;
2418
2419	db_printf("0x%lx\n", ia64_ttag(addr));
2420}
2421
2422#endif
2423