1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 119861 2003-09-07 20:02:38Z alc $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Regions 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120#include <vm/uma_int.h>
121
122#include <sys/user.h>
123
124#include <machine/cpu.h>
125#include <machine/pal.h>
126#include <machine/md_var.h>
127
128/* XXX move to a header. */
129extern u_int64_t ia64_gateway_page[];
130
131MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
132
133#ifndef PMAP_SHPGPERPROC
134#define PMAP_SHPGPERPROC 200
135#endif
136
137#if defined(DIAGNOSTIC)
138#define PMAP_DIAGNOSTIC
139#endif
140
141#define MINPV 2048	/* Preallocate at least this many */
142#define MAXPV 20480	/* But no more than this */
143
144#if 0
145#define PMAP_DIAGNOSTIC
146#define PMAP_DEBUG
147#endif
148
149#if !defined(PMAP_DIAGNOSTIC)
150#define PMAP_INLINE __inline
151#else
152#define PMAP_INLINE
153#endif
154
155/*
156 * Get PDEs and PTEs for user/kernel address space
157 */
158#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
159#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
160#define pmap_pte_v(pte)		((pte)->pte_p)
161#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
162#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
163
164#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
165				:((pte)->pte_ig &= ~PTE_IG_WIRED))
166#define pmap_pte_set_prot(pte, v) do {		\
167    (pte)->pte_ar = v >> 2;			\
168    (pte)->pte_pl = v & 3;			\
169} while (0)
170
171/*
172 * Given a map and a machine independent protection code,
173 * convert to an ia64 protection code.
174 */
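/*
 * Each protection code packs the ia64 access rights (ar) field in
 * bits 2 and up and the privilege level (pl) in bits 1..0, which is
 * the layout pmap_pte_set_prot() expects.  The first index selects
 * kernel versus user pmaps, the second the VM_PROT_* combination.
 */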
175#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
176#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
177#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
178int	protection_codes[2][8];
179
180/*
181 * Return non-zero if this pmap is currently active
182 */
183#define pmap_isactive(pmap)	(pmap->pm_active)
184
185/*
186 * Statically allocated kernel pmap
187 */
188struct pmap kernel_pmap_store;
189
190vm_offset_t avail_start;	/* PA of first available physical page */
191vm_offset_t avail_end;		/* PA of last available physical page */
192vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
193vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
194static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
195
196vm_offset_t vhpt_base, vhpt_size;
197struct mtx pmap_vhptmutex;
198
199/*
200 * We use an object to own the kernel's 'page tables'. For simplicity,
201 * we use one page directory to index a set of pages containing
202 * ia64_lptes. This gives us up to 2GB of kernel virtual space.
203 */
204static int nkpt;
205struct ia64_lpte **ia64_kptdir;
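/*
 * The indexing macros below assume a 32-byte struct ia64_lpte and
 * 8-byte directory slots: a directory page then holds
 * 2^(PAGE_SHIFT-3) pointers to PTE pages and each PTE page holds
 * NKPTEPG == 2^(PAGE_SHIFT-5) entries.  KPTE_PTE_INDEX() selects the
 * entry within a PTE page and KPTE_DIR_INDEX() selects the PTE page.
 */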
206#define KPTE_DIR_INDEX(va) \
207	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
208#define KPTE_PTE_INDEX(va) \
209	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
210#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
211
212vm_offset_t kernel_vm_end;
213
214/* Values for ptc.e. XXX values for SKI. */
215static u_int64_t pmap_ptc_e_base = 0x100000000;
216static u_int64_t pmap_ptc_e_count1 = 3;
217static u_int64_t pmap_ptc_e_count2 = 2;
218static u_int64_t pmap_ptc_e_stride1 = 0x2000;
219static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
220
221/*
222 * Data for the RID allocator
223 */
224static int pmap_ridcount;
225static int pmap_rididx;
226static int pmap_ridmapsz;
227static int pmap_ridmax;
228static u_int64_t *pmap_ridmap;
229struct mtx pmap_ridmutex;
230
231/*
232 * Data for the pv entry allocation mechanism
233 */
234static uma_zone_t pvzone;
235static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
236int pmap_pagedaemon_waken;
237static struct pv_entry *pvbootentries;
238static int pvbootnext, pvbootmax;
239
240/*
241 * Data for allocating PTEs for user processes.
242 */
243static uma_zone_t ptezone;
244
245/*
246 * VHPT instrumentation.
247 */
248static int pmap_vhpt_inserts;
249static int pmap_vhpt_collisions;
250static int pmap_vhpt_resident;
251SYSCTL_DECL(_vm_stats);
252SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
253SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
254	   &pmap_vhpt_inserts, 0, "");
255SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
256	   &pmap_vhpt_collisions, 0, "");
257SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
258	   &pmap_vhpt_resident, 0, "");
259
260static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
261static pv_entry_t get_pv_entry(void);
262static void	ia64_protection_init(void);
263
264static pmap_t	pmap_install(pmap_t);
265static void	pmap_invalidate_all(pmap_t pmap);
266
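/*
 * Carve boot-time memory directly out of the phys_avail[] ranges,
 * before the VM system is up.  The stolen memory is zeroed and
 * returned as a region 7 (direct-mapped, cacheable) address.
 */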
267vm_offset_t
268pmap_steal_memory(vm_size_t size)
269{
270	vm_size_t bank_size;
271	vm_offset_t pa, va;
272
273	size = round_page(size);
274
275	bank_size = phys_avail[1] - phys_avail[0];
276	while (size > bank_size) {
277		int i;
278		for (i = 0; phys_avail[i+2]; i+= 2) {
279			phys_avail[i] = phys_avail[i+2];
280			phys_avail[i+1] = phys_avail[i+3];
281		}
282		phys_avail[i] = 0;
283		phys_avail[i+1] = 0;
284		if (!phys_avail[0])
285			panic("pmap_steal_memory: out of memory");
286		bank_size = phys_avail[1] - phys_avail[0];
287	}
288
289	pa = phys_avail[0];
290	phys_avail[0] += size;
291
292	va = IA64_PHYS_TO_RR7(pa);
293	bzero((caddr_t) va, size);
294	return va;
295}
296
297/*
298 *	Bootstrap the system enough to run with virtual memory.
299 */
300void
301pmap_bootstrap()
302{
303	int i, j, count, ridbits;
304	struct ia64_pal_result res;
305
306	/*
307	 * Query the PAL Code to find the loop parameters for the
308	 * ptc.e instruction.
309	 */
310	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
311	if (res.pal_status != 0)
312		panic("Can't configure ptc.e parameters");
313	pmap_ptc_e_base = res.pal_result[0];
314	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
315	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
316	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
317	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
318	if (bootverbose)
319		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
320		       "stride1=0x%lx, stride2=0x%lx\n",
321		       pmap_ptc_e_base,
322		       pmap_ptc_e_count1,
323		       pmap_ptc_e_count2,
324		       pmap_ptc_e_stride1,
325		       pmap_ptc_e_stride2);
326
327	/*
328	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
329	 *
330	 * We currently need at least 19 bits in the RID because PID_MAX
331	 * can only be encoded in 17 bits and we need RIDs for 5 regions
332	 * per process. With PID_MAX equalling 99999 this means that we
333	 * need to be able to encode 499995 (=5*PID_MAX).
334	 * The Itanium processor only has 18 bits and the architected
335	 * minimum is exactly that. So, we cannot use a PID based scheme
336	 * in those cases. Enter pmap_ridmap...
337	 * We should avoid the map when running on a processor that has
338	 * implemented enough bits. This means that we should pass the
339	 * process/thread ID to pmap. This we currently don't do, so we
340	 * use the map anyway. However, we don't want to allocate a map
341	 * that is large enough to cover the range dictated by the number
342	 * of bits in the RID, because that may result in a RID map of
343	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
344 * The bottom line: we create a 32KB map when the processor only
345	 * implements 18 bits (or when we can't figure it out). Otherwise
346	 * we create a 64KB map.
347	 */
348	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
349	if (res.pal_status != 0) {
350		if (bootverbose)
351			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
352		ridbits = 18; /* guaranteed minimum */
353	} else {
354		ridbits = (res.pal_result[1] >> 8) & 0xff;
355		if (bootverbose)
356			printf("Processor supports %d Region ID bits\n",
357			    ridbits);
358	}
359	if (ridbits > 19)
360		ridbits = 19;
361
362	pmap_ridmax = (1 << ridbits);
363	pmap_ridmapsz = pmap_ridmax / 64;
364	pmap_ridmap = (u_int64_t *)pmap_steal_memory(pmap_ridmax / 8);
365	pmap_ridmap[0] |= 0xff;
366	pmap_rididx = 0;
367	pmap_ridcount = 8;
368	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
369
370	/*
371	 * Allocate some memory for initial kernel 'page tables'.
372	 */
373	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
374	for (i = 0; i < NKPT; i++) {
375		ia64_kptdir[i] = (void*)pmap_steal_memory(PAGE_SIZE);
376	}
377	nkpt = NKPT;
378	kernel_vm_end = NKPT * PAGE_SIZE * NKPTEPG + VM_MIN_KERNEL_ADDRESS -
379	    VM_GATEWAY_SIZE;
380
381	avail_start = phys_avail[0];
382	for (i = 0; phys_avail[i+2]; i+= 2) ;
383	avail_end = phys_avail[i+1];
384	count = i+2;
385
386	/*
387	 * Figure out a useful size for the VHPT, based on the size of
388	 * physical memory and try to locate a region which is large
389	 * enough to contain the VHPT (which must be a power of two in
390	 * size and aligned to a natural boundary).
391	 * Don't use the difference between avail_start and avail_end
392	 * as a measure of memory size. The address space is often
393	 * sparse enough that we would (try to) create a huge VHPT.
394	 */
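	/*
	 * Aim for roughly one 32-byte long-format entry per physical
	 * page (Maxmem is in pages), with a 32KB (1 << 15) floor.
	 */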
395	vhpt_size = 15;
396	while ((1<<vhpt_size) < Maxmem * 32)
397		vhpt_size++;
398
399	vhpt_base = 0;
400	while (!vhpt_base) {
401		vm_offset_t mask;
402		if (bootverbose)
403			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
404		mask = (1L << vhpt_size) - 1;
405		for (i = 0; i < count; i += 2) {
406			vm_offset_t base, limit;
407			base = (phys_avail[i] + mask) & ~mask;
408			limit = base + (1L << vhpt_size);
409			if (limit <= phys_avail[i+1])
410				/*
411				 * VHPT can fit in this region
412				 */
413				break;
414		}
415		if (!phys_avail[i]) {
416			/*
417			 * Can't fit, try next smaller size.
418			 */
419			vhpt_size--;
420		} else {
421			vhpt_base = (phys_avail[i] + mask) & ~mask;
422		}
423	}
424	if (vhpt_size < 15)
425		panic("Can't find space for VHPT");
426
427	if (bootverbose)
428		printf("Putting VHPT at %p\n", (void *) vhpt_base);
429	if (vhpt_base != phys_avail[i]) {
430		/*
431		 * Split this region.
432		 */
433		if (bootverbose)
434			printf("Splitting [%p-%p]\n",
435			       (void *) phys_avail[i],
436			       (void *) phys_avail[i+1]);
437		for (j = count; j > i; j -= 2) {
438			phys_avail[j] = phys_avail[j-2];
439			phys_avail[j+1] = phys_avail[j-2+1];
440		}
441		phys_avail[count+2] = 0;
442		phys_avail[count+3] = 0;
443		phys_avail[i+1] = vhpt_base;
444		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
445	} else {
446		phys_avail[i] = vhpt_base + (1L << vhpt_size);
447	}
448
449	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
450	bzero((void *) vhpt_base, (1L << vhpt_size));
451
452	mtx_init(&pmap_vhptmutex, "VHPT collision chain lock", NULL, MTX_DEF);
453
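	/*
	 * Program the PTA register: the VHPT base in the high bits,
	 * its log2 size in bits 7..2, bit 8 to select the long
	 * (32-byte entry) format and bit 0 to enable the walker.
	 */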
454	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
455			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
456
457	virtual_avail = VM_MIN_KERNEL_ADDRESS;
458	virtual_end = VM_MAX_KERNEL_ADDRESS;
459
460	/*
461	 * Initialize protection array.
462	 */
463	ia64_protection_init();
464
465	/*
466	 * Initialize the kernel pmap (which is statically allocated).
467	 */
468	for (i = 0; i < 5; i++)
469		kernel_pmap->pm_rid[i] = 0;
470	kernel_pmap->pm_active = 1;
471	TAILQ_INIT(&kernel_pmap->pm_pvlist);
472	PCPU_SET(current_pmap, kernel_pmap);
473
474	/*
475	 * Region 5 is mapped via the vhpt.
476	 */
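	/*
	 * A region register value has the RID in bits 8 and up, the
	 * preferred page size (log2) in bits 7..2 and the VHPT walker
	 * enable in bit 0.  Region 5 uses RID 5 (one of the eight RIDs
	 * reserved for the kernel) with PAGE_SIZE pages and the walker
	 * enabled.
	 */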
477	ia64_set_rr(IA64_RR_BASE(5),
478		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
479
480	/*
481	 * Region 6 is direct mapped UC and region 7 is direct mapped
482	 * WB (cacheable). The details of this are controlled by the Alt
483	 * {I,D}TLB handlers. Here we just make sure that they have the
484	 * largest possible page size to minimize TLB usage.
485	 */
486	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
487	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
488
489	/*
490	 * Reserve some memory for allocating pvs while bootstrapping
491	 * the pv allocator. We need to have enough to cover mapping
492	 * the kmem_alloc region used to allocate the initial_pvs in
493	 * pmap_init. In general, the size of this region is
494	 * approximately (# physical pages) * (size of pv entry).
495	 */
496	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
497	pvbootentries = (struct pv_entry *)
498		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
499	pvbootnext = 0;
500
501	/*
502	 * Clear out any random TLB entries left over from booting.
503	 */
504	pmap_invalidate_all(kernel_pmap);
505
506	map_gateway_page();
507}
508
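/*
 * Page-sized backend allocator for UMA.  Pages are handed out through
 * the region 7 direct mapping, so no kernel virtual address space or
 * page table setup is needed for them.
 */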
509void *
510uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
511{
512	static vm_pindex_t color;
513	vm_page_t m;
514	int pflags;
515	void *va;
516
517	*flags = UMA_SLAB_PRIV;
518	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
519		pflags = VM_ALLOC_INTERRUPT;
520	else
521		pflags = VM_ALLOC_SYSTEM;
522	if (wait & M_ZERO)
523		pflags |= VM_ALLOC_ZERO;
524
525	for (;;) {
526		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
527		if (m == NULL) {
528			if (wait & M_NOWAIT)
529				return (NULL);
530			else
531				VM_WAIT;
532		} else
533			break;
534	}
535
536	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
537	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
538		bzero(va, PAGE_SIZE);
539	return (va);
540}
541
542void
543uma_small_free(void *mem, int size, u_int8_t flags)
544{
545	vm_page_t m;
546
547	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
548	vm_page_lock_queues();
549	vm_page_free(m);
550	vm_page_unlock_queues();
551}
552
553/*
554 *	Initialize the pmap module.
555 *	Called by vm_init, to initialize any structures that the pmap
556 *	system needs to map virtual memory.
557 *	pmap_init has been enhanced to support discontiguous physical
558 *	memory in a fairly consistent way.
559 */
560void
561pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
562{
563	int i;
564	int initial_pvs;
565
566	/*
567	 * Allocate memory for random pmap data structures.  Includes the
568	 * pv_head_table.
569	 */
570
571	for(i = 0; i < vm_page_array_size; i++) {
572		vm_page_t m;
573
574		m = &vm_page_array[i];
575		TAILQ_INIT(&m->md.pv_list);
576		m->md.pv_list_count = 0;
577 	}
578
579	/*
580	 * Init the pv free list and the PTE free list.
581	 */
582	initial_pvs = vm_page_array_size;
583	if (initial_pvs < MINPV)
584		initial_pvs = MINPV;
585	if (initial_pvs > MAXPV)
586		initial_pvs = MAXPV;
587	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
588	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
589	uma_prealloc(pvzone, initial_pvs);
590
591	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
592	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
593	uma_prealloc(ptezone, initial_pvs);
594
595	/*
596	 * Now it is safe to enable pv_table recording.
597	 */
598	pmap_initialized = TRUE;
599}
600
601/*
602 * Initialize the address space (zone) for the pv_entries.  Set a
603 * high water mark so that the system can recover from excessive
604 * numbers of pv entries.
605 */
606void
607pmap_init2()
608{
609	int shpgperproc = PMAP_SHPGPERPROC;
610
611	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
612	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
613	pv_entry_high_water = 9 * (pv_entry_max / 10);
614}
615
616
617/***************************************************
618 * Manipulate TLBs for a pmap
619 ***************************************************/
620
621static void
622pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
623{
624	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
625		("invalidating TLB for non-current pmap"));
626	ia64_ptc_g(va, PAGE_SHIFT << 2);
627}
628
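/*
 * Flush the entire local TLB by stepping ptc.e over the grid of
 * addresses described by the PAL-provided base, counts and strides
 * (see pmap_bootstrap), with interrupts disabled for the duration.
 */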
629static void
630pmap_invalidate_all_1(void *arg)
631{
632	u_int64_t addr;
633	int i, j;
634	register_t psr;
635
636	psr = intr_disable();
637	addr = pmap_ptc_e_base;
638	for (i = 0; i < pmap_ptc_e_count1; i++) {
639		for (j = 0; j < pmap_ptc_e_count2; j++) {
640			ia64_ptc_e(addr);
641			addr += pmap_ptc_e_stride2;
642		}
643		addr += pmap_ptc_e_stride1;
644	}
645	intr_restore(psr);
646}
647
648static void
649pmap_invalidate_all(pmap_t pmap)
650{
651	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
652		("invalidating TLB for non-current pmap"));
653
654
655#ifdef SMP
656	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
657#else
658	pmap_invalidate_all_1(0);
659#endif
660}
661
662static u_int32_t
663pmap_allocate_rid(void)
664{
665	uint64_t bit, bits;
666	int rid;
667
668	mtx_lock(&pmap_ridmutex);
669	if (pmap_ridcount == pmap_ridmax)
670		panic("pmap_allocate_rid: All Region IDs used");
671
672	/* Find an index with a free bit. */
673	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
674		pmap_rididx++;
675		if (pmap_rididx == pmap_ridmapsz)
676			pmap_rididx = 0;
677	}
678	rid = pmap_rididx * 64;
679
680	/* Find a free bit. */
681	bit = 1UL;
682	while (bits & bit) {
683		rid++;
684		bit <<= 1;
685	}
686
687	pmap_ridmap[pmap_rididx] |= bit;
688	pmap_ridcount++;
689	mtx_unlock(&pmap_ridmutex);
690
691	return rid;
692}
693
694static void
695pmap_free_rid(u_int32_t rid)
696{
697	uint64_t bit;
698	int idx;
699
700	idx = rid / 64;
701	bit = ~(1UL << (rid & 63));
702
703	mtx_lock(&pmap_ridmutex);
704	pmap_ridmap[idx] &= bit;
705	pmap_ridcount--;
706	mtx_unlock(&pmap_ridmutex);
707}
708
709/***************************************************
710 * Low level helper routines.....
711 ***************************************************/
712
713/*
714 * Install a pte into the VHPT
715 */
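/*
 * The entry is updated in stages so that the hardware walker never
 * sees a partially written entry: the tag is first made invalid (bit
 * 63 is the ti bit), the pte words are copied, and the real tag is
 * written last, with memory fences ordering the stores.
 */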
716static PMAP_INLINE void
717pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
718{
719	u_int64_t *vhp, *p;
720
721	vhp = (u_int64_t *)vhpte;
722	p = (u_int64_t *)pte;
723
724	critical_enter();
725
726	/* Invalidate the tag so the VHPT walker will not match this entry. */
727	vhp[2] = 1UL << 63;
728	ia64_mf();
729
730	vhp[0] = p[0];
731	vhp[1] = p[1];
732	ia64_mf();
733
734	/* Install a proper tag now that we're done. */
735	vhp[2] = p[2];
736	ia64_mf();
737
738	critical_exit();
739}
740
741/*
742 * Compare essential parts of pte.
743 */
744static PMAP_INLINE int
745pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
746{
747	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
748}
749
750/*
751 * this routine defines the region(s) of memory that should
752 * not be tested for the modified bit.
753 */
754static PMAP_INLINE int
755pmap_track_modified(vm_offset_t va)
756{
757	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
758		return 1;
759	else
760		return 0;
761}
762
763/***************************************************
764 * Page table page management routines.....
765 ***************************************************/
766
767void
768pmap_pinit0(struct pmap *pmap)
769{
770	/* kernel_pmap is the same as any other pmap. */
771	pmap_pinit(pmap);
772}
773
774/*
775 * Initialize a preallocated and zeroed pmap structure,
776 * such as one in a vmspace structure.
777 */
778void
779pmap_pinit(struct pmap *pmap)
780{
781	int i;
782
783	pmap->pm_flags = 0;
784	for (i = 0; i < 5; i++)
785		pmap->pm_rid[i] = 0;
786	pmap->pm_ptphint = NULL;
787	pmap->pm_active = 0;
788	TAILQ_INIT(&pmap->pm_pvlist);
789	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
790}
791
792/*
793 * Wire in kernel global address entries.  To avoid a race condition
794 * between pmap initialization and pmap_growkernel, this procedure
795 * should be called after the vmspace is attached to the process
796 * but before this pmap is activated.
797 */
798void
799pmap_pinit2(struct pmap *pmap)
800{
801	int i;
802
803	for (i = 0; i < 5; i++)
804		pmap->pm_rid[i] = pmap_allocate_rid();
805}
806
807/***************************************************
808 * Pmap allocation/deallocation routines.
809 ***************************************************/
810
811/*
812 * Release any resources held by the given physical map.
813 * Called when a pmap initialized by pmap_pinit is being released.
814 * Should only be called if the map contains no valid mappings.
815 */
816void
817pmap_release(pmap_t pmap)
818{
819	int i;
820
821	for (i = 0; i < 5; i++)
822		if (pmap->pm_rid[i])
823			pmap_free_rid(pmap->pm_rid[i]);
824}
825
826/*
827 * grow the number of kernel page table entries, if needed
828 */
829void
830pmap_growkernel(vm_offset_t addr)
831{
832	struct ia64_lpte *ptepage;
833	vm_page_t nkpg;
834
835	if (kernel_vm_end >= addr)
836		return;
837
838	critical_enter();
839
840	while (kernel_vm_end < addr) {
841		/* We could handle more by increasing the size of kptdir. */
842		if (nkpt == MAXKPT)
843			panic("pmap_growkernel: out of kernel address space");
844
845		nkpg = vm_page_alloc(NULL, nkpt,
846		    VM_ALLOC_NOOBJ | VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
847		if (!nkpg)
848			panic("pmap_growkernel: no memory to grow kernel");
849
850		ptepage = (struct ia64_lpte *)
851		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
852		bzero(ptepage, PAGE_SIZE);
853		ia64_kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
854
855		nkpt++;
856		kernel_vm_end += PAGE_SIZE * NKPTEPG;
857	}
858
859	critical_exit();
860}
861
862/***************************************************
863 * page management routines.
864 ***************************************************/
865
866/*
867 * free the pv_entry back to the free list
868 */
869static PMAP_INLINE void
870free_pv_entry(pv_entry_t pv)
871{
872	pv_entry_count--;
873	uma_zfree(pvzone, pv);
874}
875
876/*
877 * Get a new pv_entry, allocating a block from the system
878 * when needed.
879 * The memory allocation bypasses the malloc code because of the
880 * possibility of allocations at interrupt time.
881 */
882static pv_entry_t
883get_pv_entry(void)
884{
885	pv_entry_count++;
886	if (pv_entry_high_water &&
887		(pv_entry_count > pv_entry_high_water) &&
888		(pmap_pagedaemon_waken == 0)) {
889		pmap_pagedaemon_waken = 1;
890		wakeup (&vm_pages_needed);
891	}
892	return uma_zalloc(pvzone, M_NOWAIT);
893}
894
895/*
896 * Add an ia64_lpte to the VHPT.
897 */
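/*
 * The pte is linked at the head of the per-bucket collision chain
 * (pte_chain holds physical addresses) and, if the hashed VHPT slot
 * itself is free, it is also copied into the slot so the hardware
 * walker can satisfy the translation without a fault.
 */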
898static void
899pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
900{
901	struct ia64_lpte *vhpte;
902
903	pmap_vhpt_inserts++;
904	pmap_vhpt_resident++;
905
906	vhpte = (struct ia64_lpte *) ia64_thash(va);
907
908	if (vhpte->pte_chain)
909		pmap_vhpt_collisions++;
910
911	mtx_lock(&pmap_vhptmutex);
912
913	pte->pte_chain = vhpte->pte_chain;
914	ia64_mf();
915	vhpte->pte_chain = ia64_tpa((vm_offset_t)pte);
916	ia64_mf();
917
918	if (!vhpte->pte_p && pte->pte_p)
919		pmap_install_pte(vhpte, pte);
920
921	mtx_unlock(&pmap_vhptmutex);
922}
923
924/*
925 * Update VHPT after a pte has changed.
926 */
927static void
928pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
929{
930	struct ia64_lpte *vhpte;
931
932	vhpte = (struct ia64_lpte *)ia64_thash(va);
933
934	mtx_lock(&pmap_vhptmutex);
935
936	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag) && pte->pte_p)
937		pmap_install_pte(vhpte, pte);
938
939	mtx_unlock(&pmap_vhptmutex);
940}
941
942/*
943 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
944 * worked or an appropriate error code otherwise.
945 */
946static int
947pmap_remove_vhpt(vm_offset_t va)
948{
949	struct ia64_lpte *pte;
950	struct ia64_lpte *lpte;
951	struct ia64_lpte *vhpte;
952	u_int64_t tag;
953
954	vhpte = (struct ia64_lpte *)ia64_thash(va);
955
956	/*
957	 * If the VHPTE is invalid, there can't be a collision chain.
958	 */
959	if (!vhpte->pte_p) {
960		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
961		return (ENOENT);
962	}
963
964	lpte = vhpte;
965	tag = ia64_ttag(va);
966
967	mtx_lock(&pmap_vhptmutex);
968
969	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(vhpte->pte_chain);
970	KASSERT(pte != NULL, ("foo"));
971
972	while (pte->pte_tag != tag) {
973		lpte = pte;
974		if (pte->pte_chain == 0) {
975			mtx_unlock(&pmap_vhptmutex);
976			return (ENOENT);
977		}
978		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
979	}
980
981	/* Snip this pv_entry out of the collision chain. */
982	lpte->pte_chain = pte->pte_chain;
983	ia64_mf();
984
985	/*
986	 * If the VHPTE matches as well, change it to map the first
987	 * element from the chain if there is one.
988	 */
989	if (vhpte->pte_tag == tag) {
990		if (vhpte->pte_chain) {
991			pte = (void*)IA64_PHYS_TO_RR7(vhpte->pte_chain);
992			pmap_install_pte(vhpte, pte);
993		} else
994			vhpte->pte_p = 0;
995	}
996
997	mtx_unlock(&pmap_vhptmutex);
998	pmap_vhpt_resident--;
999	return (0);
1000}
1001
1002/*
1003 * Find the ia64_lpte for the given va, if any.
1004 */
1005static struct ia64_lpte *
1006pmap_find_vhpt(vm_offset_t va)
1007{
1008	struct ia64_lpte *pte;
1009	u_int64_t tag;
1010
1011	tag = ia64_ttag(va);
1012	pte = (struct ia64_lpte *)ia64_thash(va);
1013	if (pte->pte_chain == 0)
1014		return (NULL);
1015	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
1016	while (pte->pte_tag != tag) {
1017		if (pte->pte_chain == 0)
1018			return (NULL);
1019		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
1020	}
1021	return (pte);
1022}
1023
1024/*
1025 * Remove an entry from the list of managed mappings.
1026 */
1027static int
1028pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1029{
1030	if (!pv) {
1031		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1032			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1033				if (pmap == pv->pv_pmap && va == pv->pv_va)
1034					break;
1035			}
1036		} else {
1037			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1038				if (va == pv->pv_va)
1039					break;
1040			}
1041		}
1042	}
1043
1044	if (pv) {
1045		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1046		m->md.pv_list_count--;
1047		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1048			vm_page_flag_clear(m, PG_WRITEABLE);
1049
1050		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1051		free_pv_entry(pv);
1052		return 0;
1053	} else {
1054		return ENOENT;
1055	}
1056}
1057
1058/*
1059 * Create a pv entry for page at pa for
1060 * (pmap, va).
1061 */
1062static void
1063pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1064{
1065	pv_entry_t pv;
1066
1067	pv = get_pv_entry();
1068	pv->pv_pmap = pmap;
1069	pv->pv_va = va;
1070
1071	vm_page_lock_queues();
1072	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1073	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1074	m->md.pv_list_count++;
1075	vm_page_unlock_queues();
1076}
1077
1078/*
1079 *	Routine:	pmap_extract
1080 *	Function:
1081 *		Extract the physical page address associated
1082 *		with the given map/virtual_address pair.
1083 */
1084vm_offset_t
1085pmap_extract(pmap, va)
1086	register pmap_t pmap;
1087	vm_offset_t va;
1088{
1089	struct ia64_lpte *pte;
1090	pmap_t oldpmap;
1091
1092	if (!pmap)
1093		return 0;
1094
1095	oldpmap = pmap_install(pmap);
1096	pte = pmap_find_vhpt(va);
1097	pmap_install(oldpmap);
1098
1099	if (!pte)
1100		return 0;
1101
1102	return pmap_pte_pa(pte);
1103}
1104
1105/***************************************************
1106 * Low level mapping routines.....
1107 ***************************************************/
1108
1109/*
1110 * Find the kernel lpte for mapping the given virtual address, which
1111 * must be in the part of region 5 which we can cover with our kernel
1112 * 'page tables'.
1113 */
1114static struct ia64_lpte *
1115pmap_find_kpte(vm_offset_t va)
1116{
1117	KASSERT((va >> 61) == 5,
1118		("kernel mapping 0x%lx not in region 5", va));
1119	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1120		("kernel mapping 0x%lx out of range", va));
1121	return (&ia64_kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)]);
1122}
1123
1124/*
1125 * Find a pte suitable for mapping a user-space address. If one exists
1126 * in the VHPT, that one will be returned, otherwise a new pte is
1127 * allocated.
1128 */
1129static struct ia64_lpte *
1130pmap_find_pte(vm_offset_t va)
1131{
1132	struct ia64_lpte *pte;
1133
1134	if (va >= VM_MAXUSER_ADDRESS)
1135		return pmap_find_kpte(va);
1136
1137	pte = pmap_find_vhpt(va);
1138	if (!pte) {
1139		pte = uma_zalloc(ptezone, M_WAITOK);
1140		pte->pte_p = 0;
1141	}
1142	return pte;
1143}
1144
1145/*
1146 * Free a pte which is now unused. This simply returns it to the zone
1147 * allocator if it is a user mapping. For kernel mappings, clear the
1148 * valid bit to make it clear that the mapping is not currently used.
1149 */
1150static void
1151pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1152{
1153	if (va < VM_MAXUSER_ADDRESS)
1154		uma_zfree(ptezone, pte);
1155	else
1156		pte->pte_p = 0;
1157}
1158
1159/*
1160 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1161 * the pte was originally valid, then it's assumed to already be in the
1162 * VHPT.
1163 */
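/*
 * For managed mappings the access and dirty bits start out clear, so
 * the first reference and the first store each fault and let the OS
 * record them; pmap_remove_pte() and friends later feed those bits
 * back to the VM system as PG_REFERENCED and vm_page_dirty().
 * Unmanaged mappings preset both bits to avoid the faults.
 */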
1164static void
1165pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1166	     int ig, int pl, int ar)
1167{
1168	int wasvalid = pte->pte_p;
1169
1170	pte->pte_p = 1;
1171	pte->pte_ma = PTE_MA_WB;
1172	if (ig & PTE_IG_MANAGED) {
1173		pte->pte_a = 0;
1174		pte->pte_d = 0;
1175	} else {
1176		pte->pte_a = 1;
1177		pte->pte_d = 1;
1178	}
1179	pte->pte_pl = pl;
1180	pte->pte_ar = ar;
1181	pte->pte_ppn = pa >> 12;
1182	pte->pte_ed = 0;
1183	pte->pte_ig = ig;
1184
1185	pte->pte_ps = PAGE_SHIFT;
1186	pte->pte_key = 0;
1187
1188	pte->pte_tag = ia64_ttag(va);
1189
1190	if (wasvalid) {
1191		pmap_update_vhpt(pte, va);
1192	} else {
1193		pmap_enter_vhpt(pte, va);
1194	}
1195}
1196
1197/*
1198 * If a pte contains a valid mapping, clear it and update the VHPT.
1199 */
1200static void
1201pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1202{
1203	if (pte->pte_p) {
1204		pmap_remove_vhpt(va);
1205		ia64_ptc_g(va, PAGE_SHIFT << 2);
1206		pte->pte_p = 0;
1207	}
1208}
1209
1210/*
1211 * Remove the (possibly managed) mapping represented by pte from the
1212 * given pmap.
1213 */
1214static int
1215pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1216		pv_entry_t pv, int freepte)
1217{
1218	int error;
1219	vm_page_t m;
1220
1221	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1222		("removing pte for non-current pmap"));
1223
1224	/*
1225	 * First remove from the VHPT.
1226	 */
1227	error = pmap_remove_vhpt(va);
1228	if (error)
1229		return error;
1230
1231	/*
1232	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1233	 */
1234	pte->pte_p = 0;
1235
1236	if (pte->pte_ig & PTE_IG_WIRED)
1237		pmap->pm_stats.wired_count -= 1;
1238
1239	pmap->pm_stats.resident_count -= 1;
1240	if (pte->pte_ig & PTE_IG_MANAGED) {
1241		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1242		if (pte->pte_d)
1243			if (pmap_track_modified(va))
1244				vm_page_dirty(m);
1245		if (pte->pte_a)
1246			vm_page_flag_set(m, PG_REFERENCED);
1247
1248		if (freepte)
1249			pmap_free_pte(pte, va);
1250		return pmap_remove_entry(pmap, m, va, pv);
1251	} else {
1252		if (freepte)
1253			pmap_free_pte(pte, va);
1254		return 0;
1255	}
1256}
1257
1258/*
1259 * Extract the physical page address associated with a kernel
1260 * virtual address.
1261 */
1262vm_paddr_t
1263pmap_kextract(vm_offset_t va)
1264{
1265	struct ia64_lpte *pte;
1266	vm_offset_t gwpage;
1267
1268	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1269
1270	/* Regions 6 and 7 are direct mapped. */
1271	if (va >= IA64_RR_BASE(6))
1272		return (IA64_RR_MASK(va));
1273
1274	/* EPC gateway page? */
1275	gwpage = (vm_offset_t)ia64_get_k5();
1276	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1277		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1278
1279	/* Bail out if the virtual address is beyond our limits. */
1280	if (IA64_RR_MASK(va) >= nkpt * PAGE_SIZE * NKPTEPG)
1281		return (0);
1282
1283	pte = pmap_find_kpte(va);
1284	if (!pte->pte_p)
1285		return (0);
1286	return ((pte->pte_ppn << 12) | (va & PAGE_MASK));
1287}
1288
1289/*
1290 * Add a list of wired pages to the kva.
1291 * This routine is only used for temporary
1292 * kernel mappings that do not need to have
1293 * page modification or references recorded.
1294 * Note that old mappings are simply written
1295 * over.  The page *must* be wired.
1296 */
1297void
1298pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1299{
1300	int i;
1301	struct ia64_lpte *pte;
1302
1303	for (i = 0; i < count; i++) {
1304		vm_offset_t tva = va + i * PAGE_SIZE;
1305		int wasvalid;
1306		pte = pmap_find_kpte(tva);
1307		wasvalid = pte->pte_p;
1308		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1309			     0, PTE_PL_KERN, PTE_AR_RWX);
1310		if (wasvalid)
1311			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1312	}
1313}
1314
1315/*
1316 * this routine jerks page mappings from the
1317 * kernel -- it is meant only for temporary mappings.
1318 */
1319void
1320pmap_qremove(vm_offset_t va, int count)
1321{
1322	int i;
1323	struct ia64_lpte *pte;
1324
1325	for (i = 0; i < count; i++) {
1326		pte = pmap_find_kpte(va);
1327		pmap_clear_pte(pte, va);
1328		va += PAGE_SIZE;
1329	}
1330}
1331
1332/*
1333 * Add a wired page to the kva.
1334 */
1335void
1336pmap_kenter(vm_offset_t va, vm_offset_t pa)
1337{
1338	struct ia64_lpte *pte;
1339	int wasvalid;
1340
1341	pte = pmap_find_kpte(va);
1342	wasvalid = pte->pte_p;
1343	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1344	if (wasvalid)
1345		ia64_ptc_g(va, PAGE_SHIFT << 2);
1346}
1347
1348/*
1349 * Remove a page from the kva
1350 */
1351void
1352pmap_kremove(vm_offset_t va)
1353{
1354	struct ia64_lpte *pte;
1355
1356	pte = pmap_find_kpte(va);
1357	pmap_clear_pte(pte, va);
1358}
1359
1360/*
1361 *	Used to map a range of physical addresses into kernel
1362 *	virtual address space.
1363 *
1364 *	The value passed in '*virt' is a suggested virtual address for
1365 *	the mapping. Architectures which can support a direct-mapped
1366 *	physical to virtual region can return the appropriate address
1367 *	within that region, leaving '*virt' unchanged. Other
1368 *	architectures should map the pages starting at '*virt' and
1369 *	update '*virt' with the first usable address after the mapped
1370 *	region.
1371 */
1372vm_offset_t
1373pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1374{
1375	return IA64_PHYS_TO_RR7(start);
1376}
1377
1378/*
1379 * Remove a single page from a process address space
1380 */
1381static void
1382pmap_remove_page(pmap_t pmap, vm_offset_t va)
1383{
1384	struct ia64_lpte *pte;
1385
1386	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1387		("removing page for non-current pmap"));
1388
1389	pte = pmap_find_vhpt(va);
1390	if (pte) {
1391		pmap_remove_pte(pmap, pte, va, 0, 1);
1392		pmap_invalidate_page(pmap, va);
1393	}
1394	return;
1395}
1396
1397/*
1398 *	Remove the given range of addresses from the specified map.
1399 *
1400 *	It is assumed that the start and end are properly
1401 *	rounded to the page size.
1402 */
1403void
1404pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1405{
1406	pmap_t oldpmap;
1407	vm_offset_t va;
1408	pv_entry_t pv;
1409	struct ia64_lpte *pte;
1410
1411	if (pmap == NULL)
1412		return;
1413
1414	if (pmap->pm_stats.resident_count == 0)
1415		return;
1416
1417	oldpmap = pmap_install(pmap);
1418
1419	/*
1420	 * Special handling for removing a single page: a very
1421	 * common operation for which we can easily short-circuit
1422	 * some code.
1423	 */
1424	if (sva + PAGE_SIZE == eva) {
1425		pmap_remove_page(pmap, sva);
1426		pmap_install(oldpmap);
1427		return;
1428	}
1429
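	/*
	 * Walk whichever is smaller: the pmap's pv entry list or the
	 * page range itself.
	 */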
1430	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1431		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1432			va = pv->pv_va;
1433			if (va >= sva && va < eva) {
1434				pte = pmap_find_vhpt(va);
1435				KASSERT(pte != NULL, ("pte"));
1436				pmap_remove_pte(pmap, pte, va, pv, 1);
1437				pmap_invalidate_page(pmap, va);
1438			}
1439		}
1440
1441	} else {
1442		for (va = sva; va < eva; va += PAGE_SIZE) {
1443			pte = pmap_find_vhpt(va);
1444			if (pte) {
1445				pmap_remove_pte(pmap, pte, va, 0, 1);
1446				pmap_invalidate_page(pmap, va);
1447			}
1448		}
1449	}
1450
1451	pmap_install(oldpmap);
1452}
1453
1454/*
1455 *	Routine:	pmap_remove_all
1456 *	Function:
1457 *		Removes this physical page from
1458 *		all physical maps in which it resides.
1459 *		Reflects back modify bits to the pager.
1460 *
1461 *	Notes:
1462 *		Original versions of this routine were very
1463 *		inefficient because they iteratively called
1464 *		pmap_remove (slow...)
1465 */
1466
1467void
1468pmap_remove_all(vm_page_t m)
1469{
1470	pmap_t oldpmap;
1471	pv_entry_t pv;
1472	int s;
1473
1474#if defined(PMAP_DIAGNOSTIC)
1475	/*
1476	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1477	 * pages!
1478	 */
1479	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1480		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1481	}
1482#endif
1483
1484	s = splvm();
1485
1486	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1487		struct ia64_lpte *pte;
1488		pmap_t pmap = pv->pv_pmap;
1489		vm_offset_t va = pv->pv_va;
1490
1491		oldpmap = pmap_install(pmap);
1492		pte = pmap_find_vhpt(va);
1493		KASSERT(pte != NULL, ("pte"));
1494		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1495			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1496		pmap_remove_pte(pmap, pte, va, pv, 1);
1497		pmap_invalidate_page(pmap, va);
1498		pmap_install(oldpmap);
1499	}
1500
1501	vm_page_flag_clear(m, PG_WRITEABLE);
1502
1503	splx(s);
1504	return;
1505}
1506
1507/*
1508 *	Set the physical protection on the
1509 *	specified range of this map as requested.
1510 */
1511void
1512pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1513{
1514	pmap_t oldpmap;
1515	struct ia64_lpte *pte;
1516	int newprot;
1517
1518	if (pmap == NULL)
1519		return;
1520
1521	oldpmap = pmap_install(pmap);
1522
1523	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1524		pmap_remove(pmap, sva, eva);
1525		pmap_install(oldpmap);
1526		return;
1527	}
1528
1529	if (prot & VM_PROT_WRITE) {
1530		pmap_install(oldpmap);
1531		return;
1532	}
1533
1534	newprot = pte_prot(pmap, prot);
1535
1536	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1537		panic("pmap_protect: unaligned addresses");
1538
1539	while (sva < eva) {
1540		/*
1541		 * If page is invalid, skip this page
1542		 */
1543		pte = pmap_find_vhpt(sva);
1544		if (!pte) {
1545			sva += PAGE_SIZE;
1546			continue;
1547		}
1548
1549		if (pmap_pte_prot(pte) != newprot) {
1550			if (pte->pte_ig & PTE_IG_MANAGED) {
1551				vm_offset_t pa = pmap_pte_pa(pte);
1552				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1553				if (pte->pte_d) {
1554					if (pmap_track_modified(sva))
1555						vm_page_dirty(m);
1556					pte->pte_d = 0;
1557				}
1558				if (pte->pte_a) {
1559					vm_page_flag_set(m, PG_REFERENCED);
1560					pte->pte_a = 0;
1561				}
1562			}
1563			pmap_pte_set_prot(pte, newprot);
1564			pmap_update_vhpt(pte, sva);
1565			pmap_invalidate_page(pmap, sva);
1566		}
1567
1568		sva += PAGE_SIZE;
1569	}
1570	pmap_install(oldpmap);
1571}
1572
1573/*
1574 *	Insert the given physical page (p) at
1575 *	the specified virtual address (v) in the
1576 *	target physical map with the protection requested.
1577 *
1578 *	If specified, the page will be wired down, meaning
1579 *	that the related pte can not be reclaimed.
1580 *
1581 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1582 *	or lose information.  That is, this routine must actually
1583 *	insert this page into the given map NOW.
1584 */
1585void
1586pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1587	   boolean_t wired)
1588{
1589	pmap_t oldpmap;
1590	vm_offset_t pa;
1591	vm_offset_t opa;
1592	struct ia64_lpte origpte;
1593	struct ia64_lpte *pte;
1594	int managed;
1595
1596	if (pmap == NULL)
1597		return;
1598
1599	oldpmap = pmap_install(pmap);
1600
1601	va &= ~PAGE_MASK;
1602#ifdef PMAP_DIAGNOSTIC
1603	if (va > VM_MAX_KERNEL_ADDRESS)
1604		panic("pmap_enter: toobig");
1605#endif
1606
1607	/*
1608	 * Find (or create) a pte for the given mapping.
1609	 */
1610	pte = pmap_find_pte(va);
1611	origpte = *pte;
1612
1613	if (origpte.pte_p)
1614		opa = pmap_pte_pa(&origpte);
1615	else
1616		opa = 0;
1617	managed = 0;
1618
1619	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1620
1621	/*
1622	 * Mapping has not changed, must be protection or wiring change.
1623	 */
1624	if (origpte.pte_p && (opa == pa)) {
1625		/*
1626		 * Wiring change, just update stats. We don't worry about
1627		 * wiring PT pages as they remain resident as long as there
1628		 * are valid mappings in them. Hence, if a user page is wired,
1629		 * the PT page will be also.
1630		 */
1631		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1632			pmap->pm_stats.wired_count++;
1633		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1634			pmap->pm_stats.wired_count--;
1635
1636		/*
1637		 * We might be turning off write access to the page,
1638		 * so we go ahead and sense modify status.
1639		 */
1640		if (origpte.pte_ig & PTE_IG_MANAGED) {
1641			if (origpte.pte_d && pmap_track_modified(va)) {
1642				vm_page_t om;
1643				om = PHYS_TO_VM_PAGE(opa);
1644				vm_page_dirty(om);
1645			}
1646		}
1647
1648		managed = origpte.pte_ig & PTE_IG_MANAGED;
1649		goto validate;
1650	}
1651	/*
1652	 * Mapping has changed, invalidate old range and fall
1653	 * through to handle validating new mapping.
1654	 */
1655	if (opa) {
1656		int error;
1657		vm_page_lock_queues();
1658		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1659		vm_page_unlock_queues();
1660		if (error)
1661			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1662	}
1663
1664	/*
1665	 * Enter on the PV list if part of our managed memory.
1666	 */
1667	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1668		pmap_insert_entry(pmap, va, m);
1669		managed |= PTE_IG_MANAGED;
1670	}
1671
1672	/*
1673	 * Increment counters
1674	 */
1675	pmap->pm_stats.resident_count++;
1676	if (wired)
1677		pmap->pm_stats.wired_count++;
1678
1679validate:
1680
1681	/*
1682	 * Now validate mapping with desired protection/wiring. This
1683	 * adds the pte to the VHPT if necessary.
1684	 */
1685	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1686		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1687
1688	/*
1689	 * if the mapping or permission bits are different, we need
1690	 * to invalidate the page.
1691	 */
1692	if (!pmap_equal_pte(&origpte, pte))
1693		pmap_invalidate_page(pmap, va);
1694
1695	pmap_install(oldpmap);
1696}
1697
1698/*
1699 * this code makes some *MAJOR* assumptions:
1700 * 1. Current pmap & pmap exists.
1701 * 2. Not wired.
1702 * 3. Read access.
1703 * 4. No page table pages.
1704 * 5. Tlbflush is deferred to calling procedure.
1705 * 6. Page IS managed.
1706 * but is *MUCH* faster than pmap_enter...
1707 */
1708
1709vm_page_t
1710pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
1711{
1712	struct ia64_lpte *pte;
1713	pmap_t oldpmap;
1714
1715	oldpmap = pmap_install(pmap);
1716
1717	pte = pmap_find_pte(va);
1718	if (pte->pte_p)
1719		goto reinstall;
1720
1721	/*
1722	 * Enter on the PV list since it's part of our managed memory.
1723	 */
1724	pmap_insert_entry(pmap, va, m);
1725
1726	/*
1727	 * Increment counters
1728	 */
1729	pmap->pm_stats.resident_count++;
1730
1731	/*
1732	 * Initialize the PTE with read-only protection and enter it into the VHPT.
1733	 */
1734	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1735		     PTE_IG_MANAGED,
1736		     PTE_PL_USER, PTE_AR_R);
1737reinstall:
1738	pmap_install(oldpmap);
1739	return (NULL);
1740}
1741
1742/*
1743 * Make temporary mapping for a physical address. This is called
1744 * during dump.
1745 */
1746void *
1747pmap_kenter_temporary(vm_offset_t pa, int i)
1748{
1749	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1750}
1751
1752/*
1753 * pmap_object_init_pt preloads the ptes for a given object
1754 * into the specified pmap.  This eliminates the blast of soft
1755 * faults on process startup and immediately after an mmap.
1756 */
1757void
1758pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1759		    vm_object_t object, vm_pindex_t pindex,
1760		    vm_size_t size)
1761{
1762
1763	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1764	KASSERT(object->type == OBJT_DEVICE,
1765	    ("pmap_object_init_pt: non-device object"));
1766}
1767
1768/*
1769 * pmap_prefault provides a quick way of clustering
1770 * page faults in a process's address space.  It is a "cousin"
1771 * of pmap_object_init_pt, except it runs at page fault time instead
1772 * of mmap time.
1773 */
1774#define PFBAK 4
1775#define PFFOR 4
1776#define PAGEORDER_SIZE (PFBAK+PFFOR)
1777
1778static int pmap_prefault_pageorder[] = {
1779	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
1780	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1781	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1782	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1783};
1784
1785void
1786pmap_prefault(pmap, addra, entry)
1787	pmap_t pmap;
1788	vm_offset_t addra;
1789	vm_map_entry_t entry;
1790{
1791	int i;
1792	vm_offset_t starta;
1793	vm_offset_t addr;
1794	vm_pindex_t pindex;
1795	vm_page_t m, mpte;
1796	vm_object_t object;
1797
1798	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1799		return;
1800
1801	object = entry->object.vm_object;
1802
1803	starta = addra - PFBAK * PAGE_SIZE;
1804	if (starta < entry->start) {
1805		starta = entry->start;
1806	} else if (starta > addra) {
1807		starta = 0;
1808	}
1809
1810	mpte = NULL;
1811	for (i = 0; i < PAGEORDER_SIZE; i++) {
1812		vm_object_t backing_object, lobject;
1813		struct ia64_lpte *pte;
1814
1815		addr = addra + pmap_prefault_pageorder[i];
1816		if (addr > addra + (PFFOR * PAGE_SIZE))
1817			addr = 0;
1818
1819		if (addr < starta || addr >= entry->end)
1820			continue;
1821
1822		pte = pmap_find_vhpt(addr);
1823		if (pte && pte->pte_p)
1824			continue;
1825
1826		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
1827		lobject = object;
1828		VM_OBJECT_LOCK(lobject);
1829		while ((m = vm_page_lookup(lobject, pindex)) == NULL &&
1830		    lobject->type == OBJT_DEFAULT &&
1831		    (backing_object = lobject->backing_object) != NULL) {
1832			if (lobject->backing_object_offset & PAGE_MASK)
1833				break;
1834			pindex += lobject->backing_object_offset >> PAGE_SHIFT;
1835			VM_OBJECT_LOCK(backing_object);
1836			VM_OBJECT_UNLOCK(lobject);
1837			lobject = backing_object;
1838		}
1839		VM_OBJECT_UNLOCK(lobject);
1840		/*
1841		 * Give up when a page is not in memory.
1842		 */
1843		if (m == NULL)
1844			break;
1845		vm_page_lock_queues();
1846		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1847			(m->busy == 0) &&
1848		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1849
1850			if ((m->queue - m->pc) == PQ_CACHE) {
1851				vm_page_deactivate(m);
1852			}
1853			vm_page_busy(m);
1854			vm_page_unlock_queues();
1855			pmap_enter_quick(pmap, addr, m, NULL);
1856			vm_page_lock_queues();
1857			vm_page_wakeup(m);
1858		}
1859		vm_page_unlock_queues();
1860	}
1861}
1862
1863/*
1864 *	Routine:	pmap_change_wiring
1865 *	Function:	Change the wiring attribute for a map/virtual-address
1866 *			pair.
1867 *	In/out conditions:
1868 *			The mapping must already exist in the pmap.
1869 */
1870void
1871pmap_change_wiring(pmap, va, wired)
1872	register pmap_t pmap;
1873	vm_offset_t va;
1874	boolean_t wired;
1875{
1876	pmap_t oldpmap;
1877	struct ia64_lpte *pte;
1878
1879	if (pmap == NULL)
1880		return;
1881
1882	oldpmap = pmap_install(pmap);
1883
1884	pte = pmap_find_vhpt(va);
1885	KASSERT(pte != NULL, ("pte"));
1886	if (wired && !pmap_pte_w(pte))
1887		pmap->pm_stats.wired_count++;
1888	else if (!wired && pmap_pte_w(pte))
1889		pmap->pm_stats.wired_count--;
1890
1891	/*
1892	 * Wiring is not a hardware characteristic so there is no need to
1893	 * invalidate TLB.
1894	 */
1895	pmap_pte_set_w(pte, wired);
1896
1897	pmap_install(oldpmap);
1898}
1899
1900
1901
1902/*
1903 *	Copy the range specified by src_addr/len
1904 *	from the source map to the range dst_addr/len
1905 *	in the destination map.
1906 *
1907 *	This routine is only advisory and need not do anything.
1908 */
1909
1910void
1911pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1912	  vm_offset_t src_addr)
1913{
1914}
1915
1916
1917/*
1918 *	pmap_zero_page zeros the specified hardware page by
1919 *	mapping it into virtual memory and using bzero to clear
1920 *	its contents.
1921 */
1922
1923void
1924pmap_zero_page(vm_page_t m)
1925{
1926	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1927	bzero((caddr_t) va, PAGE_SIZE);
1928}
1929
1930
1931/*
1932 *	pmap_zero_page_area zeros the specified hardware page by
1933 *	mapping it into virtual memory and using bzero to clear
1934 *	its contents.
1935 *
1936 *	off and size must reside within a single page.
1937 */
1938
1939void
1940pmap_zero_page_area(vm_page_t m, int off, int size)
1941{
1942	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1943	bzero((char *)(caddr_t)va + off, size);
1944}
1945
1946
1947/*
1948 *	pmap_zero_page_idle zeros the specified hardware page by
1949 *	mapping it into virtual memory and using bzero to clear
1950 *	its contents.  This is for the vm_idlezero process.
1951 */
1952
1953void
1954pmap_zero_page_idle(vm_page_t m)
1955{
1956	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1957	bzero((caddr_t) va, PAGE_SIZE);
1958}
1959
1960
1961/*
1962 *	pmap_copy_page copies the specified (machine independent)
1963 *	page by mapping the page into virtual memory and using
1964 *	bcopy to copy the page, one machine dependent page at a
1965 *	time.
1966 */
1967void
1968pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1969{
1970	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
1971	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
1972	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
1973}
1974
1975/*
1976 * Returns true if the pmap's pv is one of the first
1977 * 16 pvs linked to from this page.  This count may
1978 * be changed upwards or downwards in the future; it
1979 * is only necessary that true be returned for a small
1980 * subset of pmaps for proper page aging.
1981 */
1982boolean_t
1983pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1984{
1985	pv_entry_t pv;
1986	int loops = 0;
1987	int s;
1988
1989	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
1990		return FALSE;
1991
1992	s = splvm();
1993
1994	/*
1995	 * Check current mappings, returning immediately if found.
1996	 */
1997	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1998		if (pv->pv_pmap == pmap) {
1999			splx(s);
2000			return TRUE;
2001		}
2002		loops++;
2003		if (loops >= 16)
2004			break;
2005	}
2006	splx(s);
2007	return (FALSE);
2008}
2009
2010#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2011/*
2012 * Remove all pages from the specified address space;
2013 * this aids process exit speeds.  Also, this code
2014 * is special-cased for the current process only, but
2015 * can have the more generic (and slightly slower)
2016 * mode enabled.  This is much faster than pmap_remove
2017 * in the case of running down an entire address space.
2018 */
2019void
2020pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2021{
2022	pv_entry_t pv, npv;
2023	int s;
2024
2025#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2026	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2027		printf("warning: pmap_remove_pages called with non-current pmap\n");
2028		return;
2029	}
2030#endif
2031
2032	s = splvm();
2033	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2034		pv;
2035		pv = npv) {
2036		struct ia64_lpte *pte;
2037
2038		npv = TAILQ_NEXT(pv, pv_plist);
2039
2040		if (pv->pv_va >= eva || pv->pv_va < sva) {
2041			continue;
2042		}
2043
2044		pte = pmap_find_vhpt(pv->pv_va);
2045		KASSERT(pte != NULL, ("pte"));
2046		if (pte->pte_ig & PTE_IG_WIRED)
2047			continue;
2048
2049		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2050	}
2051	splx(s);
2052
2053	pmap_invalidate_all(pmap);
2054}
2055
2056/*
2057 *      pmap_page_protect:
2058 *
2059 *      Lower the permission for all mappings to a given page.
2060 */
2061void
2062pmap_page_protect(vm_page_t m, vm_prot_t prot)
2063{
2064	pv_entry_t pv;
2065
2066	if ((prot & VM_PROT_WRITE) != 0)
2067		return;
2068	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2069		if ((m->flags & PG_WRITEABLE) == 0)
2070			return;
2071		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2072			int newprot = pte_prot(pv->pv_pmap, prot);
2073			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2074			struct ia64_lpte *pte;
2075			pte = pmap_find_vhpt(pv->pv_va);
2076			KASSERT(pte != NULL, ("pte"));
2077			pmap_pte_set_prot(pte, newprot);
2078			pmap_update_vhpt(pte, pv->pv_va);
2079			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2080			pmap_install(oldpmap);
2081		}
2082		vm_page_flag_clear(m, PG_WRITEABLE);
2083	} else {
2084		pmap_remove_all(m);
2085	}
2086}
2087
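/*
 * Usage note (illustrative only): the two interesting cases are a
 * read-only downgrade and a complete removal,
 *
 *	pmap_page_protect(m, VM_PROT_READ);	write-protects every mapping
 *	pmap_page_protect(m, VM_PROT_NONE);	equivalent to pmap_remove_all(m)
 */
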
2088/*
2089 *	pmap_ts_referenced:
2090 *
2091 *	Return a count of reference bits for a page, clearing those bits.
2092 *	It is not necessary for every reference bit to be cleared, but it
2093 *	is necessary that 0 only be returned when there are truly no
2094 *	reference bits set.
2095 *
2096 *	XXX: The exact number of bits to check and clear is a matter that
2097 *	should be tested and standardized at some point in the future for
2098 *	optimal aging of shared pages.
2099 */
2100int
2101pmap_ts_referenced(vm_page_t m)
2102{
2103	pv_entry_t pv;
2104	int count = 0;
2105
2106	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2107		return 0;
2108
2109	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2110		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2111		struct ia64_lpte *pte;
2112		pte = pmap_find_vhpt(pv->pv_va);
2113		KASSERT(pte != NULL, ("pte"));
2114		if (pte->pte_a) {
2115			count++;
2116			pte->pte_a = 0;
2117			pmap_update_vhpt(pte, pv->pv_va);
2118			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2119		}
2120		pmap_install(oldpmap);
2121	}
2122
2123	return count;
2124}
2125
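/*
 * Hedged sketch of how a page-aging loop might consume the returned
 * count (act_count, ACT_ADVANCE and ACT_MAX come from the machine-
 * independent VM layer; the exact policy shown is an assumption, not
 * taken from this file):
 *
 *	refs = pmap_ts_referenced(m);
 *	if (refs != 0 && m->act_count < ACT_MAX)
 *		m->act_count += ACT_ADVANCE;
 */
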
2126#if 0
2127/*
2128 *	pmap_is_referenced:
2129 *
2130 *	Return whether or not the specified physical page was referenced
2131 *	in any physical maps.
2132 */
2133static boolean_t
2134pmap_is_referenced(vm_page_t m)
2135{
2136	pv_entry_t pv;
2137
2138	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2139		return FALSE;
2140
2141	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2142		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2143		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2144		pmap_install(oldpmap);
2145		KASSERT(pte != NULL, ("pte"));
2146		if (pte->pte_a)
2147			return 1;
2148	}
2149
2150	return 0;
2151}
2152#endif
2153
2154/*
2155 *	pmap_is_modified:
2156 *
2157 *	Return whether or not the specified physical page was modified
2158 *	in any physical maps.
2159 */
2160boolean_t
2161pmap_is_modified(vm_page_t m)
2162{
2163	pv_entry_t pv;
2164
2165	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2166		return FALSE;
2167
2168	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2169		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2170		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2171		pmap_install(oldpmap);
2172		KASSERT(pte != NULL, ("pte"));
2173		if (pte->pte_d)
2174			return 1;
2175	}
2176
2177	return 0;
2178}
2179
2180/*
2181 *	Clear the modify bits on the specified physical page.
2182 */
2183void
2184pmap_clear_modify(vm_page_t m)
2185{
2186	pv_entry_t pv;
2187
2188	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2189		return;
2190
2191	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2192		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2193		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2194		KASSERT(pte != NULL, ("pte"));
2195		if (pte->pte_d) {
2196			pte->pte_d = 0;
2197			pmap_update_vhpt(pte, pv->pv_va);
2198			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2199		}
2200		pmap_install(oldpmap);
2201	}
2202}
2203
2204/*
2205 *	pmap_clear_reference:
2206 *
2207 *	Clear the reference bit on the specified physical page.
2208 */
2209void
2210pmap_clear_reference(vm_page_t m)
2211{
2212	pv_entry_t pv;
2213
2214	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2215		return;
2216
2217	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2218		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2219		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2220		KASSERT(pte != NULL, ("pte"));
2221		if (pte->pte_a) {
2222			pte->pte_a = 0;
2223			pmap_update_vhpt(pte, pv->pv_va);
2224			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2225		}
2226		pmap_install(oldpmap);
2227	}
2228}
2229
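/*
 * The referenced/modified bit routines above share one per-mapping
 * pattern whenever a PTE bit must be inspected or cleared for every
 * mapping of a page:
 *
 *	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 *		oldpmap = pmap_install(pv->pv_pmap);
 *		pte = pmap_find_vhpt(pv->pv_va);
 *		... test or clear pte->pte_a / pte->pte_d ...
 *		pmap_update_vhpt(pte, pv->pv_va);
 *		pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 *		pmap_install(oldpmap);
 *	}
 *
 * pmap_install() makes the mapping's pmap current so that the VHPT
 * lookup and the invalidation operate on the right region IDs.
 */
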
2230/*
2231 * Miscellaneous support routines follow
2232 */
2233
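/*
 * Build the protection_codes[][] table: for each of the eight
 * VM_PROT_{READ,WRITE,EXECUTE} combinations, record the ia64 PTE
 * access-rights and privilege-level bits to use, with one row for
 * kernel mappings and one for user mappings.
 */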
2234static void
2235ia64_protection_init()
2236{
2237	int prot, *kp, *up;
2238
2239	kp = protection_codes[0];
2240	up = protection_codes[1];
2241
2242	for (prot = 0; prot < 8; prot++) {
2243		switch (prot) {
2244		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2245			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2246			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2247			break;
2248
2249		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2250			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2251			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2252			break;
2253
2254		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2255			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2256			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2257			break;
2258
2259		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2260			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2261			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2262			break;
2263
2264		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2265			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2266			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2267			break;
2268
2269		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2270			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2271			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2272			break;
2273
2274		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2275			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2276			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2277			break;
2278
2279		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2280			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2281			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2282			break;
2283		}
2284	}
2285}
2286
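/*
 * Sketch of how the table is consumed (hedged: pte_prot(), used by
 * pmap_page_protect() above, is defined earlier in this file, and the
 * exact indexing shown here is an assumption): a row is chosen by pmap
 * type and a column by the low three VM_PROT_* bits,
 *
 *	ar_pl = protection_codes[pmap == kernel_pmap ? 0 : 1][prot & 7];
 */
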
2287/*
2288 * Map a set of physical memory pages into the kernel virtual
2289 * address space. Return a pointer to where it is mapped. This
2290 * routine is intended to be used for mapping device memory,
2291 * NOT real memory.
2292 */
2293void *
2294pmap_mapdev(vm_offset_t pa, vm_size_t size)
2295{
2296	return (void*) IA64_PHYS_TO_RR6(pa);
2297}
2298
2299/*
2300 * 'Unmap' a range mapped by pmap_mapdev().
2301 */
2302void
2303pmap_unmapdev(vm_offset_t va, vm_size_t size)
2304{
2305	return;
2306}
2307
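/*
 * Illustrative (hypothetical) driver usage, names assumed: on ia64,
 * pmap_mapdev() simply returns the region 6 (uncacheable) address for
 * the physical range and pmap_unmapdev() is a no-op, so no kernel
 * virtual address space is consumed:
 *
 *	csr = pmap_mapdev(pa, size);
 *	... access device registers through csr ...
 *	pmap_unmapdev((vm_offset_t)csr, size);
 */
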
2308/*
2309 * Perform the pmap work for mincore(2).
2310 */
2311int
2312pmap_mincore(pmap_t pmap, vm_offset_t addr)
2313{
2314	pmap_t oldpmap;
2315	struct ia64_lpte *pte;
2316	int val = 0;
2317
2318	oldpmap = pmap_install(pmap);
2319	pte = pmap_find_vhpt(addr);
2320	pmap_install(oldpmap);
2321
2322	if (!pte)
2323		return 0;
2324
2325	if (pmap_pte_v(pte)) {
2326		vm_page_t m;
2327		vm_offset_t pa;
2328
2329		val = MINCORE_INCORE;
2330		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2331			return val;
2332
2333		pa = pmap_pte_pa(pte);
2334
2335		m = PHYS_TO_VM_PAGE(pa);
2336
2337		/*
2338		 * Modified by us
2339		 */
2340		if (pte->pte_d)
2341			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2342		else {
2343			/*
2344			 * Modified by someone
2345			 */
2346			vm_page_lock_queues();
2347			if (pmap_is_modified(m))
2348				val |= MINCORE_MODIFIED_OTHER;
2349			vm_page_unlock_queues();
2350		}
2351		/*
2352		 * Referenced by us
2353		 */
2354		if (pte->pte_a)
2355			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2356		else {
2357			/*
2358			 * Referenced by someone
2359			 */
2360			vm_page_lock_queues();
2361			if (pmap_ts_referenced(m)) {
2362				val |= MINCORE_REFERENCED_OTHER;
2363				vm_page_flag_set(m, PG_REFERENCED);
2364			}
2365			vm_page_unlock_queues();
2366		}
2367	}
2368	return val;
2369}
2370
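/*
 * Illustrative (hypothetical) use of the returned bits, roughly what
 * the mincore(2) system call does when filling its per-page vector:
 *
 *	val = pmap_mincore(pmap, addr);
 *	if (val == 0)
 *		... fall back to looking the page up in the VM object ...
 *	else
 *		... report MINCORE_INCORE plus any MODIFIED/REFERENCED bits ...
 */
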
2371void
2372pmap_activate(struct thread *td)
2373{
2374	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2375}
2376
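/*
 * pmap_switch() makes the given pmap current on this CPU: it clears the
 * CPU's bit in the previous pmap's pm_active mask, loads the pmap's
 * region IDs into region registers 0-4 (or a fixed per-region ID when
 * pm is NULL), sets the CPU's bit in the new pmap's pm_active mask and
 * records it as current_pmap, finishing with a data serialize so the
 * new region registers take effect.  The caller must hold sched_lock.
 * Each region register value encodes the RID in bits 8 and up, the
 * page size (PAGE_SHIFT) in bits 2-7 and the VHPT-enable bit in bit 0.
 */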
2377pmap_t
2378pmap_switch(pmap_t pm)
2379{
2380	pmap_t prevpm;
2381	int i;
2382
2383	mtx_assert(&sched_lock, MA_OWNED);
2384
2385	prevpm = PCPU_GET(current_pmap);
2386	if (prevpm == pm)
2387		return (prevpm);
2388	if (prevpm != NULL)
2389		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
2390	if (pm == NULL) {
2391		for (i = 0; i < 5; i++) {
2392			ia64_set_rr(IA64_RR_BASE(i),
2393			    (i << 8)|(PAGE_SHIFT << 2)|1);
2394		}
2395	} else {
2396		for (i = 0; i < 5; i++) {
2397			ia64_set_rr(IA64_RR_BASE(i),
2398			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2399		}
2400		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
2401	}
2402	PCPU_SET(current_pmap, pm);
2403	__asm __volatile("srlz.d");
2404	return (prevpm);
2405}
2406
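/*
 * pmap_install() is a convenience wrapper that takes sched_lock around
 * pmap_switch().  The pv-walking routines above use it to temporarily
 * switch to a mapping's pmap and then restore the previous one.
 */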
2407static pmap_t
2408pmap_install(pmap_t pm)
2409{
2410	pmap_t prevpm;
2411
2412	mtx_lock_spin(&sched_lock);
2413	prevpm = pmap_switch(pm);
2414	mtx_unlock_spin(&sched_lock);
2415	return (prevpm);
2416}
2417
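/*
 * Return a preferred virtual address for the given mapping request.
 * The ia64 pmap expresses no alignment preference and returns the
 * requested address unchanged.
 */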
2418vm_offset_t
2419pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2420{
2421
2422	return addr;
2423}
2424
2425#include "opt_ddb.h"
2426
2427#ifdef DDB
2428
2429#include <ddb/ddb.h>
2430
2431static const char*	psnames[] = {
2432	"1B",	"2B",	"4B",	"8B",
2433	"16B",	"32B",	"64B",	"128B",
2434	"256B",	"512B",	"1K",	"2K",
2435	"4K",	"8K",	"16K",	"32K",
2436	"64K",	"128K",	"256K",	"512K",
2437	"1M",	"2M",	"4M",	"8M",
2438	"16M",	"32M",	"64M",	"128M",
2439	"256M",	"512M",	"1G",	"2G"
2440};
2441
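/*
 * Dump the instruction (type 0) or data (type 1) translation registers:
 * ask PAL_VM_SUMMARY how many are implemented, then read each one with
 * PAL_VM_TR_READ and print its fields, zeroing any field the PAL call
 * reports as invalid.
 */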
2442static void
2443print_trs(int type)
2444{
2445	struct ia64_pal_result	res;
2446	int			i, maxtr;
2447	struct {
2448		struct ia64_pte	pte;
2449		struct ia64_itir itir;
2450		struct ia64_ifa ifa;
2451		struct ia64_rr	rr;
2452	}			buf;
2453	static const char*	manames[] = {
2454		"WB",	"bad",	"bad",	"bad",
2455		"UC",	"UCE",	"WC",	"NaT",
2456
2457	};
2458
2459	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2460	if (res.pal_status != 0) {
2461		db_printf("Can't get VM summary\n");
2462		return;
2463	}
2464
2465	if (type == 0)
2466		maxtr = (res.pal_result[0] >> 40) & 0xff;
2467	else
2468		maxtr = (res.pal_result[0] >> 32) & 0xff;
2469
2470	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2471	for (i = 0; i <= maxtr; i++) {
2472		bzero(&buf, sizeof(buf));
2473		res = ia64_call_pal_stacked_physical
2474			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2475		if (!(res.pal_result[0] & 1))
2476			buf.pte.pte_ar = 0;
2477		if (!(res.pal_result[0] & 2))
2478			buf.pte.pte_pl = 0;
2479		if (!(res.pal_result[0] & 4))
2480			buf.pte.pte_d = 0;
2481		if (!(res.pal_result[0] & 8))
2482			buf.pte.pte_ma = 0;
2483		db_printf(
2484			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2485			buf.ifa.ifa_ig & 1,
2486			buf.rr.rr_rid,
2487			buf.ifa.ifa_vpn,
2488			buf.pte.pte_ppn,
2489			psnames[buf.itir.itir_ps],
2490			buf.pte.pte_ed,
2491			buf.pte.pte_ar,
2492			buf.pte.pte_pl,
2493			buf.pte.pte_d,
2494			buf.pte.pte_a,
2495			manames[buf.pte.pte_ma],
2496			buf.pte.pte_p,
2497			buf.itir.itir_key);
2498	}
2499}
2500
2501DB_COMMAND(itr, db_itr)
2502{
2503	print_trs(0);
2504}
2505
2506DB_COMMAND(dtr, db_dtr)
2507{
2508	print_trs(1);
2509}
2510
2511DB_COMMAND(rr, db_rr)
2512{
2513	int i;
2514	u_int64_t t;
2515	struct ia64_rr rr;
2516
2517	printf("RR RID    PgSz VE\n");
2518	for (i = 0; i < 8; i++) {
2519		__asm __volatile ("mov %0=rr[%1]"
2520				  : "=r"(t)
2521				  : "r"(IA64_RR_BASE(i)));
2522		*(u_int64_t *) &rr = t;
2523		printf("%d  %06x %4s %d\n",
2524		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2525	}
2526}
2527
2528DB_COMMAND(thash, db_thash)
2529{
2530	if (!have_addr)
2531		return;
2532
2533	db_printf("%p\n", (void *) ia64_thash(addr));
2534}
2535
2536DB_COMMAND(ttag, db_ttag)
2537{
2538	if (!have_addr)
2539		return;
2540
2541	db_printf("0x%lx\n", ia64_ttag(addr));
2542}
2543
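/*
 * From the ddb prompt the commands defined above are invoked as "itr",
 * "dtr", "rr", "thash <addr>" and "ttag <addr>"; the last two require
 * an address argument and print the results of ia64_thash() and
 * ia64_ttag() for it, respectively.
 */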
2544#endif
2545