1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 116510 2003-06-18 02:57:38Z alc $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
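/*
 * Illustrative sketch (kept out of the build): the region register
 * value written by ia64_set_rr() in pmap_bootstrap() below packs the
 * region ID starting at bit 8, the preferred page size in bits 7..2
 * and the VHPT-walker enable bit in bit 0.  The helper name here is
 * hypothetical and exists only to show the encoding.
 */
#if 0
static __inline u_int64_t
example_rr_value(u_int64_t rid, u_int64_t pgshift, int ve)
{
	return ((rid << 8) | (pgshift << 2) | (ve ? 1 : 0));
}
/* e.g. region 5 at boot: example_rr_value(5, PAGE_SHIFT, 1) */
#endif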
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120#include <vm/uma_int.h>
121
122#include <sys/user.h>
123
124#include <machine/cpu.h>
125#include <machine/pal.h>
126#include <machine/md_var.h>
127
128/* XXX move to a header. */
129extern u_int64_t ia64_gateway_page[];
130
131MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
132
133#ifndef KSTACK_MAX_PAGES
134#define	KSTACK_MAX_PAGES 32
135#endif
136
137#ifndef PMAP_SHPGPERPROC
138#define PMAP_SHPGPERPROC 200
139#endif
140
141#if defined(DIAGNOSTIC)
142#define PMAP_DIAGNOSTIC
143#endif
144
145#define MINPV 2048	/* Preallocate at least this many */
146#define MAXPV 20480	/* But no more than this */
147
148#if 0
149#define PMAP_DIAGNOSTIC
150#define PMAP_DEBUG
151#endif
152
153#if !defined(PMAP_DIAGNOSTIC)
154#define PMAP_INLINE __inline
155#else
156#define PMAP_INLINE
157#endif
158
159/*
160 * Get PDEs and PTEs for user/kernel address space
161 */
162#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
163#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
164#define pmap_pte_v(pte)		((pte)->pte_p)
165#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
166#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
167
168#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
169				:((pte)->pte_ig &= ~PTE_IG_WIRED))
170#define pmap_pte_set_prot(pte, v) do {		\
171    (pte)->pte_ar = v >> 2;			\
172    (pte)->pte_pl = v & 3;			\
173} while (0)
174
175/*
176 * Given a map and a machine independent protection code,
177 * convert to an ia64 protection code.
178 */
179#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
180#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
181#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
182int	protection_codes[2][8];
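/*
 * Illustrative sketch (kept out of the build): a protection_codes[]
 * entry stores the ia64 access-rights value above the 2-bit privilege
 * level, which is exactly what pte_prot_pl()/pte_prot_ar() and
 * pmap_pte_set_prot() pull apart.  The function name and the
 * PTE_AR_RWX/PTE_PL_USER pairing are just an example; the real table
 * is filled in by ia64_protection_init().
 */
#if 0
static __inline void
example_prot_roundtrip(struct ia64_lpte *pte)
{
	int code = (PTE_AR_RWX << 2) | PTE_PL_USER;

	pmap_pte_set_prot(pte, code);
	KASSERT(pmap_pte_prot(pte) == code, ("prot round-trip"));
}
#endif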
183
184/*
185 * Return non-zero if this pmap is currently active
186 */
187#define pmap_isactive(pmap)	(pmap->pm_active)
188
189/*
190 * Statically allocated kernel pmap
191 */
192struct pmap kernel_pmap_store;
193
194vm_offset_t avail_start;	/* PA of first available physical page */
195vm_offset_t avail_end;		/* PA of last available physical page */
196vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
197vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
198static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
199
200vm_offset_t vhpt_base, vhpt_size;
201struct mtx pmap_vhptmutex;
202
203/*
204 * We use an object to own the kernel's 'page tables'. For simplicity,
205 * we use one page directory to index a set of pages containing
206 * ia64_lptes. This gives us up to 2GB of kernel virtual space.
207 */
208static int nkpt;
209struct ia64_lpte **ia64_kptdir;
210#define KPTE_DIR_INDEX(va) \
211	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
212#define KPTE_PTE_INDEX(va) \
213	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
214#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
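/*
 * Worked numbers for the scheme above, assuming the usual 8KB ia64
 * page (PAGE_SHIFT == 13) and a 32-byte struct ia64_lpte: NKPTEPG is
 * 8192 / 32 == 256, so one PTE page maps 256 * 8KB == 2MB of KVA; the
 * directory page holds 8192 / 8 == 1024 pointers, giving the 1024 *
 * 2MB == 2GB figure mentioned above.
 */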
215
216vm_offset_t kernel_vm_end;
217
218/* Values for ptc.e. XXX values for SKI. */
219static u_int64_t pmap_ptc_e_base = 0x100000000;
220static u_int64_t pmap_ptc_e_count1 = 3;
221static u_int64_t pmap_ptc_e_count2 = 2;
222static u_int64_t pmap_ptc_e_stride1 = 0x2000;
223static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
224
225/*
226 * Data for the RID allocator
227 */
228static int pmap_ridcount;
229static int pmap_rididx;
230static int pmap_ridmapsz;
231static int pmap_ridmax;
232static u_int64_t *pmap_ridmap;
233struct mtx pmap_ridmutex;
234
235/*
236 * Data for the pv entry allocation mechanism
237 */
238static uma_zone_t pvzone;
239static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
240int pmap_pagedaemon_waken;
241static struct pv_entry *pvbootentries;
242static int pvbootnext, pvbootmax;
243
244/*
245 * Data for allocating PTEs for user processes.
246 */
247static uma_zone_t ptezone;
248
249/*
250 * VHPT instrumentation.
251 */
252static int pmap_vhpt_inserts;
253static int pmap_vhpt_collisions;
254static int pmap_vhpt_resident;
255SYSCTL_DECL(_vm_stats);
256SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
257SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
258	   &pmap_vhpt_inserts, 0, "");
259SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
260	   &pmap_vhpt_collisions, 0, "");
261SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
262	   &pmap_vhpt_resident, 0, "");
263
264static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
265static pv_entry_t get_pv_entry(void);
266static void	ia64_protection_init(void);
267
268static pmap_t	pmap_install(pmap_t);
269static void	pmap_invalidate_all(pmap_t pmap);
270static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
271
272vm_offset_t
273pmap_steal_memory(vm_size_t size)
274{
275	vm_size_t bank_size;
276	vm_offset_t pa, va;
277
278	size = round_page(size);
279
280	bank_size = phys_avail[1] - phys_avail[0];
281	while (size > bank_size) {
282		int i;
283		for (i = 0; phys_avail[i+2]; i+= 2) {
284			phys_avail[i] = phys_avail[i+2];
285			phys_avail[i+1] = phys_avail[i+3];
286		}
287		phys_avail[i] = 0;
288		phys_avail[i+1] = 0;
289		if (!phys_avail[0])
290			panic("pmap_steal_memory: out of memory");
291		bank_size = phys_avail[1] - phys_avail[0];
292	}
293
294	pa = phys_avail[0];
295	phys_avail[0] += size;
296
297	va = IA64_PHYS_TO_RR7(pa);
298	bzero((caddr_t) va, size);
299	return va;
300}
301
302/*
303 *	Bootstrap the system enough to run with virtual memory.
304 */
305void
306pmap_bootstrap()
307{
308	int i, j, count, ridbits;
309	struct ia64_pal_result res;
310
311	/*
312	 * Query the PAL Code to find the loop parameters for the
313	 * ptc.e instruction.
314	 */
315	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
316	if (res.pal_status != 0)
317		panic("Can't configure ptc.e parameters");
318	pmap_ptc_e_base = res.pal_result[0];
319	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
320	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
321	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
322	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
323	if (bootverbose)
324		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
325		       "stride1=0x%lx, stride2=0x%lx\n",
326		       pmap_ptc_e_base,
327		       pmap_ptc_e_count1,
328		       pmap_ptc_e_count2,
329		       pmap_ptc_e_stride1,
330		       pmap_ptc_e_stride2);
331
332	/*
333	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
334	 *
335	 * We currently need at least 19 bits in the RID because PID_MAX
336	 * can only be encoded in 17 bits and we need RIDs for 5 regions
337	 * per process. With PID_MAX equalling 99999 this means that we
338	 * need to be able to encode 499995 (=5*PID_MAX).
339	 * The Itanium processor only has 18 bits and the architected
340	 * minimum is exactly that. So, we cannot use a PID based scheme
341	 * in those cases. Enter pmap_ridmap...
342	 * We should avoid the map when running on a processor that has
343	 * implemented enough bits. This means that we should pass the
344	 * process/thread ID to pmap. This we currently don't do, so we
345	 * use the map anyway. However, we don't want to allocate a map
346	 * that is large enough to cover the range dictated by the number
347	 * of bits in the RID, because that may result in a RID map of
348	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
349	 * The bottom line: we create a 32KB map when the processor only
350	 * implements 18 bits (or when we can't figure it out). Otherwise
351	 * we create a 64KB map.
352	 */
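	/*
	 * Worked numbers for the map sizes mentioned above: 18 RID
	 * bits give 1 << 18 == 262144 RIDs and a 262144 / 8 == 32KB
	 * bitmap, 19 bits double that to 64KB, and a full 24-bit RID
	 * space would need 2^24 / 8 == 2MB.
	 */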
353	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
354	if (res.pal_status != 0) {
355		if (bootverbose)
356			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
357		ridbits = 18; /* guaranteed minimum */
358	} else {
359		ridbits = (res.pal_result[1] >> 8) & 0xff;
360		if (bootverbose)
361			printf("Processor supports %d Region ID bits\n",
362			    ridbits);
363	}
364	if (ridbits > 19)
365		ridbits = 19;
366
367	pmap_ridmax = (1 << ridbits);
368	pmap_ridmapsz = pmap_ridmax / 64;
369	pmap_ridmap = (u_int64_t *)pmap_steal_memory(pmap_ridmax / 8);
370	pmap_ridmap[0] |= 0xff;
371	pmap_rididx = 0;
372	pmap_ridcount = 8;
373	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
374
375	/*
376	 * Allocate some memory for initial kernel 'page tables'.
377	 */
378	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
379	for (i = 0; i < NKPT; i++) {
380		ia64_kptdir[i] = (void*)pmap_steal_memory(PAGE_SIZE);
381	}
382	nkpt = NKPT;
383	kernel_vm_end = NKPT * PAGE_SIZE * NKPTEPG + VM_MIN_KERNEL_ADDRESS -
384	    VM_GATEWAY_SIZE;
385
386	avail_start = phys_avail[0];
387	for (i = 0; phys_avail[i+2]; i+= 2) ;
388	avail_end = phys_avail[i+1];
389	count = i+2;
390
391	/*
392	 * Figure out a useful size for the VHPT, based on the size of
393	 * physical memory and try to locate a region which is large
394	 * enough to contain the VHPT (which must be a power of two in
395	 * size and aligned to a natural boundary).
396	 * Don't use the difference between avail_start and avail_end
397	 * as a measure of memory size. The address space is often
398	 * sparse enough that we would (try to) create a huge VHPT.
399	 */
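	/*
	 * The sizing loop below works out to roughly one 32-byte
	 * long-format VHPT entry per physical page (Maxmem is in
	 * pages), rounded up to a power of two, with a 32KB floor.
	 */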
400	vhpt_size = 15;
401	while ((1<<vhpt_size) < Maxmem * 32)
402		vhpt_size++;
403
404	vhpt_base = 0;
405	while (!vhpt_base) {
406		vm_offset_t mask;
407		if (bootverbose)
408			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
409		mask = (1L << vhpt_size) - 1;
410		for (i = 0; i < count; i += 2) {
411			vm_offset_t base, limit;
412			base = (phys_avail[i] + mask) & ~mask;
413			limit = base + (1L << vhpt_size);
414			if (limit <= phys_avail[i+1])
415				/*
416				 * VHPT can fit in this region
417				 */
418				break;
419		}
420		if (!phys_avail[i]) {
421			/*
422			 * Can't fit, try next smaller size.
423			 */
424			vhpt_size--;
425		} else {
426			vhpt_base = (phys_avail[i] + mask) & ~mask;
427		}
428	}
429	if (vhpt_size < 15)
430		panic("Can't find space for VHPT");
431
432	if (bootverbose)
433		printf("Putting VHPT at %p\n", (void *) vhpt_base);
434	if (vhpt_base != phys_avail[i]) {
435		/*
436		 * Split this region.
437		 */
438		if (bootverbose)
439			printf("Splitting [%p-%p]\n",
440			       (void *) phys_avail[i],
441			       (void *) phys_avail[i+1]);
442		for (j = count; j > i; j -= 2) {
443			phys_avail[j] = phys_avail[j-2];
444			phys_avail[j+1] = phys_avail[j-2+1];
445		}
446		phys_avail[count+2] = 0;
447		phys_avail[count+3] = 0;
448		phys_avail[i+1] = vhpt_base;
449		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
450	} else {
451		phys_avail[i] = vhpt_base + (1L << vhpt_size);
452	}
453
454	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
455	bzero((void *) vhpt_base, (1L << vhpt_size));
456
457	mtx_init(&pmap_vhptmutex, "VHPT collision chain lock", NULL, MTX_DEF);
458
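	/*
	 * Point cr.pta at the table just cleared.  In the value below,
	 * bit 0 enables the VHPT walker, bits 7..2 hold log2 of the
	 * VHPT size, bit 8 selects the long format, and the high bits
	 * are the (naturally aligned) base address.
	 */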
459	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
460			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
461
462	virtual_avail = VM_MIN_KERNEL_ADDRESS;
463	virtual_end = VM_MAX_KERNEL_ADDRESS;
464
465	/*
466	 * Initialize protection array.
467	 */
468	ia64_protection_init();
469
470	/*
471	 * Initialize the kernel pmap (which is statically allocated).
472	 */
473	for (i = 0; i < 5; i++)
474		kernel_pmap->pm_rid[i] = 0;
475	kernel_pmap->pm_active = 1;
476	TAILQ_INIT(&kernel_pmap->pm_pvlist);
477	PCPU_SET(current_pmap, kernel_pmap);
478
479	/*
480	 * Region 5 is mapped via the vhpt.
481	 */
482	ia64_set_rr(IA64_RR_BASE(5),
483		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
484
485	/*
486	 * Region 6 is direct mapped UC and region 7 is direct mapped
487	 * WB (cacheable). The details of this are controlled by the Alt
488	 * {I,D}TLB handlers. Here we just make sure that they have the largest
489	 * possible page size to minimise TLB usage.
490	 */
491	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
492	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
493
494	/*
495	 * Reserve some memory for allocating pvs while bootstrapping
496	 * the pv allocator. We need to have enough to cover mapping
497	 * the kmem_alloc region used to allocate the initial_pvs in
498	 * pmap_init. In general, the size of this region is
499	 * approximately (# physical pages) * (size of pv entry).
500	 */
501	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
502	pvbootentries = (struct pv_entry *)
503		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
504	pvbootnext = 0;
505
506	/*
507	 * Clear out any random TLB entries left over from booting.
508	 */
509	pmap_invalidate_all(kernel_pmap);
510
511	map_gateway_page();
512}
513
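/*
 * Back UMA's small allocations straight from the page queues: because
 * region 7 direct-maps all of physical memory, a page can be handed
 * out by physical address alone, without consuming KVA or ptes.
 */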
514void *
515uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
516{
517	static vm_pindex_t color;
518	vm_page_t m;
519	int pflags;
520	void *va;
521
522	*flags = UMA_SLAB_PRIV;
523	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
524		pflags = VM_ALLOC_INTERRUPT;
525	else
526		pflags = VM_ALLOC_SYSTEM;
527	if (wait & M_ZERO)
528		pflags |= VM_ALLOC_ZERO;
529
530	for (;;) {
531		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
532		if (m == NULL) {
533			if (wait & M_NOWAIT)
534				return (NULL);
535			else
536				VM_WAIT;
537		} else
538			break;
539	}
540
541	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
542	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
543		bzero(va, PAGE_SIZE);
544	return (va);
545}
546
547void
548uma_small_free(void *mem, int size, u_int8_t flags)
549{
550	vm_page_t m;
551
552	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
553	vm_page_lock_queues();
554	vm_page_free(m);
555	vm_page_unlock_queues();
556}
557
558/*
559 *	Initialize the pmap module.
560 *	Called by vm_init, to initialize any structures that the pmap
561 *	system needs to map virtual memory.
562 *	pmap_init has been enhanced to support discontiguous physical
563 *	memory in a fairly consistent way.
564 */
565void
566pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
567{
568	int i;
569	int initial_pvs;
570
571	/*
572	 * Allocate memory for random pmap data structures.  Includes the
573	 * pv_head_table.
574	 */
575
576	for(i = 0; i < vm_page_array_size; i++) {
577		vm_page_t m;
578
579		m = &vm_page_array[i];
580		TAILQ_INIT(&m->md.pv_list);
581		m->md.pv_list_count = 0;
582 	}
583
584	/*
585	 * Init the pv free list and the PTE free list.
586	 */
587	initial_pvs = vm_page_array_size;
588	if (initial_pvs < MINPV)
589		initial_pvs = MINPV;
590	if (initial_pvs > MAXPV)
591		initial_pvs = MAXPV;
592	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
593	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
594	uma_prealloc(pvzone, initial_pvs);
595
596	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
597	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
598	uma_prealloc(ptezone, initial_pvs);
599
600	/*
601	 * Now it is safe to enable pv_table recording.
602	 */
603	pmap_initialized = TRUE;
604}
605
606/*
607 * Initialize the address space (zone) for the pv_entries.  Set a
608 * high water mark so that the system can recover from excessive
609 * numbers of pv entries.
610 */
611void
612pmap_init2()
613{
614	int shpgperproc = PMAP_SHPGPERPROC;
615
616	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
617	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
618	pv_entry_high_water = 9 * (pv_entry_max / 10);
619}
620
621
622/***************************************************
623 * Manipulate TLBs for a pmap
624 ***************************************************/
625
626static void
627pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
628{
629	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
630		("invalidating TLB for non-current pmap"));
631	ia64_ptc_g(va, PAGE_SHIFT << 2);
632}
633
634static void
635pmap_invalidate_all_1(void *arg)
636{
637	u_int64_t addr;
638	int i, j;
639	register_t psr;
640
641	psr = intr_disable();
642	addr = pmap_ptc_e_base;
643	for (i = 0; i < pmap_ptc_e_count1; i++) {
644		for (j = 0; j < pmap_ptc_e_count2; j++) {
645			ia64_ptc_e(addr);
646			addr += pmap_ptc_e_stride2;
647		}
648		addr += pmap_ptc_e_stride1;
649	}
650	intr_restore(psr);
651}
652
653static void
654pmap_invalidate_all(pmap_t pmap)
655{
656	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
657		("invalidating TLB for non-current pmap"));
658
660#ifdef SMP
661	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
662#else
663	pmap_invalidate_all_1(0);
664#endif
665}
666
667static u_int32_t
668pmap_allocate_rid(void)
669{
670	uint64_t bit, bits;
671	int rid;
672
673	mtx_lock(&pmap_ridmutex);
674	if (pmap_ridcount == pmap_ridmax)
675		panic("pmap_allocate_rid: All Region IDs used");
676
677	/* Find an index with a free bit. */
678	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
679		pmap_rididx++;
680		if (pmap_rididx == pmap_ridmapsz)
681			pmap_rididx = 0;
682	}
683	rid = pmap_rididx * 64;
684
685	/* Find a free bit. */
686	bit = 1UL;
687	while (bits & bit) {
688		rid++;
689		bit <<= 1;
690	}
691
692	pmap_ridmap[pmap_rididx] |= bit;
693	pmap_ridcount++;
694	mtx_unlock(&pmap_ridmutex);
695
696	return rid;
697}
698
699static void
700pmap_free_rid(u_int32_t rid)
701{
702	uint64_t bit;
703	int idx;
704
705	idx = rid / 64;
706	bit = ~(1UL << (rid & 63));
707
708	mtx_lock(&pmap_ridmutex);
709	pmap_ridmap[idx] &= bit;
710	pmap_ridcount--;
711	mtx_unlock(&pmap_ridmutex);
712}
713
714/***************************************************
715 * Low level helper routines.....
716 ***************************************************/
717
718/*
719 * Install a pte into the VHPT
720 */
721static PMAP_INLINE void
722pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
723{
724	u_int64_t *vhp, *p;
725
726	vhp = (u_int64_t *)vhpte;
727	p = (u_int64_t *)pte;
728
729	critical_enter();
730
731	/* Invalidate the tag so the VHPT walker will not match this entry. */
732	vhp[2] = 1UL << 63;
733	ia64_mf();
734
735	vhp[0] = p[0];
736	vhp[1] = p[1];
737	ia64_mf();
738
739	/* Install a proper tag now that we're done. */
740	vhp[2] = p[2];
741	ia64_mf();
742
743	critical_exit();
744}
745
746/*
747 * Compare essential parts of pte.
748 */
749static PMAP_INLINE int
750pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
751{
752	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
753}
754
755/*
756 * This routine decides whether the modified bit should be tracked
757 * for the given address; pages in the kernel's clean submap are not.
758 */
759static PMAP_INLINE int
760pmap_track_modified(vm_offset_t va)
761{
762	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
763		return 1;
764	else
765		return 0;
766}
767
768/***************************************************
769 * Page table page management routines.....
770 ***************************************************/
771
772void
773pmap_pinit0(struct pmap *pmap)
774{
775	/* kernel_pmap is the same as any other pmap. */
776	pmap_pinit(pmap);
777}
778
779/*
780 * Initialize a preallocated and zeroed pmap structure,
781 * such as one in a vmspace structure.
782 */
783void
784pmap_pinit(struct pmap *pmap)
785{
786	int i;
787
788	pmap->pm_flags = 0;
789	for (i = 0; i < 5; i++)
790		pmap->pm_rid[i] = 0;
791	pmap->pm_ptphint = NULL;
792	pmap->pm_active = 0;
793	TAILQ_INIT(&pmap->pm_pvlist);
794	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
795}
796
797/*
798 * Wire in kernel global address entries.  To avoid a race condition
799 * between pmap initialization and pmap_growkernel, this procedure
800 * should be called after the vmspace is attached to the process
801 * but before this pmap is activated.
802 */
803void
804pmap_pinit2(struct pmap *pmap)
805{
806	int i;
807
808	for (i = 0; i < 5; i++)
809		pmap->pm_rid[i] = pmap_allocate_rid();
810}
811
812/***************************************************
813 * Pmap allocation/deallocation routines.
814 ***************************************************/
815
816/*
817 * Release any resources held by the given physical map.
818 * Called when a pmap initialized by pmap_pinit is being released.
819 * Should only be called if the map contains no valid mappings.
820 */
821void
822pmap_release(pmap_t pmap)
823{
824	int i;
825
826	for (i = 0; i < 5; i++)
827		if (pmap->pm_rid[i])
828			pmap_free_rid(pmap->pm_rid[i]);
829}
830
831/*
832 * grow the number of kernel page table entries, if needed
833 */
834void
835pmap_growkernel(vm_offset_t addr)
836{
837	struct ia64_lpte *ptepage;
838	vm_page_t nkpg;
839
840	if (kernel_vm_end >= addr)
841		return;
842
843	critical_enter();
844
845	while (kernel_vm_end < addr) {
846		/* We could handle more by increasing the size of kptdir. */
847		if (nkpt == MAXKPT)
848			panic("pmap_growkernel: out of kernel address space");
849
850		nkpg = vm_page_alloc(NULL, nkpt,
851		    VM_ALLOC_NOOBJ | VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
852		if (!nkpg)
853			panic("pmap_growkernel: no memory to grow kernel");
854
855		ptepage = (struct ia64_lpte *)
856		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
857		bzero(ptepage, PAGE_SIZE);
858		ia64_kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
859
860		nkpt++;
861		kernel_vm_end += PAGE_SIZE * NKPTEPG;
862	}
863
864	critical_exit();
865}
866
867/***************************************************
868 * page management routines.
869 ***************************************************/
870
871/*
872 * free the pv_entry back to the free list
873 */
874static PMAP_INLINE void
875free_pv_entry(pv_entry_t pv)
876{
877	pv_entry_count--;
878	uma_zfree(pvzone, pv);
879}
880
881/*
882 * get a new pv_entry, allocating a block from the system
883 * when needed.
884 * the memory allocation is performed bypassing the malloc code
885 * because of the possibility of allocations at interrupt time.
886 */
887static pv_entry_t
888get_pv_entry(void)
889{
890	pv_entry_count++;
891	if (pv_entry_high_water &&
892		(pv_entry_count > pv_entry_high_water) &&
893		(pmap_pagedaemon_waken == 0)) {
894		pmap_pagedaemon_waken = 1;
895		wakeup (&vm_pages_needed);
896	}
897	return uma_zalloc(pvzone, M_NOWAIT);
898}
899
900/*
901 * Add an ia64_lpte to the VHPT.
902 */
903static void
904pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
905{
906	struct ia64_lpte *vhpte;
907
908	pmap_vhpt_inserts++;
909	pmap_vhpt_resident++;
910
911	vhpte = (struct ia64_lpte *) ia64_thash(va);
912
913	if (vhpte->pte_chain)
914		pmap_vhpt_collisions++;
915
916	mtx_lock(&pmap_vhptmutex);
917
918	pte->pte_chain = vhpte->pte_chain;
919	ia64_mf();
920	vhpte->pte_chain = ia64_tpa((vm_offset_t)pte);
921	ia64_mf();
922
923	if (!vhpte->pte_p && pte->pte_p)
924		pmap_install_pte(vhpte, pte);
925
926	mtx_unlock(&pmap_vhptmutex);
927}
928
929/*
930 * Update VHPT after a pte has changed.
931 */
932static void
933pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
934{
935	struct ia64_lpte *vhpte;
936
937	vhpte = (struct ia64_lpte *)ia64_thash(va);
938
939	mtx_lock(&pmap_vhptmutex);
940
941	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag) && pte->pte_p)
942		pmap_install_pte(vhpte, pte);
943
944	mtx_unlock(&pmap_vhptmutex);
945}
946
947/*
948 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
949 * worked or an appropriate error code otherwise.
950 */
951static int
952pmap_remove_vhpt(vm_offset_t va)
953{
954	struct ia64_lpte *pte;
955	struct ia64_lpte *lpte;
956	struct ia64_lpte *vhpte;
957	u_int64_t tag;
958
959	vhpte = (struct ia64_lpte *)ia64_thash(va);
960
961	/*
962	 * If the VHPTE is invalid, there can't be a collision chain.
963	 */
964	if (!vhpte->pte_p) {
965		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
966		return (ENOENT);
967	}
968
969	lpte = vhpte;
970	tag = ia64_ttag(va);
971
972	mtx_lock(&pmap_vhptmutex);
973
974	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(vhpte->pte_chain);
975	KASSERT(pte != NULL, ("foo"));
976
977	while (pte->pte_tag != tag) {
978		lpte = pte;
979		if (pte->pte_chain == 0) {
980			mtx_unlock(&pmap_vhptmutex);
981			return (ENOENT);
982		}
983		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
984	}
985
986	/* Snip this pte out of the collision chain. */
987	lpte->pte_chain = pte->pte_chain;
988	ia64_mf();
989
990	/*
991	 * If the VHPTE matches as well, change it to map the first
992	 * element from the chain if there is one.
993	 */
994	if (vhpte->pte_tag == tag) {
995		if (vhpte->pte_chain) {
996			pte = (void*)IA64_PHYS_TO_RR7(vhpte->pte_chain);
997			pmap_install_pte(vhpte, pte);
998		} else
999			vhpte->pte_p = 0;
1000	}
1001
1002	mtx_unlock(&pmap_vhptmutex);
1003	pmap_vhpt_resident--;
1004	return (0);
1005}
1006
1007/*
1008 * Find the ia64_lpte for the given va, if any.
1009 */
1010static struct ia64_lpte *
1011pmap_find_vhpt(vm_offset_t va)
1012{
1013	struct ia64_lpte *pte;
1014	u_int64_t tag;
1015
1016	tag = ia64_ttag(va);
1017	pte = (struct ia64_lpte *)ia64_thash(va);
1018	if (pte->pte_chain == 0)
1019		return (NULL);
1020	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
1021	while (pte->pte_tag != tag) {
1022		if (pte->pte_chain == 0)
1023			return (NULL);
1024		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
1025	}
1026	return (pte);
1027}
1028
1029/*
1030 * Remove an entry from the list of managed mappings.
1031 */
1032static int
1033pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1034{
1035	if (!pv) {
1036		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1037			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1038				if (pmap == pv->pv_pmap && va == pv->pv_va)
1039					break;
1040			}
1041		} else {
1042			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1043				if (va == pv->pv_va)
1044					break;
1045			}
1046		}
1047	}
1048
1049	if (pv) {
1050		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1051		m->md.pv_list_count--;
1052		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1053			vm_page_flag_clear(m, PG_WRITEABLE);
1054
1055		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1056		free_pv_entry(pv);
1057		return 0;
1058	} else {
1059		return ENOENT;
1060	}
1061}
1062
1063/*
1064 * Create a pv entry for page at pa for
1065 * (pmap, va).
1066 */
1067static void
1068pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1069{
1070	pv_entry_t pv;
1071
1072	pv = get_pv_entry();
1073	pv->pv_pmap = pmap;
1074	pv->pv_va = va;
1075
1076	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1077	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1078	m->md.pv_list_count++;
1079}
1080
1081/*
1082 *	Routine:	pmap_extract
1083 *	Function:
1084 *		Extract the physical page address associated
1085 *		with the given map/virtual_address pair.
1086 */
1087vm_offset_t
1088pmap_extract(pmap, va)
1089	register pmap_t pmap;
1090	vm_offset_t va;
1091{
1092	struct ia64_lpte *pte;
1093	pmap_t oldpmap;
1094
1095	if (!pmap)
1096		return 0;
1097
1098	oldpmap = pmap_install(pmap);
1099	pte = pmap_find_vhpt(va);
1100	pmap_install(oldpmap);
1101
1102	if (!pte)
1103		return 0;
1104
1105	return pmap_pte_pa(pte);
1106}
1107
1108/***************************************************
1109 * Low level mapping routines.....
1110 ***************************************************/
1111
1112/*
1113 * Find the kernel lpte for mapping the given virtual address, which
1114 * must be in the part of region 5 which we can cover with our kernel
1115 * 'page tables'.
1116 */
1117static struct ia64_lpte *
1118pmap_find_kpte(vm_offset_t va)
1119{
1120	KASSERT((va >> 61) == 5,
1121		("kernel mapping 0x%lx not in region 5", va));
1122	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1123		("kernel mapping 0x%lx out of range", va));
1124	return (&ia64_kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)]);
1125}
1126
1127/*
1128 * Find a pte suitable for mapping a user-space address. If one exists
1129 * in the VHPT, that one will be returned, otherwise a new pte is
1130 * allocated.
1131 */
1132static struct ia64_lpte *
1133pmap_find_pte(vm_offset_t va)
1134{
1135	struct ia64_lpte *pte;
1136
1137	if (va >= VM_MAXUSER_ADDRESS)
1138		return pmap_find_kpte(va);
1139
1140	pte = pmap_find_vhpt(va);
1141	if (!pte) {
1142		pte = uma_zalloc(ptezone, M_WAITOK);
1143		pte->pte_p = 0;
1144	}
1145	return pte;
1146}
1147
1148/*
1149 * Free a pte which is now unused. This simply returns it to the zone
1150 * allocator if it is a user mapping. For kernel mappings, clear the
1151 * valid bit to make it clear that the mapping is not currently used.
1152 */
1153static void
1154pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1155{
1156	if (va < VM_MAXUSER_ADDRESS)
1157		uma_zfree(ptezone, pte);
1158	else
1159		pte->pte_p = 0;
1160}
1161
1162/*
1163 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1164 * the pte was originally valid, then it's assumed to already be in the
1165 * VHPT.
1166 */
1167static void
1168pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1169	     int ig, int pl, int ar)
1170{
1171	int wasvalid = pte->pte_p;
1172
1173	pte->pte_p = 1;
1174	pte->pte_ma = PTE_MA_WB;
1175	if (ig & PTE_IG_MANAGED) {
1176		pte->pte_a = 0;
1177		pte->pte_d = 0;
1178	} else {
1179		pte->pte_a = 1;
1180		pte->pte_d = 1;
1181	}
1182	pte->pte_pl = pl;
1183	pte->pte_ar = ar;
1184	pte->pte_ppn = pa >> 12;
1185	pte->pte_ed = 0;
1186	pte->pte_ig = ig;
1187
1188	pte->pte_ps = PAGE_SHIFT;
1189	pte->pte_key = 0;
1190
1191	pte->pte_tag = ia64_ttag(va);
1192
1193	if (wasvalid) {
1194		pmap_update_vhpt(pte, va);
1195	} else {
1196		pmap_enter_vhpt(pte, va);
1197	}
1198}
1199
1200/*
1201 * If a pte contains a valid mapping, clear it and update the VHPT.
1202 */
1203static void
1204pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1205{
1206	if (pte->pte_p) {
1207		pmap_remove_vhpt(va);
1208		ia64_ptc_g(va, PAGE_SHIFT << 2);
1209		pte->pte_p = 0;
1210	}
1211}
1212
1213/*
1214 * Remove the (possibly managed) mapping represented by pte from the
1215 * given pmap.
1216 */
1217static int
1218pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1219		pv_entry_t pv, int freepte)
1220{
1221	int error;
1222	vm_page_t m;
1223
1224	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1225		("removing pte for non-current pmap"));
1226
1227	/*
1228	 * First remove from the VHPT.
1229	 */
1230	error = pmap_remove_vhpt(va);
1231	if (error)
1232		return error;
1233
1234	/*
1235	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1236	 */
1237	pte->pte_p = 0;
1238
1239	if (pte->pte_ig & PTE_IG_WIRED)
1240		pmap->pm_stats.wired_count -= 1;
1241
1242	pmap->pm_stats.resident_count -= 1;
1243	if (pte->pte_ig & PTE_IG_MANAGED) {
1244		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1245		if (pte->pte_d)
1246			if (pmap_track_modified(va))
1247				vm_page_dirty(m);
1248		if (pte->pte_a)
1249			vm_page_flag_set(m, PG_REFERENCED);
1250
1251		if (freepte)
1252			pmap_free_pte(pte, va);
1253		return pmap_remove_entry(pmap, m, va, pv);
1254	} else {
1255		if (freepte)
1256			pmap_free_pte(pte, va);
1257		return 0;
1258	}
1259}
1260
1261/*
1262 * Extract the physical page address associated with a kernel
1263 * virtual address.
1264 */
1265vm_paddr_t
1266pmap_kextract(vm_offset_t va)
1267{
1268	struct ia64_lpte *pte;
1269	vm_offset_t gwpage;
1270
1271	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1272
1273	/* Regions 6 and 7 are direct mapped. */
1274	if (va >= IA64_RR_BASE(6))
1275		return (IA64_RR_MASK(va));
1276
1277	/* EPC gateway page? */
1278	gwpage = (vm_offset_t)ia64_get_k5();
1279	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1280		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1281
1282	/* Bail out if the virtual address is beyond our limits. */
1283	if (IA64_RR_MASK(va) >= nkpt * PAGE_SIZE * NKPTEPG)
1284		return (0);
1285
1286	pte = pmap_find_kpte(va);
1287	if (!pte->pte_p)
1288		return (0);
1289	return ((pte->pte_ppn << 12) | (va & PAGE_MASK));
1290}
1291
1292/*
1293 * Add a list of wired pages to the kva.
1294 * This routine is only used for temporary
1295 * kernel mappings that do not need to have
1296 * page modification or references recorded.
1297 * Note that old mappings are simply written
1298 * over.  The page *must* be wired.
1299 */
1300void
1301pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1302{
1303	int i;
1304	struct ia64_lpte *pte;
1305
1306	for (i = 0; i < count; i++) {
1307		vm_offset_t tva = va + i * PAGE_SIZE;
1308		int wasvalid;
1309		pte = pmap_find_kpte(tva);
1310		wasvalid = pte->pte_p;
1311		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1312			     0, PTE_PL_KERN, PTE_AR_RWX);
1313		if (wasvalid)
1314			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1315	}
1316}
1317
1318/*
1319 * this routine jerks page mappings from the
1320 * kernel -- it is meant only for temporary mappings.
1321 */
1322void
1323pmap_qremove(vm_offset_t va, int count)
1324{
1325	int i;
1326	struct ia64_lpte *pte;
1327
1328	for (i = 0; i < count; i++) {
1329		pte = pmap_find_kpte(va);
1330		pmap_clear_pte(pte, va);
1331		va += PAGE_SIZE;
1332	}
1333}
1334
1335/*
1336 * Add a wired page to the kva.
1337 */
1338void
1339pmap_kenter(vm_offset_t va, vm_offset_t pa)
1340{
1341	struct ia64_lpte *pte;
1342	int wasvalid;
1343
1344	pte = pmap_find_kpte(va);
1345	wasvalid = pte->pte_p;
1346	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1347	if (wasvalid)
1348		ia64_ptc_g(va, PAGE_SHIFT << 2);
1349}
1350
1351/*
1352 * Remove a page from the kva
1353 */
1354void
1355pmap_kremove(vm_offset_t va)
1356{
1357	struct ia64_lpte *pte;
1358
1359	pte = pmap_find_kpte(va);
1360	pmap_clear_pte(pte, va);
1361}
1362
1363/*
1364 *	Used to map a range of physical addresses into kernel
1365 *	virtual address space.
1366 *
1367 *	The value passed in '*virt' is a suggested virtual address for
1368 *	the mapping. Architectures which can support a direct-mapped
1369 *	physical to virtual region can return the appropriate address
1370 *	within that region, leaving '*virt' unchanged. Other
1371 *	architectures should map the pages starting at '*virt' and
1372 *	update '*virt' with the first usable address after the mapped
1373 *	region.
1374 */
1375vm_offset_t
1376pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1377{
1378	return IA64_PHYS_TO_RR7(start);
1379}
1380
1381/*
1382 * Remove a single page from a process address space
1383 */
1384static void
1385pmap_remove_page(pmap_t pmap, vm_offset_t va)
1386{
1387	struct ia64_lpte *pte;
1388
1389	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1390		("removing page for non-current pmap"));
1391
1392	pte = pmap_find_vhpt(va);
1393	if (pte) {
1394		pmap_remove_pte(pmap, pte, va, 0, 1);
1395		pmap_invalidate_page(pmap, va);
1396	}
1397	return;
1398}
1399
1400/*
1401 *	Remove the given range of addresses from the specified map.
1402 *
1403 *	It is assumed that the start and end are properly
1404 *	rounded to the page size.
1405 */
1406void
1407pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1408{
1409	pmap_t oldpmap;
1410	vm_offset_t va;
1411	pv_entry_t pv;
1412	struct ia64_lpte *pte;
1413
1414	if (pmap == NULL)
1415		return;
1416
1417	if (pmap->pm_stats.resident_count == 0)
1418		return;
1419
1420	oldpmap = pmap_install(pmap);
1421
1422	/*
1423	 * Special handling for removing a single page: it is a very
1424	 * common operation and we can short circuit some code here.
1426	 */
1427	if (sva + PAGE_SIZE == eva) {
1428		pmap_remove_page(pmap, sva);
1429		pmap_install(oldpmap);
1430		return;
1431	}
1432
1433	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1434		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1435			va = pv->pv_va;
1436			if (va >= sva && va < eva) {
1437				pte = pmap_find_vhpt(va);
1438				KASSERT(pte != NULL, ("pte"));
1439				pmap_remove_pte(pmap, pte, va, pv, 1);
1440				pmap_invalidate_page(pmap, va);
1441			}
1442		}
1443
1444	} else {
1445		for (va = sva; va < eva; va += PAGE_SIZE) {
1446			pte = pmap_find_vhpt(va);
1447			if (pte) {
1448				pmap_remove_pte(pmap, pte, va, 0, 1);
1449				pmap_invalidate_page(pmap, va);
1450			}
1451		}
1452	}
1453
1454	pmap_install(oldpmap);
1455}
1456
1457/*
1458 *	Routine:	pmap_remove_all
1459 *	Function:
1460 *		Removes this physical page from
1461 *		all physical maps in which it resides.
1462 *		Reflects back modify bits to the pager.
1463 *
1464 *	Notes:
1465 *		Original versions of this routine were very
1466 *		inefficient because they iteratively called
1467 *		pmap_remove (slow...)
1468 */
1469
1470void
1471pmap_remove_all(vm_page_t m)
1472{
1473	pmap_t oldpmap;
1474	pv_entry_t pv;
1475	int s;
1476
1477#if defined(PMAP_DIAGNOSTIC)
1478	/*
1479	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1480	 * pages!
1481	 */
1482	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1483		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1484	}
1485#endif
1486
1487	s = splvm();
1488
1489	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1490		struct ia64_lpte *pte;
1491		pmap_t pmap = pv->pv_pmap;
1492		vm_offset_t va = pv->pv_va;
1493
1494		oldpmap = pmap_install(pmap);
1495		pte = pmap_find_vhpt(va);
1496		KASSERT(pte != NULL, ("pte"));
1497		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1498			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1499		pmap_remove_pte(pmap, pte, va, pv, 1);
1500		pmap_invalidate_page(pmap, va);
1501		pmap_install(oldpmap);
1502	}
1503
1504	vm_page_flag_clear(m, PG_WRITEABLE);
1505
1506	splx(s);
1507	return;
1508}
1509
1510/*
1511 *	Set the physical protection on the
1512 *	specified range of this map as requested.
1513 */
1514void
1515pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1516{
1517	pmap_t oldpmap;
1518	struct ia64_lpte *pte;
1519	int newprot;
1520
1521	if (pmap == NULL)
1522		return;
1523
1524	oldpmap = pmap_install(pmap);
1525
1526	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1527		pmap_remove(pmap, sva, eva);
1528		pmap_install(oldpmap);
1529		return;
1530	}
1531
1532	if (prot & VM_PROT_WRITE) {
1533		pmap_install(oldpmap);
1534		return;
1535	}
1536
1537	newprot = pte_prot(pmap, prot);
1538
1539	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1540		panic("pmap_protect: unaligned addresses");
1541
1542	while (sva < eva) {
1543		/*
1544		 * If page is invalid, skip this page
1545		 */
1546		pte = pmap_find_vhpt(sva);
1547		if (!pte) {
1548			sva += PAGE_SIZE;
1549			continue;
1550		}
1551
1552		if (pmap_pte_prot(pte) != newprot) {
1553			if (pte->pte_ig & PTE_IG_MANAGED) {
1554				vm_offset_t pa = pmap_pte_pa(pte);
1555				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1556				if (pte->pte_d) {
1557					if (pmap_track_modified(sva))
1558						vm_page_dirty(m);
1559					pte->pte_d = 0;
1560				}
1561				if (pte->pte_a) {
1562					vm_page_flag_set(m, PG_REFERENCED);
1563					pte->pte_a = 0;
1564				}
1565			}
1566			pmap_pte_set_prot(pte, newprot);
1567			pmap_update_vhpt(pte, sva);
1568			pmap_invalidate_page(pmap, sva);
1569		}
1570
1571		sva += PAGE_SIZE;
1572	}
1573	pmap_install(oldpmap);
1574}
1575
1576/*
1577 *	Insert the given physical page (p) at
1578 *	the specified virtual address (v) in the
1579 *	target physical map with the protection requested.
1580 *
1581 *	If specified, the page will be wired down, meaning
1582 *	that the related pte can not be reclaimed.
1583 *
1584 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1585 *	or lose information.  That is, this routine must actually
1586 *	insert this page into the given map NOW.
1587 */
1588void
1589pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1590	   boolean_t wired)
1591{
1592	pmap_t oldpmap;
1593	vm_offset_t pa;
1594	vm_offset_t opa;
1595	struct ia64_lpte origpte;
1596	struct ia64_lpte *pte;
1597	int managed;
1598
1599	if (pmap == NULL)
1600		return;
1601
1602	oldpmap = pmap_install(pmap);
1603
1604	va &= ~PAGE_MASK;
1605#ifdef PMAP_DIAGNOSTIC
1606	if (va > VM_MAX_KERNEL_ADDRESS)
1607		panic("pmap_enter: toobig");
1608#endif
1609
1610	/*
1611	 * Find (or create) a pte for the given mapping.
1612	 */
1613	pte = pmap_find_pte(va);
1614	origpte = *pte;
1615
1616	if (origpte.pte_p)
1617		opa = pmap_pte_pa(&origpte);
1618	else
1619		opa = 0;
1620	managed = 0;
1621
1622	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1623
1624	/*
1625	 * Mapping has not changed, must be protection or wiring change.
1626	 */
1627	if (origpte.pte_p && (opa == pa)) {
1628		/*
1629		 * Wiring change, just update stats. We don't worry about
1630		 * wiring PT pages as they remain resident as long as there
1631		 * are valid mappings in them. Hence, if a user page is wired,
1632		 * the PT page will be also.
1633		 */
1634		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1635			pmap->pm_stats.wired_count++;
1636		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1637			pmap->pm_stats.wired_count--;
1638
1639		/*
1640		 * We might be turning off write access to the page,
1641		 * so we go ahead and sense modify status.
1642		 */
1643		if (origpte.pte_ig & PTE_IG_MANAGED) {
1644			if (origpte.pte_d && pmap_track_modified(va)) {
1645				vm_page_t om;
1646				om = PHYS_TO_VM_PAGE(opa);
1647				vm_page_dirty(om);
1648			}
1649		}
1650
1651		managed = origpte.pte_ig & PTE_IG_MANAGED;
1652		goto validate;
1653	}
1654	/*
1655	 * Mapping has changed, invalidate old range and fall
1656	 * through to handle validating new mapping.
1657	 */
1658	if (opa) {
1659		int error;
1660		vm_page_lock_queues();
1661		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1662		vm_page_unlock_queues();
1663		if (error)
1664			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1665	}
1666
1667	/*
1668	 * Enter on the PV list if part of our managed memory.
1669	 */
1670	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1671		pmap_insert_entry(pmap, va, m);
1672		managed |= PTE_IG_MANAGED;
1673	}
1674
1675	/*
1676	 * Increment counters
1677	 */
1678	pmap->pm_stats.resident_count++;
1679	if (wired)
1680		pmap->pm_stats.wired_count++;
1681
1682validate:
1683
1684	/*
1685	 * Now validate mapping with desired protection/wiring. This
1686	 * adds the pte to the VHPT if necessary.
1687	 */
1688	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1689		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1690
1691	/*
1692	 * if the mapping or permission bits are different, we need
1693	 * to invalidate the page.
1694	 */
1695	if (!pmap_equal_pte(&origpte, pte))
1696		pmap_invalidate_page(pmap, va);
1697
1698	pmap_install(oldpmap);
1699}
1700
1701/*
1702 * this code makes some *MAJOR* assumptions:
1703 * 1. Current pmap & pmap exists.
1704 * 2. Not wired.
1705 * 3. Read access.
1706 * 4. No page table pages.
1707 * 5. Tlbflush is deferred to calling procedure.
1708 * 6. Page IS managed.
1709 * but is *MUCH* faster than pmap_enter...
1710 */
1711
1712static void
1713pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1714{
1715	struct ia64_lpte *pte;
1716	pmap_t oldpmap;
1717
1718	oldpmap = pmap_install(pmap);
1719
1720	pte = pmap_find_pte(va);
1721	if (pte->pte_p) {
		/* Already mapped; restore the previously installed pmap. */
		pmap_install(oldpmap);
		return;
	}
1723
1724	/*
1725	 * Enter on the PV list since it's part of our managed memory.
1726	 */
1727	pmap_insert_entry(pmap, va, m);
1728
1729	/*
1730	 * Increment counters
1731	 */
1732	pmap->pm_stats.resident_count++;
1733
1734	/*
1735	 * Initialise PTE with read-only protection and enter into VHPT.
1736	 */
1737	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1738		     PTE_IG_MANAGED,
1739		     PTE_PL_USER, PTE_AR_R);
1740
1741	pmap_install(oldpmap);
1742}
1743
1744/*
1745 * Make temporary mapping for a physical address. This is called
1746 * during dump.
1747 */
1748void *
1749pmap_kenter_temporary(vm_offset_t pa, int i)
1750{
1751	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1752}
1753
1754#define MAX_INIT_PT (96)
1755/*
1756 * pmap_object_init_pt preloads the ptes for a given object
1757 * into the specified pmap.  This eliminates the blast of soft
1758 * faults on process startup and immediately after an mmap.
1759 */
1760void
1761pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1762		    vm_object_t object, vm_pindex_t pindex,
1763		    vm_size_t size, int limit)
1764{
1765	pmap_t oldpmap;
1766	vm_offset_t tmpidx;
1767	int psize;
1768	vm_page_t p;
1769	int objpgs;
1770
1771	if (pmap == NULL || object == NULL)
1772		return;
1773
1774	oldpmap = pmap_install(pmap);
1775
1776	psize = ia64_btop(size);
1777
1778	if ((object->type != OBJT_VNODE) ||
1779		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1780			(object->resident_page_count > MAX_INIT_PT))) {
1781		pmap_install(oldpmap);
1782		return;
1783	}
1784
1785	if (psize + pindex > object->size) {
1786		if (object->size < pindex) {
			/* Nothing to map; restore the previously installed pmap. */
			pmap_install(oldpmap);
			return;
		}
1788		psize = object->size - pindex;
1789	}
1790
1791	/*
1792	 * if we are processing a major portion of the object, then scan the
1793	 * entire thing.
1794	 */
1795	if (psize > (object->resident_page_count >> 2)) {
1796		objpgs = psize;
1797
1798		for (p = TAILQ_FIRST(&object->memq);
1799		    ((objpgs > 0) && (p != NULL));
1800		    p = TAILQ_NEXT(p, listq)) {
1801
1802			tmpidx = p->pindex;
1803			if (tmpidx < pindex) {
1804				continue;
1805			}
1806			tmpidx -= pindex;
1807			if (tmpidx >= psize) {
1808				continue;
1809			}
1810			/*
1811			 * Don't allow an madvise to blow away our really
1812			 * free pages by allocating pv entries.
1813			 */
1814			if ((limit & MAP_PREFAULT_MADVISE) &&
1815			    cnt.v_free_count < cnt.v_free_reserved) {
1816				break;
1817			}
1818			vm_page_lock_queues();
1819			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1820				(p->busy == 0) &&
1821			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1822				if ((p->queue - p->pc) == PQ_CACHE)
1823					vm_page_deactivate(p);
1824				vm_page_busy(p);
1825				vm_page_unlock_queues();
1826				pmap_enter_quick(pmap,
1827						 addr + ia64_ptob(tmpidx), p);
1828				vm_page_lock_queues();
1829				vm_page_wakeup(p);
1830			}
1831			vm_page_unlock_queues();
1832			objpgs -= 1;
1833		}
1834	} else {
1835		/*
1836		 * else lookup the pages one-by-one.
1837		 */
1838		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1839			/*
1840			 * Don't allow an madvise to blow away our really
1841			 * free pages by allocating pv entries.
1842			 */
1843			if ((limit & MAP_PREFAULT_MADVISE) &&
1844			    cnt.v_free_count < cnt.v_free_reserved) {
1845				break;
1846			}
1847			p = vm_page_lookup(object, tmpidx + pindex);
1848			if (p == NULL)
1849				continue;
1850			vm_page_lock_queues();
1851			if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
1852				(p->busy == 0) &&
1853			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1854				if ((p->queue - p->pc) == PQ_CACHE)
1855					vm_page_deactivate(p);
1856				vm_page_busy(p);
1857				vm_page_unlock_queues();
1858				pmap_enter_quick(pmap,
1859						 addr + ia64_ptob(tmpidx), p);
1860				vm_page_lock_queues();
1861				vm_page_wakeup(p);
1862			}
1863			vm_page_unlock_queues();
1864		}
1865	}
1866	pmap_install(oldpmap);
1867	return;
1868}
1869
1870/*
1871 * pmap_prefault provides a quick way of clustering
1872 * pagefaults into a process's address space.  It is a "cousin"
1873 * of pmap_object_init_pt, except it runs at page fault time instead
1874 * of mmap time.
1875 */
1876#define PFBAK 4
1877#define PFFOR 4
1878#define PAGEORDER_SIZE (PFBAK+PFFOR)
1879
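/*
 * Probe order used by pmap_prefault(): alternate one page behind and
 * one page ahead of the faulting address, working outwards, up to
 * PFBAK/PFFOR pages in each direction.
 */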
1880static int pmap_prefault_pageorder[] = {
1881	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
1882	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1883	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1884	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1885};
1886
1887void
1888pmap_prefault(pmap, addra, entry)
1889	pmap_t pmap;
1890	vm_offset_t addra;
1891	vm_map_entry_t entry;
1892{
1893	int i;
1894	vm_offset_t starta;
1895	vm_offset_t addr;
1896	vm_pindex_t pindex;
1897	vm_page_t m, mpte;
1898	vm_object_t object;
1899
1900	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1901		return;
1902
1903	object = entry->object.vm_object;
1904
1905	starta = addra - PFBAK * PAGE_SIZE;
1906	if (starta < entry->start) {
1907		starta = entry->start;
1908	} else if (starta > addra) {
1909		starta = 0;
1910	}
1911
1912	mpte = NULL;
1913	for (i = 0; i < PAGEORDER_SIZE; i++) {
1914		vm_object_t lobject;
1915		struct ia64_lpte *pte;
1916
1917		addr = addra + pmap_prefault_pageorder[i];
1918		if (addr > addra + (PFFOR * PAGE_SIZE))
1919			addr = 0;
1920
1921		if (addr < starta || addr >= entry->end)
1922			continue;
1923
1924		pte = pmap_find_vhpt(addr);
1925		if (pte && pte->pte_p)
1926			continue;
1927
1928		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
1929		lobject = object;
1930		for (m = vm_page_lookup(lobject, pindex);
1931		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
1932		    lobject = lobject->backing_object) {
1933			if (lobject->backing_object_offset & PAGE_MASK)
1934				break;
1935			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
1936			m = vm_page_lookup(lobject->backing_object, pindex);
1937		}
1938
1939		/*
1940		 * give-up when a page is not in memory
1941		 */
1942		if (m == NULL)
1943			break;
1944		vm_page_lock_queues();
1945		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1946			(m->busy == 0) &&
1947		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1948
1949			if ((m->queue - m->pc) == PQ_CACHE) {
1950				vm_page_deactivate(m);
1951			}
1952			vm_page_busy(m);
1953			vm_page_unlock_queues();
1954			pmap_enter_quick(pmap, addr, m);
1955			vm_page_lock_queues();
1956			vm_page_wakeup(m);
1957		}
1958		vm_page_unlock_queues();
1959	}
1960}
1961
1962/*
1963 *	Routine:	pmap_change_wiring
1964 *	Function:	Change the wiring attribute for a map/virtual-address
1965 *			pair.
1966 *	In/out conditions:
1967 *			The mapping must already exist in the pmap.
1968 */
1969void
1970pmap_change_wiring(pmap, va, wired)
1971	register pmap_t pmap;
1972	vm_offset_t va;
1973	boolean_t wired;
1974{
1975	pmap_t oldpmap;
1976	struct ia64_lpte *pte;
1977
1978	if (pmap == NULL)
1979		return;
1980
1981	oldpmap = pmap_install(pmap);
1982
1983	pte = pmap_find_vhpt(va);
1984	KASSERT(pte != NULL, ("pte"));
1985	if (wired && !pmap_pte_w(pte))
1986		pmap->pm_stats.wired_count++;
1987	else if (!wired && pmap_pte_w(pte))
1988		pmap->pm_stats.wired_count--;
1989
1990	/*
1991	 * Wiring is not a hardware characteristic so there is no need to
1992	 * invalidate TLB.
1993	 */
1994	pmap_pte_set_w(pte, wired);
1995
1996	pmap_install(oldpmap);
1997}
1998
1999
2000
2001/*
2002 *	Copy the range specified by src_addr/len
2003 *	from the source map to the range dst_addr/len
2004 *	in the destination map.
2005 *
2006 *	This routine is only advisory and need not do anything.
2007 */
2008
2009void
2010pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2011	  vm_offset_t src_addr)
2012{
2013}
2014
2015
2016/*
2017 *	pmap_zero_page zeros the specified hardware page by
2018 *	mapping it into virtual memory and using bzero to clear
2019 *	its contents.
2020 */
2021
2022void
2023pmap_zero_page(vm_page_t m)
2024{
2025	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2026	bzero((caddr_t) va, PAGE_SIZE);
2027}
2028
2029
2030/*
2031 *	pmap_zero_page_area zeros the specified hardware page by
2032 *	mapping it into virtual memory and using bzero to clear
2033 *	its contents.
2034 *
2035 *	off and size must reside within a single page.
2036 */
2037
2038void
2039pmap_zero_page_area(vm_page_t m, int off, int size)
2040{
2041	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2042	bzero((char *)(caddr_t)va + off, size);
2043}
2044
2045
2046/*
2047 *	pmap_zero_page_idle zeros the specified hardware page by
2048 *	mapping it into virtual memory and using bzero to clear
2049 *	its contents.  This is for the vm_idlezero process.
2050 */
2051
2052void
2053pmap_zero_page_idle(vm_page_t m)
2054{
2055	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2056	bzero((caddr_t) va, PAGE_SIZE);
2057}
2058
2059
2060/*
2061 *	pmap_copy_page copies the specified (machine independent)
2062 *	page by mapping the page into virtual memory and using
2063 *	bcopy to copy the page, one machine dependent page at a
2064 *	time.
2065 */
2066void
2067pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2068{
2069	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2070	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2071	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2072}
2073
2074/*
2075 * Returns true if the pmap's pv is one of the first
2076 * 16 pvs linked to from this page.  This count may
2077 * be changed upwards or downwards in the future; it
2078 * is only necessary that true be returned for a small
2079 * subset of pmaps for proper page aging.
2080 */
2081boolean_t
2082pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2083{
2084	pv_entry_t pv;
2085	int loops = 0;
2086	int s;
2087
2088	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2089		return FALSE;
2090
2091	s = splvm();
2092
2093	/*
2094	 * Check the page's current mappings, returning immediately if the given pmap is found.
2095	 */
2096	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2097		if (pv->pv_pmap == pmap) {
2098			splx(s);
2099			return TRUE;
2100		}
2101		loops++;
2102		if (loops >= 16)
2103			break;
2104	}
2105	splx(s);
2106	return (FALSE);
2107}
2108
2109#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2110/*
2111 * Remove all pages from the specified address space;
2112 * this aids process exit speeds.  Also, this code is
2113 * special-cased for the current process only, but the
2114 * more generic (and slightly slower) mode can be
2115 * enabled.  This is much faster than pmap_remove in
2116 * the case of running down an entire address space.
2117 */
2118void
2119pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2120{
2121	pv_entry_t pv, npv;
2122	int s;
2123
2124#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2125	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2126		printf("warning: pmap_remove_pages called with non-current pmap\n");
2127		return;
2128	}
2129#endif
2130
2131	s = splvm();
2132	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2133		pv;
2134		pv = npv) {
2135		struct ia64_lpte *pte;
2136
2137		npv = TAILQ_NEXT(pv, pv_plist);
2138
2139		if (pv->pv_va >= eva || pv->pv_va < sva) {
2140			continue;
2141		}
2142
2143		pte = pmap_find_vhpt(pv->pv_va);
2144		KASSERT(pte != NULL, ("pte"));
2145		if (pte->pte_ig & PTE_IG_WIRED)
2146			continue;
2147
2148		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2149	}
2150	splx(s);
2151
2152	pmap_invalidate_all(pmap);
2153}
2154
2155/*
2156 *      pmap_page_protect:
2157 *
2158 *      Lower the permission for all mappings to a given page.
2159 */
2160void
2161pmap_page_protect(vm_page_t m, vm_prot_t prot)
2162{
2163	pv_entry_t pv;
2164
2165	if ((prot & VM_PROT_WRITE) != 0)
2166		return;
2167	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2168		if ((m->flags & PG_WRITEABLE) == 0)
2169			return;
2170		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2171			int newprot = pte_prot(pv->pv_pmap, prot);
2172			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2173			struct ia64_lpte *pte;
2174			pte = pmap_find_vhpt(pv->pv_va);
2175			KASSERT(pte != NULL, ("pte"));
2176			pmap_pte_set_prot(pte, newprot);
2177			pmap_update_vhpt(pte, pv->pv_va);
2178			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2179			pmap_install(oldpmap);
2180		}
2181		vm_page_flag_clear(m, PG_WRITEABLE);
2182	} else {
2183		pmap_remove_all(m);
2184	}
2185}
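
/*
 * Typical uses of pmap_page_protect(), shown for illustration only:
 * pass a protection without VM_PROT_WRITE to write-protect every
 * mapping of the page, or VM_PROT_NONE to drop all mappings.  The
 * wrapper function is hypothetical.
 */
#if 0
static void
example_page_protect_uses(vm_page_t m)
{
	pmap_page_protect(m, VM_PROT_READ);	/* make every mapping read-only */
	pmap_page_protect(m, VM_PROT_NONE);	/* drop all mappings, like pmap_remove_all() */
}
#endif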
2186
2187/*
2188 *	pmap_ts_referenced:
2189 *
2190 *	Return a count of reference bits for a page, clearing those bits.
2191 *	It is not necessary for every reference bit to be cleared, but it
2192 *	is necessary that 0 only be returned when there are truly no
2193 *	reference bits set.
2194 *
2195 *	XXX: The exact number of bits to check and clear is a matter that
2196 *	should be tested and standardized at some point in the future for
2197 *	optimal aging of shared pages.
2198 */
2199int
2200pmap_ts_referenced(vm_page_t m)
2201{
2202	pv_entry_t pv;
2203	int count = 0;
2204
2205	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2206		return 0;
2207
2208	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2209		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2210		struct ia64_lpte *pte;
2211		pte = pmap_find_vhpt(pv->pv_va);
2212		KASSERT(pte != NULL, ("pte"));
2213		if (pte->pte_a) {
2214			count++;
2215			pte->pte_a = 0;
2216			pmap_update_vhpt(pte, pv->pv_va);
2217			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2218		}
2219		pmap_install(oldpmap);
2220	}
2221
2222	return count;
2223}
2224
2225#if 0
2226/*
2227 *	pmap_is_referenced:
2228 *
2229 *	Return whether or not the specified physical page was referenced
2230 *	in any physical maps.
2231 */
2232static boolean_t
2233pmap_is_referenced(vm_page_t m)
2234{
2235	pv_entry_t pv;
2236
2237	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2238		return FALSE;
2239
2240	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2241		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2242		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2243		pmap_install(oldpmap);
2244		KASSERT(pte != NULL, ("pte"));
2245		if (pte->pte_a)
2246			return 1;
2247	}
2248
2249	return 0;
2250}
2251#endif
2252
2253/*
2254 *	pmap_is_modified:
2255 *
2256 *	Return whether or not the specified physical page was modified
2257 *	in any physical maps.
2258 */
2259boolean_t
2260pmap_is_modified(vm_page_t m)
2261{
2262	pv_entry_t pv;
2263
2264	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2265		return FALSE;
2266
2267	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2268		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2269		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2270		pmap_install(oldpmap);
2271		KASSERT(pte != NULL, ("pte"));
2272		if (pte->pte_d)
2273			return 1;
2274	}
2275
2276	return 0;
2277}
2278
2279/*
2280 *	Clear the modify bits on the specified physical page.
2281 */
2282void
2283pmap_clear_modify(vm_page_t m)
2284{
2285	pv_entry_t pv;
2286
2287	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2288		return;
2289
2290	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2291		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2292		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2293		KASSERT(pte != NULL, ("pte"));
2294		if (pte->pte_d) {
2295			pte->pte_d = 0;
2296			pmap_update_vhpt(pte, pv->pv_va);
2297			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2298		}
2299		pmap_install(oldpmap);
2300	}
2301}
2302
2303/*
2304 *	pmap_clear_reference:
2305 *
2306 *	Clear the reference bit on the specified physical page.
2307 */
2308void
2309pmap_clear_reference(vm_page_t m)
2310{
2311	pv_entry_t pv;
2312
2313	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2314		return;
2315
2316	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2317		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2318		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2319		KASSERT(pte != NULL, ("pte"));
2320		if (pte->pte_a) {
2321			pte->pte_a = 0;
2322			pmap_update_vhpt(pte, pv->pv_va);
2323			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2324		}
2325		pmap_install(oldpmap);
2326	}
2327}
2328
2329/*
2330 * Miscellaneous support routines follow
2331 */
2332
2333static void
2334ia64_protection_init()
2335{
2336	int prot, *kp, *up;
2337
2338	kp = protection_codes[0];
2339	up = protection_codes[1];
2340
2341	for (prot = 0; prot < 8; prot++) {
2342		switch (prot) {
2343		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2344			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2345			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2346			break;
2347
2348		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2349			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2350			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2351			break;
2352
2353		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2354			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2355			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2356			break;
2357
2358		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2359			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2360			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2361			break;
2362
2363		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2364			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2365			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2366			break;
2367
2368		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2369			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2370			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2371			break;
2372
2373		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2374			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2375			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2376			break;
2377
2378		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2379			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2380			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2381			break;
2382		}
2383	}
2384}
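
/*
 * Illustration only: the table built above is indexed by privilege
 * level (kernel vs. user) and by the 3-bit VM_PROT_* combination.  A
 * hypothetical lookup of that shape is sketched below; the real
 * lookup is done by pte_prot(), defined earlier in this file.
 */
#if 0
static __inline int
example_prot_lookup(pmap_t pm, vm_prot_t prot)
{
	/* Assumes kernel_pmap names the kernel's pmap, as elsewhere. */
	return (protection_codes[pm == kernel_pmap ? 0 : 1][prot & 7]);
}
#endif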
2385
2386/*
2387 * Map a set of physical memory pages into the kernel virtual
2388 * address space. Return a pointer to where it is mapped. This
2389 * routine is intended to be used for mapping device memory,
2390 * NOT real memory.
2391 */
2392void *
2393pmap_mapdev(vm_offset_t pa, vm_size_t size)
2394{
2395	return (void*) IA64_PHYS_TO_RR6(pa);
2396}
2397
2398/*
2399 * 'Unmap' a range mapped by pmap_mapdev().
2400 */
2401void
2402pmap_unmapdev(vm_offset_t va, vm_size_t size)
2403{
2404	return;
2405}
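
/*
 * Usage sketch only: a driver mapping a device register window with
 * the routines above.  The physical address is made up for the
 * example, and the wrapper function is hypothetical.
 */
#if 0
static void
example_mapdev_use(void)
{
	void *regs;

	regs = pmap_mapdev(0xfed00000, PAGE_SIZE);	/* hypothetical device */
	/* ... device register accesses through 'regs' ... */
	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
}
#endif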
2406
2407/*
2408 * Perform the pmap work for mincore(2).
2409 */
2410int
2411pmap_mincore(pmap_t pmap, vm_offset_t addr)
2412{
2413	pmap_t oldpmap;
2414	struct ia64_lpte *pte;
2415	int val = 0;
2416
2417	oldpmap = pmap_install(pmap);
2418	pte = pmap_find_vhpt(addr);
2419	pmap_install(oldpmap);
2420
2421	if (!pte)
2422		return 0;
2423
2424	if (pmap_pte_v(pte)) {
2425		vm_page_t m;
2426		vm_offset_t pa;
2427
2428		val = MINCORE_INCORE;
2429		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2430			return val;
2431
2432		pa = pmap_pte_pa(pte);
2433
2434		m = PHYS_TO_VM_PAGE(pa);
2435
2436		/*
2437		 * Modified by us
2438		 */
2439		if (pte->pte_d)
2440			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2441		/*
2442		 * Modified by someone
2443		 */
2444		else if (pmap_is_modified(m))
2445			val |= MINCORE_MODIFIED_OTHER;
2446		/*
2447		 * Referenced by us
2448		 */
2449		if (pte->pte_a)
2450			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2451
2452		/*
2453		 * Referenced by someone
2454		 */
2455		else if (pmap_ts_referenced(m)) {
2456			val |= MINCORE_REFERENCED_OTHER;
2457			vm_page_flag_set(m, PG_REFERENCED);
2458		}
2459	}
2460	return val;
2461}
2462
2463void
2464pmap_activate(struct thread *td)
2465{
2466	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2467}
2468
2469pmap_t
2470pmap_switch(pmap_t pm)
2471{
2472	pmap_t prevpm;
2473	int i;
2474
2475	mtx_assert(&sched_lock, MA_OWNED);
2476
2477	prevpm = PCPU_GET(current_pmap);
2478	if (prevpm == pm)
2479		return (prevpm);
2480	if (prevpm != NULL)
2481		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
2482	if (pm == NULL) {
2483		for (i = 0; i < 5; i++) {
2484			ia64_set_rr(IA64_RR_BASE(i),
2485			    (i << 8)|(PAGE_SHIFT << 2)|1);
2486		}
2487	} else {
2488		for (i = 0; i < 5; i++) {
2489			ia64_set_rr(IA64_RR_BASE(i),
2490			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2491		}
2492		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
2493	}
2494	PCPU_SET(current_pmap, pm);
2495	__asm __volatile("srlz.d");
2496	return (prevpm);
2497}
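
/*
 * For reference: the region register values written above are assumed
 * to pack their fields as (rid << 8) | (ps << 2) | ve, i.e. the
 * VHPT-walker enable bit in bit 0, the preferred page size in bits
 * 7:2 and the region id in bits 31:8.  A sketch of that construction:
 */
#if 0
static __inline u_int64_t
example_rr_value(u_int32_t rid)
{
	return (((u_int64_t)rid << 8) | (PAGE_SHIFT << 2) | 1);
}
#endif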
2498
2499static pmap_t
2500pmap_install(pmap_t pm)
2501{
2502	pmap_t prevpm;
2503
2504	mtx_lock_spin(&sched_lock);
2505	prevpm = pmap_switch(pm);
2506	mtx_unlock_spin(&sched_lock);
2507	return (prevpm);
2508}
2509
2510vm_offset_t
2511pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2512{
2513
2514	return addr;
2515}
2516
2517#include "opt_ddb.h"
2518
2519#ifdef DDB
2520
2521#include <ddb/ddb.h>
2522
2523static const char*	psnames[] = {
2524	"1B",	"2B",	"4B",	"8B",
2525	"16B",	"32B",	"64B",	"128B",
2526	"256B",	"512B",	"1K",	"2K",
2527	"4K",	"8K",	"16K",	"32K",
2528	"64K",	"128K",	"256K",	"512K",
2529	"1M",	"2M",	"4M",	"8M",
2530	"16M",	"32M",	"64M",	"128M",
2531	"256M",	"512M",	"1G",	"2G"
2532};
2533
2534static void
2535print_trs(int type)
2536{
2537	struct ia64_pal_result	res;
2538	int			i, maxtr;
2539	struct {
2540		struct ia64_pte	pte;
2541		struct ia64_itir itir;
2542		struct ia64_ifa ifa;
2543		struct ia64_rr	rr;
2544	}			buf;
2545	static const char*	manames[] = {
2546		"WB",	"bad",	"bad",	"bad",
2547		"UC",	"UCE",	"WC",	"NaT",
2549	};
2550
2551	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2552	if (res.pal_status != 0) {
2553		db_printf("Can't get VM summary\n");
2554		return;
2555	}
2556
2557	if (type == 0)
2558		maxtr = (res.pal_result[0] >> 40) & 0xff;
2559	else
2560		maxtr = (res.pal_result[0] >> 32) & 0xff;
2561
2562	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2563	for (i = 0; i <= maxtr; i++) {
2564		bzero(&buf, sizeof(buf));
2565		res = ia64_call_pal_stacked_physical
2566			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2567		if (!(res.pal_result[0] & 1))
2568			buf.pte.pte_ar = 0;
2569		if (!(res.pal_result[0] & 2))
2570			buf.pte.pte_pl = 0;
2571		if (!(res.pal_result[0] & 4))
2572			buf.pte.pte_d = 0;
2573		if (!(res.pal_result[0] & 8))
2574			buf.pte.pte_ma = 0;
2575		db_printf(
2576			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2577			buf.ifa.ifa_ig & 1,
2578			buf.rr.rr_rid,
2579			buf.ifa.ifa_vpn,
2580			buf.pte.pte_ppn,
2581			psnames[buf.itir.itir_ps],
2582			buf.pte.pte_ed,
2583			buf.pte.pte_ar,
2584			buf.pte.pte_pl,
2585			buf.pte.pte_d,
2586			buf.pte.pte_a,
2587			manames[buf.pte.pte_ma],
2588			buf.pte.pte_p,
2589			buf.itir.itir_key);
2590	}
2591}
2592
2593DB_COMMAND(itr, db_itr)
2594{
2595	print_trs(0);
2596}
2597
2598DB_COMMAND(dtr, db_dtr)
2599{
2600	print_trs(1);
2601}
2602
2603DB_COMMAND(rr, db_rr)
2604{
2605	int i;
2606	u_int64_t t;
2607	struct ia64_rr rr;
2608
2609	printf("RR RID    PgSz VE\n");
2610	for (i = 0; i < 8; i++) {
2611		__asm __volatile ("mov %0=rr[%1]"
2612				  : "=r"(t)
2613				  : "r"(IA64_RR_BASE(i)));
2614		*(u_int64_t *) &rr = t;
2615		printf("%d  %06x %4s %d\n",
2616		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2617	}
2618}
2619
2620DB_COMMAND(thash, db_thash)
2621{
2622	if (!have_addr)
2623		return;
2624
2625	db_printf("%p\n", (void *) ia64_thash(addr));
2626}
2627
2628DB_COMMAND(ttag, db_ttag)
2629{
2630	if (!have_addr)
2631		return;
2632
2633	db_printf("0x%lx\n", ia64_ttag(addr));
2634}
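
/*
 * Example ddb usage of the commands above; the address is made up:
 *
 *	db> thash 0xe000000000123000
 *	db> ttag 0xe000000000123000
 */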
2635
2636#endif
2637