pmap.c revision 100543
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 100543 2002-07-23 04:18:10Z arr $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120
121#include <sys/user.h>
122
123#include <machine/pal.h>
124#include <machine/md_var.h>
125
126MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
127
128#ifndef PMAP_SHPGPERPROC
129#define PMAP_SHPGPERPROC 200
130#endif
131
132#if defined(DIAGNOSTIC)
133#define PMAP_DIAGNOSTIC
134#endif
135
136#define MINPV 2048	/* Preallocate at least this many */
137#define MAXPV 20480	/* But no more than this */
138
139#if 0
140#define PMAP_DIAGNOSTIC
141#define PMAP_DEBUG
142#endif
143
144#if !defined(PMAP_DIAGNOSTIC)
145#define PMAP_INLINE __inline
146#else
147#define PMAP_INLINE
148#endif
149
150/*
151 * Get PDEs and PTEs for user/kernel address space
152 */
153#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
154#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
155#define pmap_pte_v(pte)		((pte)->pte_p)
156#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
157#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
158
159#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
160				:((pte)->pte_ig &= ~PTE_IG_WIRED))
161#define pmap_pte_set_prot(pte, v) do {		\
162    (pte)->pte_ar = v >> 2;			\
163    (pte)->pte_pl = v & 3;			\
164} while (0)
165
166/*
167 * Given a map and a machine independent protection code,
168 * convert to an ia64 protection code.
169 */
170#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
171#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
172#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
173int	protection_codes[2][8];
174
175/*
176 * Return non-zero if this pmap is currently active
177 */
178#define pmap_isactive(pmap)	(pmap->pm_active)
179
180/*
181 * Statically allocated kernel pmap
182 */
183struct pmap kernel_pmap_store;
184
185vm_offset_t avail_start;	/* PA of first available physical page */
186vm_offset_t avail_end;		/* PA of last available physical page */
187vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
188vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
189static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
190
191vm_offset_t vhpt_base, vhpt_size;
192
193/*
194 * We use an object to own the kernel's 'page tables'. For simplicity,
195 * we use one page directory to index a set of pages containing
196 * ia64_lptes. This gives us up to 2Gb of kernel virtual space.
197 */
198static vm_object_t kptobj;
199static int nkpt;
200static struct ia64_lpte **kptdir;
201#define KPTE_DIR_INDEX(va) \
202	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
203#define KPTE_PTE_INDEX(va) \
204	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
205#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
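/*
 * Example (assuming 8KB pages, i.e. PAGE_SHIFT == 13, and a 32-byte
 * struct ia64_lpte): each leaf page holds NKPTEPG == 256 lptes and the
 * directory page holds PAGE_SIZE/8 == 1024 pointers, so the scheme
 * spans 1024 * 256 * 8KB == 2GB of region 5, matching the comment above.
 */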
206
207vm_offset_t kernel_vm_end;
208
209/*
210 * Values for ptc.e. XXX values for SKI.
211 */
212static u_int64_t pmap_ptc_e_base = 0x100000000;
213static u_int64_t pmap_ptc_e_count1 = 3;
214static u_int64_t pmap_ptc_e_count2 = 2;
215static u_int64_t pmap_ptc_e_stride1 = 0x2000;
216static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
217
218/*
219 * Data for the RID allocator
220 */
221static u_int64_t *pmap_ridbusy;
222static int pmap_ridmax, pmap_ridcount;
223struct mtx pmap_ridmutex;
224
225/*
226 * Data for the pv entry allocation mechanism
227 */
228static uma_zone_t pvzone;
229static struct vm_object pvzone_obj;
230static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
231static int pmap_pagedaemon_waken = 0;
232static struct pv_entry *pvbootentries;
233static int pvbootnext, pvbootmax;
234
235/*
236 * Data for allocating PTEs for user processes.
237 */
238static uma_zone_t ptezone;
239static struct vm_object ptezone_obj;
240#if 0
241static struct ia64_lpte *pteinit;
242#endif
243
244/*
245 * VHPT instrumentation.
246 */
247static int pmap_vhpt_inserts;
248static int pmap_vhpt_collisions;
249static int pmap_vhpt_resident;
250SYSCTL_DECL(_vm_stats);
251SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
252SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
253	   &pmap_vhpt_inserts, 0, "");
254SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
255	   &pmap_vhpt_collisions, 0, "");
256SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
257	   &pmap_vhpt_resident, 0, "");
258
259static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
260static pv_entry_t get_pv_entry(void);
261static void	ia64_protection_init(void);
262
263static void	pmap_invalidate_all(pmap_t pmap);
264static void	pmap_remove_all(vm_page_t m);
265static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
266static void	*pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
267
268vm_offset_t
269pmap_steal_memory(vm_size_t size)
270{
271	vm_size_t bank_size;
272	vm_offset_t pa, va;
273
274	size = round_page(size);
275
276	bank_size = phys_avail[1] - phys_avail[0];
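	/*
	 * Discard leading banks of phys_avail[] that are too small for
	 * the request; the array is shifted down and stays zero-terminated.
	 */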
277	while (size > bank_size) {
278		int i;
279		for (i = 0; phys_avail[i+2]; i+= 2) {
280			phys_avail[i] = phys_avail[i+2];
281			phys_avail[i+1] = phys_avail[i+3];
282		}
283		phys_avail[i] = 0;
284		phys_avail[i+1] = 0;
285		if (!phys_avail[0])
286			panic("pmap_steal_memory: out of memory");
287		bank_size = phys_avail[1] - phys_avail[0];
288	}
289
290	pa = phys_avail[0];
291	phys_avail[0] += size;
292
293	va = IA64_PHYS_TO_RR7(pa);
294	bzero((caddr_t) va, size);
295	return va;
296}
297
298/*
299 *	Bootstrap the system enough to run with virtual memory.
300 */
301void
302pmap_bootstrap()
303{
304	int i, j, count, ridbits;
305	struct ia64_pal_result res;
306
307	/*
308	 * Query the PAL Code to find the loop parameters for the
309	 * ptc.e instruction.
310	 */
311	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
312	if (res.pal_status != 0)
313		panic("Can't configure ptc.e parameters");
314	pmap_ptc_e_base = res.pal_result[0];
315	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
316	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
317	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
318	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
319	if (bootverbose)
320		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
321		       "stride1=0x%lx, stride2=0x%lx\n",
322		       pmap_ptc_e_base,
323		       pmap_ptc_e_count1,
324		       pmap_ptc_e_count2,
325		       pmap_ptc_e_stride1,
326		       pmap_ptc_e_stride2);
327
328	/*
329	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
330	 */
331	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
332	if (res.pal_status != 0) {
333		if (bootverbose)
334			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
335		ridbits = 18; /* guaranteed minimum */
336	} else {
337		ridbits = (res.pal_result[1] >> 8) & 0xff;
338		if (bootverbose)
339			printf("Processor supports %d Region ID bits\n",
340			       ridbits);
341	}
342	pmap_ridmax = (1 << ridbits);
343	pmap_ridcount = 8;
344	pmap_ridbusy = (u_int64_t *)
345		pmap_steal_memory(pmap_ridmax / 8);
346	bzero(pmap_ridbusy, pmap_ridmax / 8);
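	/* One bit per RID; mark RIDs 0-7, reserved for the kernel, as busy. */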
347	pmap_ridbusy[0] |= 0xff;
348	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
349
350	/*
351	 * Allocate some memory for initial kernel 'page tables'.
352	 */
353	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
354	for (i = 0; i < NKPT; i++) {
355		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
356	}
357	nkpt = NKPT;
358
359	avail_start = phys_avail[0];
360	for (i = 0; phys_avail[i+2]; i+= 2) ;
361	avail_end = phys_avail[i+1];
362	count = i+2;
363
364	/*
365	 * Figure out a useful size for the VHPT, based on the size of
366	 * physical memory and try to locate a region which is large
367	 * enough to contain the VHPT (which must be a power of two in
368	 * size and aligned to a natural boundary).
369	 */
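	/*
	 * The "* 32" below accounts for the 32-byte long-format VHPT
	 * entry: we aim for roughly one entry per physical page, with a
	 * floor of 1<<15 (32KB).
	 */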
370	vhpt_size = 15;
371	while ((1<<vhpt_size) < ia64_btop(avail_end - avail_start) * 32)
372		vhpt_size++;
373
374	vhpt_base = 0;
375	while (!vhpt_base) {
376		vm_offset_t mask;
377		if (bootverbose)
378			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
379		mask = (1L << vhpt_size) - 1;
380		for (i = 0; i < count; i += 2) {
381			vm_offset_t base, limit;
382			base = (phys_avail[i] + mask) & ~mask;
383			limit = base + (1L << vhpt_size);
384			if (limit <= phys_avail[i+1])
385				/*
386				 * VHPT can fit in this region
387				 */
388				break;
389		}
390		if (!phys_avail[i]) {
391			/*
392			 * Can't fit, try next smaller size.
393			 */
394			vhpt_size--;
395		} else {
396			vhpt_base = (phys_avail[i] + mask) & ~mask;
397		}
398	}
399	if (vhpt_size < 15)
400		panic("Can't find space for VHPT");
401
402	if (bootverbose)
403		printf("Putting VHPT at %p\n", (void *) vhpt_base);
404	if (vhpt_base != phys_avail[i]) {
405		/*
406		 * Split this region.
407		 */
408		if (bootverbose)
409			printf("Splitting [%p-%p]\n",
410			       (void *) phys_avail[i],
411			       (void *) phys_avail[i+1]);
412		for (j = count; j > i; j -= 2) {
413			phys_avail[j] = phys_avail[j-2];
414			phys_avail[j+1] = phys_avail[j-2+1];
415		}
416		phys_avail[count+2] = 0;
417		phys_avail[count+3] = 0;
418		phys_avail[i+1] = vhpt_base;
419		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
420	} else {
421		phys_avail[i] = vhpt_base + (1L << vhpt_size);
422	}
423
424	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
425	bzero((void *) vhpt_base, (1L << vhpt_size));
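	/*
	 * cr.pta: bit 0 enables the VHPT walker, bits 7:2 hold log2 of
	 * the VHPT size, bit 8 selects the long format and bits 63:15
	 * hold the (suitably aligned) base address.
	 */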
426	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
427			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
428
429	virtual_avail = IA64_RR_BASE(5);
430	virtual_end = IA64_RR_BASE(6)-1;
431
432	/*
433	 * Initialize protection array.
434	 */
435	ia64_protection_init();
436
437	/*
438	 * Initialize the kernel pmap (which is statically allocated).
439	 */
440	for (i = 0; i < 5; i++)
441		kernel_pmap->pm_rid[i] = 0;
442	kernel_pmap->pm_active = 1;
443	TAILQ_INIT(&kernel_pmap->pm_pvlist);
444	PCPU_SET(current_pmap, kernel_pmap);
445
446	/*
447	 * Region 5 is mapped via the vhpt.
448	 */
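	/*
	 * Region register layout: RID in bits 31:8, preferred page size
	 * in bits 7:2, VHPT-walker enable in bit 0.  Region 5 uses RID 5
	 * from the range reserved for the kernel above.
	 */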
449	ia64_set_rr(IA64_RR_BASE(5),
450		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
451
452	/*
453	 * Region 6 is direct mapped UC and region 7 is direct mapped
454	 * WB. The details of this are controlled by the Alt {I,D}TLB
455	 * handlers. Here we just make sure that they have the largest
456	 * possible page size to minimise TLB usage.
457	 */
458	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
459	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
460
461	/*
462	 * Set up proc0's PCB.
463	 */
464#if 0
465	thread0.td_pcb->pcb_hw.apcb_asn = 0;
466#endif
467
468	/*
469	 * Reserve some memory for allocating pvs while bootstrapping
470	 * the pv allocator. We need to have enough to cover mapping
471	 * the kmem_alloc region used to allocate the initial_pvs in
472	 * pmap_init. In general, the size of this region is
473	 * approximately (# physical pages) * (size of pv entry).
474	 */
475	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
476	pvbootentries = (struct pv_entry *)
477		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
478	pvbootnext = 0;
479
480	/*
481	 * Clear out any random TLB entries left over from booting.
482	 */
483	pmap_invalidate_all(kernel_pmap);
484}
485
486static void *
487pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
488{
489	*flags = UMA_SLAB_PRIV;
490	return (void *)IA64_PHYS_TO_RR7(ia64_tpa(kmem_alloc(kernel_map, bytes)));
491}
492
493/*
494 *	Initialize the pmap module.
495 *	Called by vm_init, to initialize any structures that the pmap
496 *	system needs to map virtual memory.
497 *	pmap_init has been enhanced to support, in a fairly consistent
498 *	way, discontiguous physical memory.
499 */
500void
501pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
502{
503	int i;
504	int initial_pvs;
505
506	/*
507	 * Allocate memory for random pmap data structures.  Includes the
508	 * pv_head_table.
509	 */
510
511	for(i = 0; i < vm_page_array_size; i++) {
512		vm_page_t m;
513
514		m = &vm_page_array[i];
515		TAILQ_INIT(&m->md.pv_list);
516		m->md.pv_list_count = 0;
517 	}
518
519	/*
520	 * Init the pv free list and the PTE free list.
521	 */
522	initial_pvs = vm_page_array_size;
523	if (initial_pvs < MINPV)
524		initial_pvs = MINPV;
525	if (initial_pvs > MAXPV)
526		initial_pvs = MAXPV;
527	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
528	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
529	uma_zone_set_allocf(pvzone, pmap_allocf);
530	uma_prealloc(pvzone, initial_pvs);
531
532	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
533	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
534	uma_zone_set_allocf(ptezone, pmap_allocf);
535	uma_prealloc(ptezone, initial_pvs);
536
537	/*
538	 * Create the object for the kernel's page tables.
539	 */
540	kptobj = vm_object_allocate(OBJT_DEFAULT, MAXKPT);
541
542	/*
543	 * Now it is safe to enable pv_table recording.
544	 */
545	pmap_initialized = TRUE;
546}
547
548/*
549 * Initialize the address space (zone) for the pv_entries.  Set a
550 * high water mark so that the system can recover from excessive
551 * numbers of pv entries.
552 */
553void
554pmap_init2()
555{
556	int shpgperproc = PMAP_SHPGPERPROC;
557
558	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
559	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
560	pv_entry_high_water = 9 * (pv_entry_max / 10);
561	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
562	uma_zone_set_obj(ptezone, &ptezone_obj, pv_entry_max);
563}
564
565
566/***************************************************
567 * Manipulate TLBs for a pmap
568 ***************************************************/
569
570static void
571pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
572{
573	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
574		("invalidating TLB for non-current pmap"));
575	ia64_ptc_g(va, PAGE_SHIFT << 2);
576}
577
578static void
579pmap_invalidate_all_1(void *arg)
580{
581	u_int64_t addr;
582	int i, j;
583	register_t psr;
584
585	psr = intr_disable();
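	/*
	 * Purge the entire local TLB by stepping ptc.e through the
	 * address grid described by the PAL_PTCE_INFO counts and
	 * strides fetched in pmap_bootstrap().
	 */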
586	addr = pmap_ptc_e_base;
587	for (i = 0; i < pmap_ptc_e_count1; i++) {
588		for (j = 0; j < pmap_ptc_e_count2; j++) {
589			ia64_ptc_e(addr);
590			addr += pmap_ptc_e_stride2;
591		}
592		addr += pmap_ptc_e_stride1;
593	}
594	intr_restore(psr);
595}
596
597static void
598pmap_invalidate_all(pmap_t pmap)
599{
600	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
601		("invalidating TLB for non-current pmap"));
602
603
604#ifdef SMP
605	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
606#else
607	pmap_invalidate_all_1(0);
608#endif
609}
610
611static u_int32_t
612pmap_allocate_rid(void)
613{
614	int rid;
615
616	if (pmap_ridcount == pmap_ridmax)
617		panic("pmap_allocate_rid: All Region IDs used");
618
619	do {
620		rid = arc4random() & (pmap_ridmax - 1);
621	} while (pmap_ridbusy[rid / 64] & (1L << (rid & 63)));
622	pmap_ridbusy[rid / 64] |= (1L << (rid & 63));
623	pmap_ridcount++;
624
625	return rid;
626}
627
628static void
629pmap_free_rid(u_int32_t rid)
630{
631	mtx_lock(&pmap_ridmutex);
632	pmap_ridbusy[rid / 64] &= ~(1L << (rid & 63));
633	pmap_ridcount--;
634	mtx_unlock(&pmap_ridmutex);
635}
636
637static void
638pmap_ensure_rid(pmap_t pmap, vm_offset_t va)
639{
640	int rr;
641
642	rr = va >> 61;
643
644	/*
645	 * We get called for virtual addresses that may just as well be
646	 * kernel addresses (i.e. region 5, 6 or 7). Since the pm_rid field
647	 * only holds region IDs for user regions, we have to make sure
648	 * the region is within bounds.
649	 */
650	if (rr >= 5)
651		return;
652
653	if (pmap->pm_rid[rr])
654		return;
655
656	mtx_lock(&pmap_ridmutex);
657	pmap->pm_rid[rr] = pmap_allocate_rid();
658	if (pmap == PCPU_GET(current_pmap))
659		ia64_set_rr(IA64_RR_BASE(rr),
660			    (pmap->pm_rid[rr] << 8)|(PAGE_SHIFT << 2)|1);
661	mtx_unlock(&pmap_ridmutex);
662}
663
664/***************************************************
665 * Low level helper routines.....
666 ***************************************************/
667
668/*
669 * Install a pte into the VHPT
670 */
671static PMAP_INLINE void
672pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
673{
674	u_int64_t *vhp, *p;
675
676	/* invalidate the pte */
677	atomic_set_64(&vhpte->pte_tag, 1L << 63);
678	ia64_mf();			/* make sure everyone sees */
679
680	vhp = (u_int64_t *) vhpte;
681	p = (u_int64_t *) pte;
682
683	vhp[0] = p[0];
684	vhp[1] = p[1];
685	vhp[2] = p[2];			/* sets ti to one */
686
687	ia64_mf();
688}
689
690/*
691 * Compare essential parts of pte.
692 */
693static PMAP_INLINE int
694pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
695{
696	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
697}
698
699/*
700 * this routine defines the region(s) of memory that should
701 * not be tested for the modified bit.
702 */
703static PMAP_INLINE int
704pmap_track_modified(vm_offset_t va)
705{
706	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
707		return 1;
708	else
709		return 0;
710}
711
712/*
713 * Create the KSTACK for a new thread.
714 * This routine directly affects the fork perf for a process/thread.
715 */
716void
717pmap_new_thread(struct thread *td)
718{
719	vm_offset_t *ks;
720
721	/*
722	 * Use contigmalloc for user area so that we can use a region
723	 * 7 address for it which makes it impossible to accidentally
724	 * lose when recording a trapframe.
725	 */
726	ks = contigmalloc(KSTACK_PAGES * PAGE_SIZE, M_PMAP,
727			  M_WAITOK,
728			  0ul,
729			  256*1024*1024 - 1,
730			  PAGE_SIZE,
731			  256*1024*1024);
732
733	if (ks == NULL)
734		panic("pmap_new_thread: could not contigmalloc %d pages\n",
735		    KSTACK_PAGES);
736	td->td_md.md_kstackvirt = ks;
737	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
738}
739
740/*
741 * Dispose the KSTACK for a thread that has exited.
742 * This routine directly impacts the exit perf of a process/thread.
743 */
744void
745pmap_dispose_thread(struct thread *td)
746{
747	contigfree(td->td_md.md_kstackvirt, KSTACK_PAGES * PAGE_SIZE, M_PMAP);
748	td->td_md.md_kstackvirt = 0;
749	td->td_kstack = 0;
750}
751
752/*
753 * Allow the KSTACK for a thread to be prejudicially paged out.
754 */
755void
756pmap_swapout_thread(struct thread *td)
757{
758}
759
760/*
761 * Bring the KSTACK for a specified thread back in.
762 */
763void
764pmap_swapin_thread(struct thread *td)
765{
766}
767
768/***************************************************
769 * Page table page management routines.....
770 ***************************************************/
771
772void
773pmap_pinit0(struct pmap *pmap)
774{
775	/* kernel_pmap is the same as any other pmap. */
776	pmap_pinit(pmap);
777}
778
779/*
780 * Initialize a preallocated and zeroed pmap structure,
781 * such as one in a vmspace structure.
782 */
783void
784pmap_pinit(struct pmap *pmap)
785{
786	int i;
787
788	pmap->pm_flags = 0;
789	for (i = 0; i < 5; i++)
790		pmap->pm_rid[i] = 0;
791	pmap->pm_ptphint = NULL;
792	pmap->pm_active = 0;
793	TAILQ_INIT(&pmap->pm_pvlist);
794	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
795}
796
797/*
798 * Wire in kernel global address entries.  To avoid a race condition
799 * between pmap initialization and pmap_growkernel, this procedure
800 * should be called after the vmspace is attached to the process
801 * but before this pmap is activated.
802 */
803void
804pmap_pinit2(struct pmap *pmap)
805{
806}
807
808/***************************************************
809 * Pmap allocation/deallocation routines.
810 ***************************************************/
811
812/*
813 * Release any resources held by the given physical map.
814 * Called when a pmap initialized by pmap_pinit is being released.
815 * Should only be called if the map contains no valid mappings.
816 */
817void
818pmap_release(pmap_t pmap)
819{
820	int i;
821
822	for (i = 0; i < 5; i++)
823		if (pmap->pm_rid[i])
824			pmap_free_rid(pmap->pm_rid[i]);
825}
826
827/*
828 * grow the number of kernel page table entries, if needed
829 */
830void
831pmap_growkernel(vm_offset_t addr)
832{
833	struct ia64_lpte *ptepage;
834	vm_page_t nkpg;
835
836	if (kernel_vm_end == 0) {
837		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
838			+ IA64_RR_BASE(5);
839	}
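	/* Round the request up to the span covered by one leaf pte page. */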
840	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
841	while (kernel_vm_end < addr) {
842		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
843			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
844				& ~(PAGE_SIZE * NKPTEPG - 1);
845			continue;
846		}
847
848		/*
849		 * We could handle more by increasing the size of kptdir.
850		 */
851		if (nkpt == MAXKPT)
852			panic("pmap_growkernel: out of kernel address space");
853
854		/*
855		 * This index is bogus, but out of the way
856		 */
857		nkpg = vm_page_alloc(kptobj, nkpt,
858		    VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
859		if (!nkpg)
860			panic("pmap_growkernel: no memory to grow kernel");
861
862		nkpt++;
863		ptepage = (struct ia64_lpte *)
864			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
865		bzero(ptepage, PAGE_SIZE);
866		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
867
868		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
869	}
870}
871
872/***************************************************
873 * page management routines.
874 ***************************************************/
875
876/*
877 * free the pv_entry back to the free list
878 */
879static PMAP_INLINE void
880free_pv_entry(pv_entry_t pv)
881{
882	pv_entry_count--;
883	uma_zfree(pvzone, pv);
884}
885
886/*
887 * get a new pv_entry, allocating a block from the system
888 * when needed.
889 * the memory allocation is performed bypassing the malloc code
890 * because of the possibility of allocations at interrupt time.
891 */
892static pv_entry_t
893get_pv_entry(void)
894{
895	pv_entry_count++;
896	if (pv_entry_high_water &&
897		(pv_entry_count > pv_entry_high_water) &&
898		(pmap_pagedaemon_waken == 0)) {
899		pmap_pagedaemon_waken = 1;
900		wakeup (&vm_pages_needed);
901	}
902	return uma_zalloc(pvzone, M_WAITOK);
903}
904
905/*
906 * Add an ia64_lpte to the VHPT.
907 */
908static void
909pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
910{
911	struct ia64_lpte *vhpte;
912
913	pmap_vhpt_inserts++;
914	pmap_vhpt_resident++;
915
916	vhpte = (struct ia64_lpte *) ia64_thash(va);
917
918	if (vhpte->pte_chain)
919		pmap_vhpt_collisions++;
920
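	/*
	 * Link the new pte at the head of this bucket's collision chain.
	 * The chain holds physical addresses (hence ia64_tpa here and
	 * IA64_PHYS_TO_RR7 when it is walked), so following it does not
	 * depend on any particular virtual mapping.
	 */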
921	pte->pte_chain = vhpte->pte_chain;
922	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
923
924	if (!vhpte->pte_p && pte->pte_p)
925		pmap_install_pte(vhpte, pte);
926	else
927		ia64_mf();
928}
929
930/*
931 * Update VHPT after a pte has changed.
932 */
933static void
934pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
935{
936	struct ia64_lpte *vhpte;
937
938	vhpte = (struct ia64_lpte *) ia64_thash(va);
939
940	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
941	    && pte->pte_p)
942		pmap_install_pte(vhpte, pte);
943}
944
945/*
946 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
947 * worked or an appropriate error code otherwise.
948 */
949static int
950pmap_remove_vhpt(vm_offset_t va)
951{
952	struct ia64_lpte *pte;
953	struct ia64_lpte *lpte;
954	struct ia64_lpte *vhpte;
955	u_int64_t tag;
956	int error = ENOENT;
957
958	vhpte = (struct ia64_lpte *) ia64_thash(va);
959
960	/*
961	 * If the VHPTE is invalid, there can't be a collision chain.
962	 */
963	if (!vhpte->pte_p) {
964		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
965		printf("can't remove vhpt entry for 0x%lx\n", va);
966		goto done;
967	}
968
969	lpte = vhpte;
970	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
971	tag = ia64_ttag(va);
972
973	while (pte->pte_tag != tag) {
974		lpte = pte;
975		if (pte->pte_chain)
976			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
977		else {
978			printf("can't remove vhpt entry for 0x%lx\n", va);
979			goto done;
980		}
981	}
982
983	/*
984	 * Snip this pv_entry out of the collision chain.
985	 * Snip this pte out of the collision chain.
986	lpte->pte_chain = pte->pte_chain;
987
988	/*
989	 * If the VHPTE matches as well, change it to map the first
990	 * element from the chain if there is one.
991	 */
992	if (vhpte->pte_tag == tag) {
993		if (vhpte->pte_chain) {
994			pte = (struct ia64_lpte *)
995				IA64_PHYS_TO_RR7(vhpte->pte_chain);
996			pmap_install_pte(vhpte, pte);
997		} else {
998			vhpte->pte_p = 0;
999			ia64_mf();
1000		}
1001	}
1002
1003	pmap_vhpt_resident--;
1004	error = 0;
1005 done:
1006	return error;
1007}
1008
1009/*
1010 * Find the ia64_lpte for the given va, if any.
1011 */
1012static struct ia64_lpte *
1013pmap_find_vhpt(vm_offset_t va)
1014{
1015	struct ia64_lpte *pte;
1016	u_int64_t tag;
1017
1018	pte = (struct ia64_lpte *) ia64_thash(va);
1019	if (!pte->pte_chain) {
1020		pte = 0;
1021		goto done;
1022	}
1023
1024	tag = ia64_ttag(va);
1025	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1026
1027	while (pte->pte_tag != tag) {
1028		if (pte->pte_chain) {
1029			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1030		} else {
1031			pte = 0;
1032			break;
1033		}
1034	}
1035
1036 done:
1037	return pte;
1038}
1039
1040/*
1041 * Remove an entry from the list of managed mappings.
1042 */
1043static int
1044pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1045{
1046	if (!pv) {
1047		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1048			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1049				if (pmap == pv->pv_pmap && va == pv->pv_va)
1050					break;
1051			}
1052		} else {
1053			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1054				if (va == pv->pv_va)
1055					break;
1056			}
1057		}
1058	}
1059
1060	if (pv) {
1061		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1062		m->md.pv_list_count--;
1063		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1064			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1065
1066		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1067		free_pv_entry(pv);
1068		return 0;
1069	} else {
1070		return ENOENT;
1071	}
1072}
1073
1074/*
1075 * Create a pv entry for page at pa for
1076 * (pmap, va).
1077 */
1078static void
1079pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1080{
1081	pv_entry_t pv;
1082
1083	pv = get_pv_entry();
1084	pv->pv_pmap = pmap;
1085	pv->pv_va = va;
1086
1087	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1088	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1089	m->md.pv_list_count++;
1090}
1091
1092/*
1093 *	Routine:	pmap_extract
1094 *	Function:
1095 *		Extract the physical page address associated
1096 *		with the given map/virtual_address pair.
1097 */
1098vm_offset_t
1099pmap_extract(pmap, va)
1100	register pmap_t pmap;
1101	vm_offset_t va;
1102{
1103	pmap_t oldpmap;
1104	vm_offset_t pa;
1105
1106	oldpmap = pmap_install(pmap);
1107	pa = ia64_tpa(va);
1108	pmap_install(oldpmap);
1109	return pa;
1110}
1111
1112/***************************************************
1113 * Low level mapping routines.....
1114 ***************************************************/
1115
1116/*
1117 * Find the kernel lpte for mapping the given virtual address, which
1118 * must be in the part of region 5 which we can cover with our kernel
1119 * 'page tables'.
1120 */
1121static struct ia64_lpte *
1122pmap_find_kpte(vm_offset_t va)
1123{
1124	KASSERT((va >> 61) == 5,
1125		("kernel mapping 0x%lx not in region 5", va));
1126	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1127		("kernel mapping 0x%lx out of range", va));
1128	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1129}
1130
1131/*
1132 * Find a pte suitable for mapping a user-space address. If one exists
1133 * in the VHPT, that one will be returned, otherwise a new pte is
1134 * allocated.
1135 */
1136static struct ia64_lpte *
1137pmap_find_pte(vm_offset_t va)
1138{
1139	struct ia64_lpte *pte;
1140
1141	if (va >= VM_MAXUSER_ADDRESS)
1142		return pmap_find_kpte(va);
1143
1144	pte = pmap_find_vhpt(va);
1145	if (!pte) {
1146		pte = uma_zalloc(ptezone, M_WAITOK);
1147		pte->pte_p = 0;
1148	}
1149	return pte;
1150}
1151
1152/*
1153 * Free a pte which is now unused. This simply returns it to the zone
1154 * allocator if it is a user mapping. For kernel mappings, clear the
1155 * valid bit to make it clear that the mapping is not currently used.
1156 */
1157static void
1158pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1159{
1160	if (va < VM_MAXUSER_ADDRESS)
1161		uma_zfree(ptezone, pte);
1162	else
1163		pte->pte_p = 0;
1164}
1165
1166/*
1167 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1168 * the pte was originally valid, then it's assumed to already be in the
1169 * VHPT.
1170 */
1171static void
1172pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1173	     int ig, int pl, int ar)
1174{
1175	int wasvalid = pte->pte_p;
1176
1177	pte->pte_p = 1;
1178	pte->pte_ma = PTE_MA_WB;
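	/*
	 * Managed mappings start with the accessed and dirty bits clear
	 * so the first reference and first write fault, letting those
	 * bits be reflected back into the vm_page (see pmap_remove_pte).
	 * Unmanaged mappings preset both bits to avoid the extra faults.
	 */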
1179	if (ig & PTE_IG_MANAGED) {
1180		pte->pte_a = 0;
1181		pte->pte_d = 0;
1182	} else {
1183		pte->pte_a = 1;
1184		pte->pte_d = 1;
1185	}
1186	pte->pte_pl = pl;
1187	pte->pte_ar = ar;
1188	pte->pte_ppn = pa >> 12;
1189	pte->pte_ed = 0;
1190	pte->pte_ig = ig;
1191
1192	pte->pte_ps = PAGE_SHIFT;
1193	pte->pte_key = 0;
1194
1195	pte->pte_tag = ia64_ttag(va);
1196
1197	if (wasvalid) {
1198		pmap_update_vhpt(pte, va);
1199	} else {
1200		pmap_enter_vhpt(pte, va);
1201	}
1202}
1203
1204/*
1205 * If a pte contains a valid mapping, clear it and update the VHPT.
1206 */
1207static void
1208pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1209{
1210	if (pte->pte_p) {
1211		pmap_remove_vhpt(va);
1212		ia64_ptc_g(va, PAGE_SHIFT << 2);
1213		pte->pte_p = 0;
1214	}
1215}
1216
1217/*
1218 * Remove the (possibly managed) mapping represented by pte from the
1219 * given pmap.
1220 */
1221static int
1222pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1223		pv_entry_t pv, int freepte)
1224{
1225	int error;
1226	vm_page_t m;
1227
1228	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1229		("removing pte for non-current pmap"));
1230
1231	/*
1232	 * First remove from the VHPT.
1233	 */
1234	error = pmap_remove_vhpt(va);
1235	if (error)
1236		return error;
1237
1238	/*
1239	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1240	 */
1241	pte->pte_p = 0;
1242
1243	if (pte->pte_ig & PTE_IG_WIRED)
1244		pmap->pm_stats.wired_count -= 1;
1245
1246	pmap->pm_stats.resident_count -= 1;
1247	if (pte->pte_ig & PTE_IG_MANAGED) {
1248		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1249		if (pte->pte_d)
1250			if (pmap_track_modified(va))
1251				vm_page_dirty(m);
1252		if (pte->pte_a)
1253			vm_page_flag_set(m, PG_REFERENCED);
1254
1255		if (freepte)
1256			pmap_free_pte(pte, va);
1257		return pmap_remove_entry(pmap, m, va, pv);
1258	} else {
1259		if (freepte)
1260			pmap_free_pte(pte, va);
1261		return 0;
1262	}
1263}
1264
1265/*
1266 * Add a list of wired pages to the kva.
1267 * This routine is only used for temporary
1268 * kernel mappings that do not need to have
1269 * page modification or references recorded.
1270 * Note that old mappings are simply written
1271 * over.  The page *must* be wired.
1272 */
1273void
1274pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1275{
1276	int i;
1277	struct ia64_lpte *pte;
1278
1279	for (i = 0; i < count; i++) {
1280		vm_offset_t tva = va + i * PAGE_SIZE;
1281		int wasvalid;
1282		pte = pmap_find_kpte(tva);
1283		wasvalid = pte->pte_p;
1284		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1285			     0, PTE_PL_KERN, PTE_AR_RWX);
1286		if (wasvalid)
1287			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1288	}
1289}
1290
1291/*
1292 * this routine jerks page mappings from the
1293 * kernel -- it is meant only for temporary mappings.
1294 */
1295void
1296pmap_qremove(vm_offset_t va, int count)
1297{
1298	int i;
1299	struct ia64_lpte *pte;
1300
1301	for (i = 0; i < count; i++) {
1302		pte = pmap_find_kpte(va);
1303		pmap_clear_pte(pte, va);
1304		va += PAGE_SIZE;
1305	}
1306}
1307
1308/*
1309 * Add a wired page to the kva.
1310 */
1311void
1312pmap_kenter(vm_offset_t va, vm_offset_t pa)
1313{
1314	struct ia64_lpte *pte;
1315	int wasvalid;
1316
1317	pte = pmap_find_kpte(va);
1318	wasvalid = pte->pte_p;
1319	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1320	if (wasvalid)
1321		ia64_ptc_g(va, PAGE_SHIFT << 2);
1322}
1323
1324/*
1325 * Remove a page from the kva
1326 */
1327void
1328pmap_kremove(vm_offset_t va)
1329{
1330	struct ia64_lpte *pte;
1331
1332	pte = pmap_find_kpte(va);
1333	pmap_clear_pte(pte, va);
1334}
1335
1336/*
1337 *	Used to map a range of physical addresses into kernel
1338 *	virtual address space.
1339 *
1340 *	The value passed in '*virt' is a suggested virtual address for
1341 *	the mapping. Architectures which can support a direct-mapped
1342 *	physical to virtual region can return the appropriate address
1343 *	within that region, leaving '*virt' unchanged. Other
1344 *	architectures should map the pages starting at '*virt' and
1345 *	update '*virt' with the first usable address after the mapped
1346 *	region.
1347 */
1348vm_offset_t
1349pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1350{
1351	return IA64_PHYS_TO_RR7(start);
1352}
1353
1354/*
1355 * This routine is very drastic, but can save the system
1356 * in a pinch.
1357 */
1358void
1359pmap_collect()
1360{
1361	int i;
1362	vm_page_t m;
1363	static int warningdone = 0;
1364
1365	if (pmap_pagedaemon_waken == 0)
1366		return;
1367
1368	if (warningdone < 5) {
1369		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1370		warningdone++;
1371	}
1372
1373	for(i = 0; i < vm_page_array_size; i++) {
1374		m = &vm_page_array[i];
1375		if (m->wire_count || m->hold_count || m->busy ||
1376		    (m->flags & (PG_BUSY | PG_UNMANAGED)))
1377			continue;
1378		pmap_remove_all(m);
1379	}
1380	pmap_pagedaemon_waken = 0;
1381}
1382
1383/*
1384 * Remove a single page from a process address space
1385 */
1386static void
1387pmap_remove_page(pmap_t pmap, vm_offset_t va)
1388{
1389	struct ia64_lpte *pte;
1390
1391	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1392		("removing page for non-current pmap"));
1393
1394	pte = pmap_find_vhpt(va);
1395	if (pte) {
1396		pmap_remove_pte(pmap, pte, va, 0, 1);
1397		pmap_invalidate_page(pmap, va);
1398	}
1399	return;
1400}
1401
1402/*
1403 *	Remove the given range of addresses from the specified map.
1404 *
1405 *	It is assumed that the start and end are properly
1406 *	rounded to the page size.
1407 */
1408void
1409pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1410{
1411	pmap_t oldpmap;
1412	vm_offset_t va;
1413	pv_entry_t pv;
1414	struct ia64_lpte *pte;
1415
1416	if (pmap == NULL)
1417		return;
1418
1419	if (pmap->pm_stats.resident_count == 0)
1420		return;
1421
1422	oldpmap = pmap_install(pmap);
1423
1424	/*
1425	 * Special handling for removing a single page: it is a very
1426	 * common operation and it is easy to short-circuit some
1427	 * code.
1428	 */
1429	if (sva + PAGE_SIZE == eva) {
1430		pmap_remove_page(pmap, sva);
1431		pmap_install(oldpmap);
1432		return;
1433	}
1434
1435	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1436		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1437			va = pv->pv_va;
1438			if (va >= sva && va < eva) {
1439				pte = pmap_find_vhpt(va);
1440				pmap_remove_pte(pmap, pte, va, pv, 1);
1441				pmap_invalidate_page(pmap, va);
1442			}
1443		}
1444
1445	} else {
1446		for (va = sva; va < eva; va += PAGE_SIZE) {
1447			pte = pmap_find_vhpt(va);
1448			if (pte) {
1449				pmap_remove_pte(pmap, pte, va, 0, 1);
1450				pmap_invalidate_page(pmap, va);
1451			}
1452		}
1453	}
1454
1455	pmap_install(oldpmap);
1456}
1457
1458/*
1459 *	Routine:	pmap_remove_all
1460 *	Function:
1461 *		Removes this physical page from
1462 *		all physical maps in which it resides.
1463 *		Reflects back modify bits to the pager.
1464 *
1465 *	Notes:
1466 *		Original versions of this routine were very
1467 *		inefficient because they iteratively called
1468 *		pmap_remove (slow...)
1469 */
1470
1471static void
1472pmap_remove_all(vm_page_t m)
1473{
1474	pmap_t oldpmap;
1475	pv_entry_t pv;
1476	int nmodify;
1477	int s;
1478
1479	nmodify = 0;
1480#if defined(PMAP_DIAGNOSTIC)
1481	/*
1482	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1483	 * pages!
1484	 */
1485	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1486		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1487	}
1488#endif
1489
1490	s = splvm();
1491
1492	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1493		struct ia64_lpte *pte;
1494		pmap_t pmap = pv->pv_pmap;
1495		vm_offset_t va = pv->pv_va;
1496
1497		oldpmap = pmap_install(pmap);
1498		pte = pmap_find_vhpt(va);
1499		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1500			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1501		pmap_remove_pte(pmap, pte, va, pv, 1);
1502		pmap_invalidate_page(pmap, va);
1503		pmap_install(oldpmap);
1504	}
1505
1506	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1507
1508	splx(s);
1509	return;
1510}
1511
1512/*
1513 *	Set the physical protection on the
1514 *	specified range of this map as requested.
1515 */
1516void
1517pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1518{
1519	pmap_t oldpmap;
1520	struct ia64_lpte *pte;
1521	int newprot;
1522
1523	if (pmap == NULL)
1524		return;
1525
1526	oldpmap = pmap_install(pmap);
1527
1528	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1529		pmap_remove(pmap, sva, eva);
1530		pmap_install(oldpmap);
1531		return;
1532	}
1533
1534	if (prot & VM_PROT_WRITE) {
1535		pmap_install(oldpmap);
1536		return;
1537	}
1538
1539	newprot = pte_prot(pmap, prot);
1540
1541	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1542		panic("pmap_protect: unaligned addresses");
1543
1544	while (sva < eva) {
1545		/*
1546		 * If page is invalid, skip this page
1547		 */
1548		pte = pmap_find_vhpt(sva);
1549		if (!pte) {
1550			sva += PAGE_SIZE;
1551			continue;
1552		}
1553
1554		if (pmap_pte_prot(pte) != newprot) {
1555			if (pte->pte_ig & PTE_IG_MANAGED) {
1556				vm_offset_t pa = pmap_pte_pa(pte);
1557				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1558				if (pte->pte_d) {
1559					if (pmap_track_modified(sva))
1560						vm_page_dirty(m);
1561					pte->pte_d = 0;
1562				}
1563				if (pte->pte_a) {
1564					vm_page_flag_set(m, PG_REFERENCED);
1565					pte->pte_a = 0;
1566				}
1567			}
1568			pmap_pte_set_prot(pte, newprot);
1569			pmap_update_vhpt(pte, sva);
1570			pmap_invalidate_page(pmap, sva);
1571		}
1572
1573		sva += PAGE_SIZE;
1574	}
1575	pmap_install(oldpmap);
1576}
1577
1578/*
1579 *	Insert the given physical page (p) at
1580 *	the specified virtual address (v) in the
1581 *	target physical map with the protection requested.
1582 *
1583 *	If specified, the page will be wired down, meaning
1584 *	that the related pte can not be reclaimed.
1585 *
1586 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1587 *	or lose information.  That is, this routine must actually
1588 *	insert this page into the given map NOW.
1589 */
1590void
1591pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1592	   boolean_t wired)
1593{
1594	pmap_t oldpmap;
1595	vm_offset_t pa;
1596	vm_offset_t opa;
1597	struct ia64_lpte origpte;
1598	struct ia64_lpte *pte;
1599	int managed;
1600
1601	if (pmap == NULL)
1602		return;
1603
1604	pmap_ensure_rid(pmap, va);
1605
1606	oldpmap = pmap_install(pmap);
1607
1608	va &= ~PAGE_MASK;
1609#ifdef PMAP_DIAGNOSTIC
1610	if (va > VM_MAX_KERNEL_ADDRESS)
1611		panic("pmap_enter: toobig");
1612#endif
1613
1614	/*
1615	 * Find (or create) a pte for the given mapping.
1616	 */
1617	pte = pmap_find_pte(va);
1618	origpte = *pte;
1619
1620	if (origpte.pte_p)
1621		opa = pmap_pte_pa(&origpte);
1622	else
1623		opa = 0;
1624	managed = 0;
1625
1626	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1627
1628	/*
1629	 * Mapping has not changed, must be protection or wiring change.
1630	 */
1631	if (origpte.pte_p && (opa == pa)) {
1632		/*
1633		 * Wiring change, just update stats. We don't worry about
1634		 * wiring PT pages as they remain resident as long as there
1635		 * are valid mappings in them. Hence, if a user page is wired,
1636		 * the PT page will be also.
1637		 */
1638		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1639			pmap->pm_stats.wired_count++;
1640		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1641			pmap->pm_stats.wired_count--;
1642
1643		/*
1644		 * We might be turning off write access to the page,
1645		 * so we go ahead and sense modify status.
1646		 */
1647		if (origpte.pte_ig & PTE_IG_MANAGED) {
1648			if (origpte.pte_d && pmap_track_modified(va)) {
1649				vm_page_t om;
1650				om = PHYS_TO_VM_PAGE(opa);
1651				vm_page_dirty(om);
1652			}
1653		}
1654
1655		managed = origpte.pte_ig & PTE_IG_MANAGED;
1656		goto validate;
1657	}
1658	/*
1659	 * Mapping has changed, invalidate old range and fall
1660	 * through to handle validating new mapping.
1661	 */
1662	if (opa) {
1663		int error;
1664		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1665		if (error)
1666			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1667	}
1668
1669	/*
1670	 * Enter on the PV list if part of our managed memory.
1671	 */
1672	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1673		pmap_insert_entry(pmap, va, m);
1674		managed |= PTE_IG_MANAGED;
1675	}
1676
1677	/*
1678	 * Increment counters
1679	 */
1680	pmap->pm_stats.resident_count++;
1681	if (wired)
1682		pmap->pm_stats.wired_count++;
1683
1684validate:
1685
1686	/*
1687	 * Now validate mapping with desired protection/wiring. This
1688	 * adds the pte to the VHPT if necessary.
1689	 */
1690	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1691		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1692
1693	/*
1694	 * if the mapping or permission bits are different, we need
1695	 * to invalidate the page.
1696	 */
1697	if (!pmap_equal_pte(&origpte, pte))
1698		pmap_invalidate_page(pmap, va);
1699
1700	pmap_install(oldpmap);
1701}
1702
1703/*
1704 * this code makes some *MAJOR* assumptions:
1705 * 1. Current pmap & pmap exists.
1706 * 2. Not wired.
1707 * 3. Read access.
1708 * 4. No page table pages.
1709 * 5. Tlbflush is deferred to calling procedure.
1710 * 6. Page IS managed.
1711 * but is *MUCH* faster than pmap_enter...
1712 */
1713
1714static void
1715pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1716{
1717	struct ia64_lpte *pte;
1718	pmap_t oldpmap;
1719
1720	pmap_ensure_rid(pmap, va);
1721
1722	oldpmap = pmap_install(pmap);
1723
1724	pte = pmap_find_pte(va);
1725	if (pte->pte_p)
1726		return;
1727
1728	/*
1729	 * Enter on the PV list since it's part of our managed memory.
1730	 */
1731	pmap_insert_entry(pmap, va, m);
1732
1733	/*
1734	 * Increment counters
1735	 */
1736	pmap->pm_stats.resident_count++;
1737
1738	/*
1739	 * Initialise PTE with read-only protection and enter into VHPT.
1740	 */
1741	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1742		     PTE_IG_MANAGED,
1743		     PTE_PL_USER, PTE_AR_R);
1744
1745	pmap_install(oldpmap);
1746}
1747
1748/*
1749 * Make temporary mapping for a physical address. This is called
1750 * during dump.
1751 */
1752void *
1753pmap_kenter_temporary(vm_offset_t pa, int i)
1754{
1755	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1756}
1757
1758#define MAX_INIT_PT (96)
1759/*
1760 * pmap_object_init_pt preloads the ptes for a given object
1761 * into the specified pmap.  This eliminates the blast of soft
1762 * faults on process startup and immediately after an mmap.
1763 */
1764void
1765pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1766		    vm_object_t object, vm_pindex_t pindex,
1767		    vm_size_t size, int limit)
1768{
1769	pmap_t oldpmap;
1770	vm_offset_t tmpidx;
1771	int psize;
1772	vm_page_t p;
1773	int objpgs;
1774
1775	if (pmap == NULL || object == NULL)
1776		return;
1777
1778	oldpmap = pmap_install(pmap);
1779
1780	psize = ia64_btop(size);
1781
1782	if ((object->type != OBJT_VNODE) ||
1783		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1784			(object->resident_page_count > MAX_INIT_PT))) {
1785		pmap_install(oldpmap);
1786		return;
1787	}
1788
1789	if (psize + pindex > object->size) {
1790		if (object->size < pindex)
1791			return;
1792		psize = object->size - pindex;
1793	}
1794
1795	/*
1796	 * if we are processing a major portion of the object, then scan the
1797	 * entire thing.
1798	 */
1799	if (psize > (object->resident_page_count >> 2)) {
1800		objpgs = psize;
1801
1802		for (p = TAILQ_FIRST(&object->memq);
1803		    ((objpgs > 0) && (p != NULL));
1804		    p = TAILQ_NEXT(p, listq)) {
1805
1806			tmpidx = p->pindex;
1807			if (tmpidx < pindex) {
1808				continue;
1809			}
1810			tmpidx -= pindex;
1811			if (tmpidx >= psize) {
1812				continue;
1813			}
1814			/*
1815			 * don't allow an madvise to blow away our really
1816			 * free pages by allocating pv entries.
1817			 */
1818			if ((limit & MAP_PREFAULT_MADVISE) &&
1819			    cnt.v_free_count < cnt.v_free_reserved) {
1820				break;
1821			}
1822			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1823				(p->busy == 0) &&
1824			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1825				if ((p->queue - p->pc) == PQ_CACHE)
1826					vm_page_deactivate(p);
1827				vm_page_busy(p);
1828				pmap_enter_quick(pmap,
1829						 addr + ia64_ptob(tmpidx), p);
1830				vm_page_flag_set(p, PG_MAPPED);
1831				vm_page_wakeup(p);
1832			}
1833			objpgs -= 1;
1834		}
1835	} else {
1836		/*
1837		 * else lookup the pages one-by-one.
1838		 */
1839		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1840			/*
1841			 * don't allow an madvise to blow away our really
1842			 * free pages by allocating pv entries.
1843			 */
1844			if ((limit & MAP_PREFAULT_MADVISE) &&
1845			    cnt.v_free_count < cnt.v_free_reserved) {
1846				break;
1847			}
1848			p = vm_page_lookup(object, tmpidx + pindex);
1849			if (p &&
1850			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1851				(p->busy == 0) &&
1852			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1853				if ((p->queue - p->pc) == PQ_CACHE)
1854					vm_page_deactivate(p);
1855				vm_page_busy(p);
1856				pmap_enter_quick(pmap,
1857						 addr + ia64_ptob(tmpidx), p);
1858				vm_page_flag_set(p, PG_MAPPED);
1859				vm_page_wakeup(p);
1860			}
1861		}
1862	}
1863	pmap_install(oldpmap);
1864	return;
1865}
1866
1867/*
1868 * pmap_prefault provides a quick way of clustering
1869 * pagefaults into a process's address space.  It is a "cousin"
1870 * of pmap_object_init_pt, except it runs at page fault time instead
1871 * of mmap time.
1872 */
1873#define PFBAK 4
1874#define PFFOR 4
1875#define PAGEORDER_SIZE (PFBAK+PFFOR)
1876
1877static int pmap_prefault_pageorder[] = {
1878	-PAGE_SIZE, PAGE_SIZE,
1879	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1880	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1881	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1882};
1883
1884void
1885pmap_prefault(pmap, addra, entry)
1886	pmap_t pmap;
1887	vm_offset_t addra;
1888	vm_map_entry_t entry;
1889{
1890	int i;
1891	vm_offset_t starta;
1892	vm_offset_t addr;
1893	vm_pindex_t pindex;
1894	vm_page_t m, mpte;
1895	vm_object_t object;
1896
1897	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1898		return;
1899
1900	object = entry->object.vm_object;
1901
1902	starta = addra - PFBAK * PAGE_SIZE;
1903	if (starta < entry->start) {
1904		starta = entry->start;
1905	} else if (starta > addra) {
1906		starta = 0;
1907	}
1908
1909	mpte = NULL;
1910	for (i = 0; i < PAGEORDER_SIZE; i++) {
1911		vm_object_t lobject;
1912		struct ia64_lpte *pte;
1913
1914		addr = addra + pmap_prefault_pageorder[i];
1915		if (addr > addra + (PFFOR * PAGE_SIZE))
1916			addr = 0;
1917
1918		if (addr < starta || addr >= entry->end)
1919			continue;
1920
1921		pte = pmap_find_vhpt(addr);
1922		if (pte && pte->pte_p)
1923			continue;
1924
1925		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
1926		lobject = object;
1927		for (m = vm_page_lookup(lobject, pindex);
1928		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
1929		    lobject = lobject->backing_object) {
1930			if (lobject->backing_object_offset & PAGE_MASK)
1931				break;
1932			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
1933			m = vm_page_lookup(lobject->backing_object, pindex);
1934		}
1935
1936		/*
1937		 * give up when a page is not in memory
1938		 */
1939		if (m == NULL)
1940			break;
1941
1942		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1943			(m->busy == 0) &&
1944		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1945
1946			if ((m->queue - m->pc) == PQ_CACHE) {
1947				vm_page_deactivate(m);
1948			}
1949			vm_page_busy(m);
1950			pmap_enter_quick(pmap, addr, m);
1951			vm_page_flag_set(m, PG_MAPPED);
1952			vm_page_wakeup(m);
1953		}
1954	}
1955}
1956
1957/*
1958 *	Routine:	pmap_change_wiring
1959 *	Function:	Change the wiring attribute for a map/virtual-address
1960 *			pair.
1961 *	In/out conditions:
1962 *			The mapping must already exist in the pmap.
1963 */
1964void
1965pmap_change_wiring(pmap, va, wired)
1966	register pmap_t pmap;
1967	vm_offset_t va;
1968	boolean_t wired;
1969{
1970	pmap_t oldpmap;
1971	struct ia64_lpte *pte;
1972
1973	if (pmap == NULL)
1974		return;
1975
1976	oldpmap = pmap_install(pmap);
1977
1978	pte = pmap_find_vhpt(va);
1979
1980	if (wired && !pmap_pte_w(pte))
1981		pmap->pm_stats.wired_count++;
1982	else if (!wired && pmap_pte_w(pte))
1983		pmap->pm_stats.wired_count--;
1984
1985	/*
1986	 * Wiring is not a hardware characteristic so there is no need to
1987	 * invalidate TLB.
1988	 */
1989	pmap_pte_set_w(pte, wired);
1990
1991	pmap_install(oldpmap);
1992}
1993
1994
1995
1996/*
1997 *	Copy the range specified by src_addr/len
1998 *	from the source map to the range dst_addr/len
1999 *	in the destination map.
2000 *
2001 *	This routine is only advisory and need not do anything.
2002 */
2003
2004void
2005pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2006	  vm_offset_t src_addr)
2007{
2008}
2009
2010
2011/*
2012 *	pmap_zero_page zeros the specified hardware page by
2013 *	mapping it into virtual memory and using bzero to clear
2014 *	its contents.
2015 */
2016
2017void
2018pmap_zero_page(vm_page_t m)
2019{
2020	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2021	bzero((caddr_t) va, PAGE_SIZE);
2022}
2023
2024
2025/*
2026 *	pmap_zero_page_area zeros the specified hardware page by
2027 *	mapping it into virtual memory and using bzero to clear
2028 *	its contents.
2029 *
2030 *	off and size must reside within a single page.
2031 */
2032
2033void
2034pmap_zero_page_area(vm_page_t m, int off, int size)
2035{
2036	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2037	bzero((char *)(caddr_t)va + off, size);
2038}
2039
2040
2041/*
2042 *	pmap_zero_page_idle zeros the specified hardware page by
2043 *	mapping it into virtual memory and using bzero to clear
2044 *	its contents.  This is for the vm_idlezero process.
2045 */
2046
2047void
2048pmap_zero_page_idle(vm_page_t m)
2049{
2050	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2051	bzero((caddr_t) va, PAGE_SIZE);
2052}
2053
2054
2055/*
2056 *	pmap_copy_page copies the specified (machine independent)
2057 *	page by mapping the page into virtual memory and using
2058 *	bcopy to copy the page, one machine dependent page at a
2059 *	time.
2060 */
2061void
2062pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2063{
2064	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2065	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2066	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2067}
2068
2069
2070/*
2071 *	Routine:	pmap_pageable
2072 *	Function:
2073 *		Make the specified pages (by pmap, offset)
2074 *		pageable (or not) as requested.
2075 *
2076 *		A page which is not pageable may not take
2077 *		a fault; therefore, its page table entry
2078 *		must remain valid for the duration.
2079 *
2080 *		This routine is merely advisory; pmap_enter
2081 *		will specify that these pages are to be wired
2082 *		down (or not) as appropriate.
2083 */
2084void
2085pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
2086	      boolean_t pageable)
2087{
2088}
2089
2090/*
2091 * Returns true if the pmap's pv is one of the first
2092 * 16 pvs linked to from this page.  This count may
2093 * be changed upwards or downwards in the future; it
2094 * is only necessary that true be returned for a small
2095 * subset of pmaps for proper page aging.
2096 */
2097boolean_t
2098pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2099{
2100	pv_entry_t pv;
2101	int loops = 0;
2102	int s;
2103
2104	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2105		return FALSE;
2106
2107	s = splvm();
2108
	/*
	 * Check the page's current mappings, returning immediately if one
	 * belonging to this pmap is found.
	 */
2112	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2113		if (pv->pv_pmap == pmap) {
2114			splx(s);
2115			return TRUE;
2116		}
2117		loops++;
2118		if (loops >= 16)
2119			break;
2120	}
2121	splx(s);
2122	return (FALSE);
2123}
2124
2125#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2126/*
 * Remove all pages from the specified address space; this aids process
 * exit speeds.  Also, this code is special-cased for the current process
 * only, but can have the more generic (and slightly slower) mode enabled.
 * This is much faster than pmap_remove in the case of running down an
 * entire address space.
2133 */
2134void
2135pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2136{
2137	pv_entry_t pv, npv;
2138	int s;
2139
2140#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2141	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2142		printf("warning: pmap_remove_pages called with non-current pmap\n");
2143		return;
2144	}
2145#endif
2146
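	/*
	 * Walk the pmap's own pv list (pm_pvlist) instead of scanning the
	 * address range page by page; this is what makes tearing down an
	 * entire address space cheaper than pmap_remove().
	 */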
2147	s = splvm();
	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2151		struct ia64_lpte *pte;
2152
2153		npv = TAILQ_NEXT(pv, pv_plist);
2154
2155		if (pv->pv_va >= eva || pv->pv_va < sva) {
2156			continue;
2157		}
2158
2159		pte = pmap_find_vhpt(pv->pv_va);
2160		if (!pte)
			panic("pmap_remove_pages: page on pm_pvlist has no pte");

		/*
		 * We cannot remove wired pages from a process' mapping
		 * at this time.
		 */
2167		if (pte->pte_ig & PTE_IG_WIRED) {
2168			continue;
2169		}
2170
2171		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2172	}
2173	splx(s);
2174
2175	pmap_invalidate_all(pmap);
2176}
2177
2178/*
 *	pmap_page_protect:
 *
 *	Lower the permission for all mappings to a given page.
2182 */
2183void
2184pmap_page_protect(vm_page_t m, vm_prot_t prot)
2185{
2186	pv_entry_t pv;
2187
2188	if ((prot & VM_PROT_WRITE) != 0)
2189		return;
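	/*
	 * Downgrading to read and/or execute access: rewrite the access
	 * rights of every mapping in place.  Removing all access is
	 * handled below by removing the mappings entirely.
	 */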
2190	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2191		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2192			int newprot = pte_prot(pv->pv_pmap, prot);
2193			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2194			struct ia64_lpte *pte;
2195			pte = pmap_find_vhpt(pv->pv_va);
2196			pmap_pte_set_prot(pte, newprot);
2197			pmap_update_vhpt(pte, pv->pv_va);
2198			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2199			pmap_install(oldpmap);
2200		}
2201	} else {
2202		pmap_remove_all(m);
2203	}
2204}
2205
2206vm_offset_t
2207pmap_phys_address(int ppn)
2208{
2209	return (ia64_ptob(ppn));
2210}
2211
2212/*
2213 *	pmap_ts_referenced:
2214 *
2215 *	Return a count of reference bits for a page, clearing those bits.
2216 *	It is not necessary for every reference bit to be cleared, but it
2217 *	is necessary that 0 only be returned when there are truly no
2218 *	reference bits set.
2219 *
2220 *	XXX: The exact number of bits to check and clear is a matter that
2221 *	should be tested and standardized at some point in the future for
2222 *	optimal aging of shared pages.
2223 */
2224int
2225pmap_ts_referenced(vm_page_t m)
2226{
2227	pv_entry_t pv;
2228	int count = 0;
2229
2230	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2231		return 0;
2232
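	/*
	 * Walk every mapping of the page; count and clear the access (a)
	 * bit of each one that has it set, then flush the stale
	 * translation.
	 */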
2233	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2234		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2235		struct ia64_lpte *pte;
2236		pte = pmap_find_vhpt(pv->pv_va);
2237		if (pte->pte_a) {
2238			count++;
2239			pte->pte_a = 0;
2240			pmap_update_vhpt(pte, pv->pv_va);
2241			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2242		}
2243		pmap_install(oldpmap);
2244	}
2245
2246	return count;
2247}
2248
2249#if 0
2250/*
2251 *	pmap_is_referenced:
2252 *
2253 *	Return whether or not the specified physical page was referenced
2254 *	in any physical maps.
2255 */
2256static boolean_t
2257pmap_is_referenced(vm_page_t m)
2258{
2259	pv_entry_t pv;
2260
2261	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2262		return FALSE;
2263
2264	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2265		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2266		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2267		pmap_install(oldpmap);
2268		if (pte->pte_a)
			return TRUE;
2270	}
2271
	return FALSE;
2273}
2274#endif
2275
2276/*
2277 *	pmap_is_modified:
2278 *
2279 *	Return whether or not the specified physical page was modified
2280 *	in any physical maps.
2281 */
2282boolean_t
2283pmap_is_modified(vm_page_t m)
2284{
2285	pv_entry_t pv;
2286
2287	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2288		return FALSE;
2289
2290	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2291		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2292		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2293		pmap_install(oldpmap);
2294		if (pte->pte_d)
			return TRUE;
2296	}
2297
	return FALSE;
2299}
2300
2301/*
2302 *	Clear the modify bits on the specified physical page.
2303 */
2304void
2305pmap_clear_modify(vm_page_t m)
2306{
2307	pv_entry_t pv;
2308
2309	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2310		return;
2311
2312	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2313		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2314		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2315		if (pte->pte_d) {
2316			pte->pte_d = 0;
2317			pmap_update_vhpt(pte, pv->pv_va);
2318			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2319		}
2320		pmap_install(oldpmap);
2321	}
2322}
2323
2324/*
2325 *	pmap_clear_reference:
2326 *
2327 *	Clear the reference bit on the specified physical page.
2328 */
2329void
2330pmap_clear_reference(vm_page_t m)
2331{
2332	pv_entry_t pv;
2333
2334	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2335		return;
2336
2337	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2338		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2339		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2340		if (pte->pte_a) {
2341			pte->pte_a = 0;
2342			pmap_update_vhpt(pte, pv->pv_va);
2343			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2344		}
2345		pmap_install(oldpmap);
2346	}
2347}
2348
2349/*
2350 * Miscellaneous support routines follow
2351 */
2352
2353static void
2354ia64_protection_init()
2355{
2356	int prot, *kp, *up;
2357
2358	kp = protection_codes[0];
2359	up = protection_codes[1];
2360
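	/*
	 * Build the two protection lookup tables, indexed by the VM_PROT_*
	 * combination: protection_codes[0] is used for the kernel pmap and
	 * protection_codes[1] for user pmaps.  Each entry packs the ia64
	 * access-rights and privilege-level PTE fields (presumably
	 * consumed by pte_prot()).
	 */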
2361	for (prot = 0; prot < 8; prot++) {
2362		switch (prot) {
2363		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2364			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2365			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2366			break;
2367
2368		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2369			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2370			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2371			break;
2372
2373		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2374			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2375			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2376			break;
2377
2378		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2379			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2380			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2381			break;
2382
2383		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2384			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2385			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2386			break;
2387
2388		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2389			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2390			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2391			break;
2392
2393		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2394			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2395			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2396			break;
2397
2398		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2399			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2400			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2401			break;
2402		}
2403	}
2404}
2405
2406/*
2407 * Map a set of physical memory pages into the kernel virtual
2408 * address space. Return a pointer to where it is mapped. This
2409 * routine is intended to be used for mapping device memory,
2410 * NOT real memory.
2411 */
2412void *
2413pmap_mapdev(vm_offset_t pa, vm_size_t size)
2414{
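	/*
	 * Region 6 is an uncacheable identity mapping of physical memory,
	 * which is what device memory wants, so no page tables need to be
	 * set up; just hand back the region 6 alias of the address.
	 */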
2415	return (void*) IA64_PHYS_TO_RR6(pa);
2416}
2417
2418/*
2419 * 'Unmap' a range mapped by pmap_mapdev().
2420 */
2421void
2422pmap_unmapdev(vm_offset_t va, vm_size_t size)
2423{
2424	return;
2425}
2426
2427/*
2428 * perform the pmap work for mincore
2429 */
2430int
2431pmap_mincore(pmap_t pmap, vm_offset_t addr)
2432{
2433	pmap_t oldpmap;
2434	struct ia64_lpte *pte;
2435	int val = 0;
2436
2437	oldpmap = pmap_install(pmap);
2438	pte = pmap_find_vhpt(addr);
2439	pmap_install(oldpmap);
2440
2441	if (!pte)
2442		return 0;
2443
2444	if (pmap_pte_v(pte)) {
2445		vm_page_t m;
2446		vm_offset_t pa;
2447
2448		val = MINCORE_INCORE;
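		/*
		 * Unmanaged mappings (e.g. device memory) have no pv
		 * tracking, so only residency can be reported for them.
		 */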
2449		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2450			return val;
2451
2452		pa = pmap_pte_pa(pte);
2453
2454		m = PHYS_TO_VM_PAGE(pa);
2455
2456		/*
2457		 * Modified by us
2458		 */
2459		if (pte->pte_d)
2460			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2461		/*
2462		 * Modified by someone
2463		 */
2464		else if (pmap_is_modified(m))
2465			val |= MINCORE_MODIFIED_OTHER;
2466		/*
2467		 * Referenced by us
2468		 */
2469		if (pte->pte_a)
2470			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2471
2472		/*
2473		 * Referenced by someone
2474		 */
2475		else if (pmap_ts_referenced(m)) {
2476			val |= MINCORE_REFERENCED_OTHER;
2477			vm_page_flag_set(m, PG_REFERENCED);
2478		}
2479	}
2480	return val;
2481}
2482
2483void
2484pmap_activate(struct thread *td)
2485{
2486	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2487}
2488
2489pmap_t
2490pmap_install(pmap_t pmap)
2491{
2492	pmap_t oldpmap;
2493	int i;
2494
2495	critical_enter();
2496
2497	oldpmap = PCPU_GET(current_pmap);
2498
2499	if (pmap == oldpmap || pmap == kernel_pmap) {
2500		critical_exit();
2501		return pmap;
2502	}
2503
2504	if (oldpmap) {
		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2506	}
2507
2508	PCPU_SET(current_pmap, pmap);
2509	if (!pmap) {
		/*
		 * RIDs 0..4 have no mappings, so any access through these
		 * regions will generate a page fault.
		 */
2514		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2515		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2516		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2517		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2518		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2519		critical_exit();
2520		return oldpmap;
2521	}
2522
2523	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2524
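	/*
	 * Load this pmap's region IDs into region registers 0..4.  The
	 * value written is (rid << 8) | (PAGE_SHIFT << 2) | 1: the region
	 * ID in bits 8 and up, the preferred page size in bits 2..7, and
	 * the VHPT walker enable bit in bit 0.
	 */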
2525	for (i = 0; i < 5; i++)
2526		ia64_set_rr(IA64_RR_BASE(i),
2527			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2528
2529	critical_exit();
2530	return oldpmap;
2531}
2532
2533vm_offset_t
2534pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2535{
2536
2537	return addr;
2538}
2539
2540#include "opt_ddb.h"
2541
2542#ifdef DDB
2543
2544#include <ddb/ddb.h>
2545
2546static const char*	psnames[] = {
2547	"1B",	"2B",	"4B",	"8B",
2548	"16B",	"32B",	"64B",	"128B",
2549	"256B",	"512B",	"1K",	"2K",
2550	"4K",	"8K",	"16K",	"32K",
2551	"64K",	"128K",	"256K",	"512K",
2552	"1M",	"2M",	"4M",	"8M",
2553	"16M",	"32M",	"64M",	"128M",
2554	"256M",	"512M",	"1G",	"2G"
2555};
2556
2557static void
2558print_trs(int type)
2559{
2560	struct ia64_pal_result	res;
2561	int			i, maxtr;
2562	struct {
2563		struct ia64_pte	pte;
2564		struct ia64_itir itir;
2565		struct ia64_ifa ifa;
2566		struct ia64_rr	rr;
2567	}			buf;
2568	static const char*	manames[] = {
2569		"WB",	"bad",	"bad",	"bad",
		"UC",	"UCE",	"WC",	"NaT",
	};
2573
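	/*
	 * Ask PAL how many translation registers of the requested type
	 * exist (PAL_VM_SUMMARY), then dump each one via PAL_VM_TR_READ.
	 */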
2574	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2575	if (res.pal_status != 0) {
2576		db_printf("Can't get VM summary\n");
2577		return;
2578	}
2579
2580	if (type == 0)
2581		maxtr = (res.pal_result[0] >> 40) & 0xff;
2582	else
2583		maxtr = (res.pal_result[0] >> 32) & 0xff;
2584
2585	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2586	for (i = 0; i <= maxtr; i++) {
2587		bzero(&buf, sizeof(buf));
2588		res = ia64_call_pal_stacked_physical
2589			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2590		if (!(res.pal_result[0] & 1))
2591			buf.pte.pte_ar = 0;
2592		if (!(res.pal_result[0] & 2))
2593			buf.pte.pte_pl = 0;
2594		if (!(res.pal_result[0] & 4))
2595			buf.pte.pte_d = 0;
2596		if (!(res.pal_result[0] & 8))
2597			buf.pte.pte_ma = 0;
2598		db_printf(
2599			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2600			buf.ifa.ifa_ig & 1,
2601			buf.rr.rr_rid,
2602			buf.ifa.ifa_vpn,
2603			buf.pte.pte_ppn,
2604			psnames[buf.itir.itir_ps],
2605			buf.pte.pte_ed,
2606			buf.pte.pte_ar,
2607			buf.pte.pte_pl,
2608			buf.pte.pte_d,
2609			buf.pte.pte_a,
2610			manames[buf.pte.pte_ma],
2611			buf.pte.pte_p,
2612			buf.itir.itir_key);
2613	}
2614}
2615
2616DB_COMMAND(itr, db_itr)
2617{
2618	print_trs(0);
2619}
2620
2621DB_COMMAND(dtr, db_dtr)
2622{
2623	print_trs(1);
2624}
2625
2626DB_COMMAND(rr, db_rr)
2627{
2628	int i;
2629	u_int64_t t;
2630	struct ia64_rr rr;
2631
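	/*
	 * Read each of the eight region registers directly with the
	 * "mov =rr[...]" instruction and decode the rid, page size and
	 * VHPT-enable fields.
	 */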
2632	printf("RR RID    PgSz VE\n");
2633	for (i = 0; i < 8; i++) {
2634		__asm __volatile ("mov %0=rr[%1]"
2635				  : "=r"(t)
2636				  : "r"(IA64_RR_BASE(i)));
2637		*(u_int64_t *) &rr = t;
2638		printf("%d  %06x %4s %d\n",
2639		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2640	}
2641}
2642
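/*
 * The thash and ttag debugger commands print the VHPT hash bucket
 * address and translation tag that the hardware would compute for the
 * given virtual address.
 */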
2643DB_COMMAND(thash, db_thash)
2644{
2645	if (!have_addr)
2646		return;
2647
2648	db_printf("%p\n", (void *) ia64_thash(addr));
2649}
2650
2651DB_COMMAND(ttag, db_ttag)
2652{
2653	if (!have_addr)
2654		return;
2655
2656	db_printf("0x%lx\n", ia64_ttag(addr));
2657}
2658
2659#endif
2660