pmap.c revision 94779
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 94779 2002-04-15 16:07:52Z peter $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
95
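/*
 * For illustration: a region register value, as programmed via
 * ia64_set_rr() below, packs the region ID into bits 31:8, the
 * preferred page size into bits 7:2 and the VHPT walker enable into
 * bit 0.  A pmap that had (hypothetically) been handed RID 0x1000
 * for region 3 would therefore program rr[3] with
 * (0x1000 << 8) | (PAGE_SHIFT << 2) | 1.
 */
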
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120
121#include <sys/user.h>
122
123#include <machine/pal.h>
124#include <machine/md_var.h>
125
126MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
127
128#ifndef PMAP_SHPGPERPROC
129#define PMAP_SHPGPERPROC 200
130#endif
131
132#if defined(DIAGNOSTIC)
133#define PMAP_DIAGNOSTIC
134#endif
135
136#define MINPV 2048
137
138#if 0
139#define PMAP_DIAGNOSTIC
140#define PMAP_DEBUG
141#endif
142
143#if !defined(PMAP_DIAGNOSTIC)
144#define PMAP_INLINE __inline
145#else
146#define PMAP_INLINE
147#endif
148
149/*
150 * Get PDEs and PTEs for user/kernel address space
151 */
152#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
153#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
154#define pmap_pte_v(pte)		((pte)->pte_p)
155#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
156#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
157
158#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
159				:((pte)->pte_ig &= ~PTE_IG_WIRED))
160#define pmap_pte_set_prot(pte, v) do {		\
161    (pte)->pte_ar = (v) >> 2;			\
162    (pte)->pte_pl = (v) & 3;			\
163} while (0)
164
165/*
166 * Given a map and a machine independent protection code,
167 * convert to an ia64 protection code.
168 */
169#define pte_prot(m, p)		(protection_codes[m == pmap_kernel() ? 0 : 1][p])
170#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
171#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
172int	protection_codes[2][8];
173
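/*
 * For illustration: a protection_codes[][] entry is interpreted as
 * (ar << 2) | pl, i.e. an ia64 access-rights value in the upper bits
 * and a privilege level in the low two bits, which is what
 * pte_prot_ar() and pte_prot_pl() above unpack and what
 * pmap_pte_set_prot() stores.  Per pte_prot(), index 0 is used for
 * the kernel pmap and index 1 for user pmaps.
 */
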
174/*
175 * Return non-zero if this pmap is currently active
176 */
177#define pmap_isactive(pmap)	(pmap->pm_active)
178
179/*
180 * Statically allocated kernel pmap
181 */
182static struct pmap kernel_pmap_store;
183pmap_t kernel_pmap;
184
185vm_offset_t avail_start;	/* PA of first available physical page */
186vm_offset_t avail_end;		/* PA of last available physical page */
187vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
188vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
189static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
190
191vm_offset_t vhpt_base, vhpt_size;
192
193/*
194 * We use an object to own the kernel's 'page tables'. For simplicity,
195 * we use one page directory to index a set of pages containing
196 * ia64_lptes. This gives us up to 2Gb of kernel virtual space.
197 */
198static vm_object_t kptobj;
199static int nkpt;
200static struct ia64_lpte **kptdir;
201#define KPTE_DIR_INDEX(va) \
202	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
203#define KPTE_PTE_INDEX(va) \
204	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
205#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
206
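/*
 * Worked example, assuming the 8Kb pages (PAGE_SHIFT == 13) and
 * 32-byte ia64_lptes implied by the macros above: the directory page
 * holds 1 << (13 - 3) == 1024 pointers, each naming a page of
 * 8192 / 32 == 256 PTEs, so the kernel 'page tables' cover
 * 1024 * 256 * 8Kb == 2Gb of region 5, matching the comment above.
 */
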
207vm_offset_t kernel_vm_end;
208
209/*
210 * Values for ptc.e. XXX values for SKI.
211 */
212static u_int64_t pmap_ptc_e_base = 0x100000000;
213static u_int64_t pmap_ptc_e_count1 = 3;
214static u_int64_t pmap_ptc_e_count2 = 2;
215static u_int64_t pmap_ptc_e_stride1 = 0x2000;
216static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
217
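/*
 * pmap_invalidate_all_1() below issues ptc.e over a
 * pmap_ptc_e_count1 x pmap_ptc_e_count2 grid of addresses starting
 * at pmap_ptc_e_base, stepping by stride2 in the inner loop and by
 * stride1 between outer iterations.  The values above merely stand
 * in for the SKI simulator until pmap_bootstrap() replaces them with
 * the PAL_PTCE_INFO results.
 */
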
218/*
219 * Data for the RID allocator
220 */
221static u_int64_t *pmap_ridbusy;
222static int pmap_ridmax, pmap_ridcount;
223struct mtx pmap_ridmutex;
224
225/*
226 * Data for the pv entry allocation mechanism
227 */
228static uma_zone_t pvzone;
229static struct vm_object pvzone_obj;
230static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
231static int pmap_pagedaemon_waken = 0;
232static struct pv_entry *pvbootentries;
233static int pvbootnext, pvbootmax;
234
235/*
236 * Data for allocating PTEs for user processes.
237 */
238static uma_zone_t ptezone;
239static struct vm_object ptezone_obj;
240#if 0
241static struct ia64_lpte *pteinit;
242#endif
243
244/*
245 * VHPT instrumentation.
246 */
247static int pmap_vhpt_inserts;
248static int pmap_vhpt_collisions;
249static int pmap_vhpt_resident;
250SYSCTL_DECL(_vm_stats);
251SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
252SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
253	   &pmap_vhpt_inserts, 0, "");
254SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
255	   &pmap_vhpt_collisions, 0, "");
256SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
257	   &pmap_vhpt_resident, 0, "");
258
259static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
260static pv_entry_t get_pv_entry(void);
261static void	ia64_protection_init(void);
262
263static void	pmap_invalidate_all(pmap_t pmap);
264static void	pmap_remove_all(vm_page_t m);
265static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
266static void	*pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
267
268vm_offset_t
269pmap_steal_memory(vm_size_t size)
270{
271	vm_size_t bank_size;
272	vm_offset_t pa, va;
273
274	size = round_page(size);
275
276	bank_size = phys_avail[1] - phys_avail[0];
277	while (size > bank_size) {
278		int i;
279		for (i = 0; phys_avail[i+2]; i+= 2) {
280			phys_avail[i] = phys_avail[i+2];
281			phys_avail[i+1] = phys_avail[i+3];
282		}
283		phys_avail[i] = 0;
284		phys_avail[i+1] = 0;
285		if (!phys_avail[0])
286			panic("pmap_steal_memory: out of memory");
287		bank_size = phys_avail[1] - phys_avail[0];
288	}
289
290	pa = phys_avail[0];
291	phys_avail[0] += size;
292
293	va = IA64_PHYS_TO_RR7(pa);
294	bzero((caddr_t) va, size);
295	return va;
296}
297
298/*
299 *	Bootstrap the system enough to run with virtual memory.
300 */
301void
302pmap_bootstrap()
303{
304	int i, j, count, ridbits;
305	struct ia64_pal_result res;
306
307	/*
308	 * Query the PAL Code to find the loop parameters for the
309	 * ptc.e instruction.
310	 */
311	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
312	if (res.pal_status != 0)
313		panic("Can't configure ptc.e parameters");
314	pmap_ptc_e_base = res.pal_result[0];
315	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
316	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
317	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
318	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
319	if (bootverbose)
320		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
321		       "stride1=0x%lx, stride2=0x%lx\n",
322		       pmap_ptc_e_base,
323		       pmap_ptc_e_count1,
324		       pmap_ptc_e_count2,
325		       pmap_ptc_e_stride1,
326		       pmap_ptc_e_stride2);
327
328	/*
329	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
330	 */
331	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
332	if (res.pal_status != 0) {
333		if (bootverbose)
334			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
335		ridbits = 18; /* guaranteed minimum */
336	} else {
337		ridbits = (res.pal_result[1] >> 8) & 0xff;
338		if (bootverbose)
339			printf("Processor supports %d Region ID bits\n",
340			       ridbits);
341	}
342	pmap_ridmax = (1 << ridbits);
343	pmap_ridcount = 8;
344	pmap_ridbusy = (u_int64_t *)
345		pmap_steal_memory(pmap_ridmax / 8);
346	bzero(pmap_ridbusy, pmap_ridmax / 8);
347	pmap_ridbusy[0] |= 0xff;
348	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
349
350	/*
351	 * Allocate some memory for initial kernel 'page tables'.
352	 */
353	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
354	for (i = 0; i < NKPT; i++) {
355		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
356	}
357	nkpt = NKPT;
358
359	avail_start = phys_avail[0];
360	for (i = 0; phys_avail[i+2]; i+= 2) ;
361	avail_end = phys_avail[i+1];
362	count = i+2;
363
364	/*
365	 * Figure out a useful size for the VHPT, based on the size of
366	 * physical memory and try to locate a region which is large
367	 * enough to contain the VHPT (which must be a power of two in
368	 * size and aligned to a natural boundary).
369	 */
370	vhpt_size = 15;
371	while ((1<<vhpt_size) < ia64_btop(avail_end - avail_start) * 32)
372		vhpt_size++;
373
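	/*
	 * Worked example: with 1Gb of physical memory and 8Kb pages
	 * there are 2^17 pages, so the loop above settles on
	 * vhpt_size == 22, i.e. a 4Mb VHPT -- roughly 32 bytes (one
	 * long-format entry) per physical page.
	 */
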
374	vhpt_base = 0;
375	while (!vhpt_base) {
376		vm_offset_t mask;
377		if (bootverbose)
378			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
379		mask = (1L << vhpt_size) - 1;
380		for (i = 0; i < count; i += 2) {
381			vm_offset_t base, limit;
382			base = (phys_avail[i] + mask) & ~mask;
383			limit = base + (1L << vhpt_size);
384			if (limit <= phys_avail[i+1])
385				/*
386				 * VHPT can fit in this region
387				 */
388				break;
389		}
390		if (!phys_avail[i]) {
391			/*
392			 * Can't fit, try next smaller size.
393			 */
394			vhpt_size--;
395		} else {
396			vhpt_base = (phys_avail[i] + mask) & ~mask;
397		}
398	}
399	if (vhpt_size < 15)
400		panic("Can't find space for VHPT");
401
402	if (bootverbose)
403		printf("Putting VHPT at %p\n", (void *) vhpt_base);
404	if (vhpt_base != phys_avail[i]) {
405		/*
406		 * Split this region.
407		 */
408		if (bootverbose)
409			printf("Splitting [%p-%p]\n",
410			       (void *) phys_avail[i],
411			       (void *) phys_avail[i+1]);
412		for (j = count; j > i; j -= 2) {
413			phys_avail[j] = phys_avail[j-2];
414			phys_avail[j+1] = phys_avail[j-2+1];
415		}
416		phys_avail[count+2] = 0;
417		phys_avail[count+3] = 0;
418		phys_avail[i+1] = vhpt_base;
419		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
420	} else {
421		phys_avail[i] = vhpt_base + (1L << vhpt_size);
422	}
423
424	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
425	bzero((void *) vhpt_base, (1L << vhpt_size));
426	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
427			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
428
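	/*
	 * The cr.pta value written above packs the VHPT base address
	 * into the high bits, sets the long-format bit (1 << 8),
	 * encodes the table size in bits 7:2 and sets the walker
	 * enable bit (bit 0).
	 */
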
429	virtual_avail = IA64_RR_BASE(5);
430	virtual_end = IA64_RR_BASE(6)-1;
431
432	/*
433	 * Initialize protection array.
434	 */
435	ia64_protection_init();
436
437	/*
438	 * The kernel's pmap is statically allocated so we don't have to use
439	 * pmap_create, which is unlikely to work correctly at this part of
440	 * the boot sequence (XXX and which no longer exists).
441	 */
442	kernel_pmap = &kernel_pmap_store;
443	for (i = 0; i < 5; i++)
444		kernel_pmap->pm_rid[i] = 0;
445	kernel_pmap->pm_count = 1;
446	kernel_pmap->pm_active = 1;
447	TAILQ_INIT(&kernel_pmap->pm_pvlist);
448	PCPU_SET(current_pmap, kernel_pmap);
449
450	/*
451	 * Region 5 is mapped via the vhpt.
452	 */
453	ia64_set_rr(IA64_RR_BASE(5),
454		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
455
456	/*
457	 * Region 6 is direct mapped UC and region 7 is direct mapped
458	 * WB. The details of this are controlled by the Alt {I,D}TLB
459	 * handlers. Here we just make sure that they have the largest
460	 * possible page size to minimise TLB usage.
461	 */
462	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
463	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
464
465	/*
466	 * Set up proc0's PCB.
467	 */
468#if 0
469	thread0.td_pcb->pcb_hw.apcb_asn = 0;
470#endif
471
472	/*
473	 * Reserve some memory for allocating pvs while bootstrapping
474	 * the pv allocator. We need to have enough to cover mapping
475	 * the kmem_alloc region used to allocate the initial_pvs in
476	 * pmap_init. In general, the size of this region is
477	 * approximately (# physical pages) * (size of pv entry).
478	 */
479	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
480	pvbootentries = (struct pv_entry *)
481		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
482	pvbootnext = 0;
483
484	/*
485	 * Clear out any random TLB entries left over from booting.
486	 */
487	pmap_invalidate_all(kernel_pmap);
488}
489
490static void *
491pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
492{
493	*flags = UMA_SLAB_PRIV;
494	return (void *)kmem_alloc(kernel_map, bytes);
495}
496
497/*
498 *	Initialize the pmap module.
499 *	Called by vm_init, to initialize any structures that the pmap
500 *	system needs to map virtual memory.
501 *	pmap_init has been enhanced to support, in a fairly consistent
502 *	way, discontiguous physical memory.
503 */
504void
505pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
506{
507	int i;
508	int initial_pvs;
509
510	/*
511	 * Allocate memory for random pmap data structures.  Includes the
512	 * pv_head_table.
513	 */
514
515	for(i = 0; i < vm_page_array_size; i++) {
516		vm_page_t m;
517
518		m = &vm_page_array[i];
519		TAILQ_INIT(&m->md.pv_list);
520		m->md.pv_list_count = 0;
521 	}
522
523	/*
524	 * Init the pv free list and the PTE free list.
525	 */
526	initial_pvs = vm_page_array_size;
527	if (initial_pvs < MINPV)
528		initial_pvs = MINPV;
529	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
530	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
531	uma_zone_set_allocf(pvzone, pmap_allocf);
532	uma_prealloc(pvzone, initial_pvs);
533
534	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
535	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
536	uma_zone_set_allocf(ptezone, pmap_allocf);
537	uma_prealloc(ptezone, initial_pvs);
538
539	/*
540	 * Create the object for the kernel's page tables.
541	 */
542	kptobj = vm_object_allocate(OBJT_DEFAULT, MAXKPT);
543
544	/*
545	 * Now it is safe to enable pv_table recording.
546	 */
547	pmap_initialized = TRUE;
548}
549
550/*
551 * Initialize the address space (zone) for the pv_entries.  Set a
552 * high water mark so that the system can recover from excessive
553 * numbers of pv entries.
554 */
555void
556pmap_init2()
557{
558	int shpgperproc = PMAP_SHPGPERPROC;
559
560	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
561	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
562	pv_entry_high_water = 9 * (pv_entry_max / 10);
563	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
564	uma_zone_set_obj(ptezone, &ptezone_obj, pv_entry_max);
565}
566
567
568/***************************************************
569 * Manipulate TLBs for a pmap
570 ***************************************************/
571
572static void
573pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
574{
575	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
576		("invalidating TLB for non-current pmap"));
577	ia64_ptc_g(va, PAGE_SHIFT << 2);
578}
579
580static void
581pmap_invalidate_all_1(void *arg)
582{
583	u_int64_t addr;
584	int i, j;
585	register_t psr;
586
587	psr = intr_disable();
588	addr = pmap_ptc_e_base;
589	for (i = 0; i < pmap_ptc_e_count1; i++) {
590		for (j = 0; j < pmap_ptc_e_count2; j++) {
591			ia64_ptc_e(addr);
592			addr += pmap_ptc_e_stride2;
593		}
594		addr += pmap_ptc_e_stride1;
595	}
596	intr_restore(psr);
597}
598
599static void
600pmap_invalidate_all(pmap_t pmap)
601{
602	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
603		("invalidating TLB for non-current pmap"));
604
605
606#ifdef SMP
607	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
608#else
609	pmap_invalidate_all_1(0);
610#endif
611}
612
613static u_int32_t
614pmap_allocate_rid(void)
615{
616	int rid;
617
618	if (pmap_ridcount == pmap_ridmax)
619		panic("pmap_allocate_rid: All Region IDs used");
620
621	do {
622		rid = arc4random() & (pmap_ridmax - 1);
623	} while (pmap_ridbusy[rid / 64] & (1L << (rid & 63)));
624	pmap_ridbusy[rid / 64] |= (1L << (rid & 63));
625	pmap_ridcount++;
626
627	return rid;
628}
629
630static void
631pmap_free_rid(u_int32_t rid)
632{
633	mtx_lock(&pmap_ridmutex);
634	pmap_ridbusy[rid / 64] &= ~(1L << (rid & 63));
635	pmap_ridcount--;
636	mtx_unlock(&pmap_ridmutex);
637}
638
639static void
640pmap_ensure_rid(pmap_t pmap, vm_offset_t va)
641{
642	int rr;
643
644	rr = va >> 61;
645	if (pmap->pm_rid[rr])
646		return;
647
648	mtx_lock(&pmap_ridmutex);
649	pmap->pm_rid[rr] = pmap_allocate_rid();
650	if (pmap == PCPU_GET(current_pmap))
651		ia64_set_rr(IA64_RR_BASE(rr),
652			    (pmap->pm_rid[rr] << 8)|(PAGE_SHIFT << 2)|1);
653	mtx_unlock(&pmap_ridmutex);
654}
655
656/***************************************************
657 * Low level helper routines.....
658 ***************************************************/
659
660/*
661 * Install a pte into the VHPT
662 */
663static PMAP_INLINE void
664pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
665{
666	u_int64_t *vhp, *p;
667
668	/* invalidate the pte */
669	atomic_set_64(&vhpte->pte_tag, 1L << 63);
670	ia64_mf();			/* make sure everyone sees */
671
672	vhp = (u_int64_t *) vhpte;
673	p = (u_int64_t *) pte;
674
675	vhp[0] = p[0];
676	vhp[1] = p[1];
677	vhp[2] = p[2];			/* sets ti to one */
678
679	ia64_mf();
680}
681
682/*
683 * Compare essential parts of pte.
684 */
685static PMAP_INLINE int
686pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
687{
688	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
689}
690
691/*
692 * this routine defines the region(s) of memory that should
693 * not be tested for the modified bit.
694 */
695static PMAP_INLINE int
696pmap_track_modified(vm_offset_t va)
697{
698	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
699		return 1;
700	else
701		return 0;
702}
703
704/*
705 * Create the U area for a new process.
706 * This routine directly affects the fork perf for a process.
707 */
708void
709pmap_new_proc(struct proc *p)
710{
711	struct user *up;
712
713	/*
714	 * Use contigmalloc for the user area so that we can use a region
715	 * 7 address for it, which makes it impossible to accidentally
716	 * lose when recording a trapframe.
717	 */
718	up = contigmalloc(UAREA_PAGES * PAGE_SIZE, M_PMAP,
719			  M_WAITOK,
720			  0ul,
721			  256*1024*1024 - 1,
722			  PAGE_SIZE,
723			  256*1024*1024);
724
725	p->p_md.md_uservirt = up;
726	p->p_uarea = (struct user *)
727		IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t) up));
728}
729
730/*
731 * Dispose the U area for a process that has exited.
732 * This routine directly impacts the exit perf of a process.
733 */
734void
735pmap_dispose_proc(struct proc *p)
736{
737	contigfree(p->p_md.md_uservirt, UAREA_PAGES * PAGE_SIZE, M_PMAP);
738	p->p_md.md_uservirt = 0;
739	p->p_uarea = 0;
740}
741
742/*
743 * Allow the U area for a process to be prejudicially paged out.
744 */
745void
746pmap_swapout_proc(struct proc *p)
747{
748}
749
750/*
751 * Bring the U area for a specified process back in.
752 */
753void
754pmap_swapin_proc(struct proc *p)
755{
756}
757
758/*
759 * Create the KSTACK for a new thread.
760 * This routine directly affects the fork perf for a process/thread.
761 */
762void
763pmap_new_thread(struct thread *td)
764{
765	vm_offset_t *ks;
766
767	/*
768	 * Use contigmalloc for the kernel stack so that we can use a
769	 * region 7 address for it, which makes it impossible to
770	 * accidentally lose when recording a trapframe.
771	 */
772	ks = contigmalloc(KSTACK_PAGES * PAGE_SIZE, M_PMAP,
773			  M_WAITOK,
774			  0ul,
775			  256*1024*1024 - 1,
776			  PAGE_SIZE,
777			  256*1024*1024);
778
779	td->td_md.md_kstackvirt = ks;
780	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
781}
782
783/*
784 * Dispose the KSTACK for a thread that has exited.
785 * This routine directly impacts the exit perf of a process/thread.
786 */
787void
788pmap_dispose_thread(struct thread *td)
789{
790	contigfree(td->td_md.md_kstackvirt, KSTACK_PAGES * PAGE_SIZE, M_PMAP);
791	td->td_md.md_kstackvirt = 0;
792	td->td_kstack = 0;
793}
794
795/*
796 * Allow the KSTACK for a thread to be prejudicially paged out.
797 */
798void
799pmap_swapout_thread(struct thread *td)
800{
801}
802
803/*
804 * Bring the KSTACK for a specified thread back in.
805 */
806void
807pmap_swapin_thread(struct thread *td)
808{
809}
810
811/***************************************************
812 * Page table page management routines.....
813 ***************************************************/
814
815void
816pmap_pinit0(struct pmap *pmap)
817{
818	int i;
819
820	/*
821	 * kernel_pmap is the same as any other pmap.
822	 */
823	pmap_pinit(pmap);
824	pmap->pm_flags = 0;
825	for (i = 0; i < 5; i++)
826		pmap->pm_rid[i] = 0;
827	pmap->pm_count = 1;
828	pmap->pm_ptphint = NULL;
829	pmap->pm_active = 0;
830	TAILQ_INIT(&pmap->pm_pvlist);
831	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
832}
833
834/*
835 * Initialize a preallocated and zeroed pmap structure,
836 * such as one in a vmspace structure.
837 */
838void
839pmap_pinit(struct pmap *pmap)
840{
841	int i;
842
843	pmap->pm_flags = 0;
844	for (i = 0; i < 5; i++)
845		pmap->pm_rid[i] = 0;
846	pmap->pm_count = 1;
847	pmap->pm_ptphint = NULL;
848	pmap->pm_active = 0;
849	TAILQ_INIT(&pmap->pm_pvlist);
850	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
851}
852
853/*
854 * Wire in kernel global address entries.  To avoid a race condition
855 * between pmap initialization and pmap_growkernel, this procedure
856 * should be called after the vmspace is attached to the process
857 * but before this pmap is activated.
858 */
859void
860pmap_pinit2(struct pmap *pmap)
861{
862}
863
864/***************************************************
865 * Pmap allocation/deallocation routines.
866 ***************************************************/
867
868/*
869 * Release any resources held by the given physical map.
870 * Called when a pmap initialized by pmap_pinit is being released.
871 * Should only be called if the map contains no valid mappings.
872 */
873void
874pmap_release(pmap_t pmap)
875{
876	int i;
877
878	for (i = 0; i < 5; i++)
879		if (pmap->pm_rid[i])
880			pmap_free_rid(pmap->pm_rid[i]);
881}
882
883/*
884 * grow the number of kernel page table entries, if needed
885 */
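/*
 * Each page-table page added here maps a further NKPTEPG pages of
 * region 5 (2Mb of KVA with 8Kb pages), so kernel_vm_end only ever
 * advances in NKPTEPG-page steps.
 */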
886void
887pmap_growkernel(vm_offset_t addr)
888{
889	struct ia64_lpte *ptepage;
890	vm_page_t nkpg;
891
892	if (kernel_vm_end == 0) {
893		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
894			+ IA64_RR_BASE(5);
895	}
896	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
897	while (kernel_vm_end < addr) {
898		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
899			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
900				& ~(PAGE_SIZE * NKPTEPG - 1);
901			continue;
902		}
903
904		/*
905		 * We could handle more by increasing the size of kptdir.
906		 */
907		if (nkpt == MAXKPT)
908			panic("pmap_growkernel: out of kernel address space");
909
910		/*
911		 * This index is bogus, but out of the way
912		 */
913		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
914		if (!nkpg)
915			panic("pmap_growkernel: no memory to grow kernel");
916
917		nkpt++;
918
919		vm_page_wire(nkpg);
920		ptepage = (struct ia64_lpte *)
921			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
922		bzero(ptepage, PAGE_SIZE);
923		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
924
925		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
926	}
927}
928
929/*
930 *	Retire the given physical map from service.
931 *	Should only be called if the map contains
932 *	no valid mappings.
933 */
934void
935pmap_destroy(pmap_t pmap)
936{
937	int count;
938
939	if (pmap == NULL)
940		return;
941
942	count = --pmap->pm_count;
943	if (count == 0) {
944		pmap_release(pmap);
945		panic("destroying a pmap is not yet implemented");
946	}
947}
948
949/*
950 *	Add a reference to the specified pmap.
951 */
952void
953pmap_reference(pmap_t pmap)
954{
955	if (pmap != NULL) {
956		pmap->pm_count++;
957	}
958}
959
960/***************************************************
961 * page management routines.
962 ***************************************************/
963
964/*
965 * free the pv_entry back to the free list
966 */
967static PMAP_INLINE void
968free_pv_entry(pv_entry_t pv)
969{
970	pv_entry_count--;
971	uma_zfree(pvzone, pv);
972}
973
974/*
975 * get a new pv_entry, allocating a block from the system
976 * when needed.
977 * the memory allocation is performed bypassing the malloc code
978 * because of the possibility of allocations at interrupt time.
979 */
980static pv_entry_t
981get_pv_entry(void)
982{
983	pv_entry_count++;
984	if (pv_entry_high_water &&
985		(pv_entry_count > pv_entry_high_water) &&
986		(pmap_pagedaemon_waken == 0)) {
987		pmap_pagedaemon_waken = 1;
988		wakeup (&vm_pages_needed);
989	}
990	return uma_zalloc(pvzone, M_WAITOK);
991}
992
993/*
994 * Add an ia64_lpte to the VHPT.
995 */
996static void
997pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
998{
999	struct ia64_lpte *vhpte;
1000
1001	pmap_vhpt_inserts++;
1002	pmap_vhpt_resident++;
1003
1004	vhpte = (struct ia64_lpte *) ia64_thash(va);
1005
1006	if (vhpte->pte_chain)
1007		pmap_vhpt_collisions++;
1008
1009	pte->pte_chain = vhpte->pte_chain;
1010	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
1011
1012	if (!vhpte->pte_p && pte->pte_p)
1013		pmap_install_pte(vhpte, pte);
1014	else
1015		ia64_mf();
1016}
1017
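/*
 * Note on the layout used above and below: each VHPT slot heads a
 * singly-linked chain of ia64_lptes whose pte_chain fields hold
 * physical addresses.  The hardware walker only ever sees the slot
 * itself, so pmap_install_pte() copies whichever chained pte should
 * currently be visible into the slot.
 */
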
1018/*
1019 * Update VHPT after a pte has changed.
1020 */
1021static void
1022pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1023{
1024	struct ia64_lpte *vhpte;
1025
1026	vhpte = (struct ia64_lpte *) ia64_thash(va);
1027
1028	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
1029	    && pte->pte_p)
1030		pmap_install_pte(vhpte, pte);
1031}
1032
1033/*
1034 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1035 * worked or an appropriate error code otherwise.
1036 */
1037static int
1038pmap_remove_vhpt(vm_offset_t va)
1039{
1040	struct ia64_lpte *pte;
1041	struct ia64_lpte *lpte;
1042	struct ia64_lpte *vhpte;
1043	u_int64_t tag;
1044	int error = ENOENT;
1045
1046	vhpte = (struct ia64_lpte *) ia64_thash(va);
1047
1048	/*
1049	 * If the VHPTE is invalid, there can't be a collision chain.
1050	 */
1051	if (!vhpte->pte_p) {
1052		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1053		printf("can't remove vhpt entry for 0x%lx\n", va);
1054		goto done;
1055	}
1056
1057	lpte = vhpte;
1058	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1059	tag = ia64_ttag(va);
1060
1061	while (pte->pte_tag != tag) {
1062		lpte = pte;
1063		if (pte->pte_chain)
1064			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1065		else {
1066			printf("can't remove vhpt entry for 0x%lx\n", va);
1067			goto done;
1068		}
1069	}
1070
1071	/*
1072	 * Snip this pte out of the collision chain.
1073	 */
1074	lpte->pte_chain = pte->pte_chain;
1075
1076	/*
1077	 * If the VHPTE matches as well, change it to map the first
1078	 * element from the chain if there is one.
1079	 */
1080	if (vhpte->pte_tag == tag) {
1081		if (vhpte->pte_chain) {
1082			pte = (struct ia64_lpte *)
1083				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1084			pmap_install_pte(vhpte, pte);
1085		} else {
1086			vhpte->pte_p = 0;
1087			ia64_mf();
1088		}
1089	}
1090
1091	pmap_vhpt_resident--;
1092	error = 0;
1093 done:
1094	return error;
1095}
1096
1097/*
1098 * Find the ia64_lpte for the given va, if any.
1099 */
1100static struct ia64_lpte *
1101pmap_find_vhpt(vm_offset_t va)
1102{
1103	struct ia64_lpte *pte;
1104	u_int64_t tag;
1105
1106	pte = (struct ia64_lpte *) ia64_thash(va);
1107	if (!pte->pte_chain) {
1108		pte = 0;
1109		goto done;
1110	}
1111
1112	tag = ia64_ttag(va);
1113	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1114
1115	while (pte->pte_tag != tag) {
1116		if (pte->pte_chain) {
1117			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1118		} else {
1119			pte = 0;
1120			break;
1121		}
1122	}
1123
1124 done:
1125	return pte;
1126}
1127
1128/*
1129 * Remove an entry from the list of managed mappings.
1130 */
1131static int
1132pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1133{
1134	if (!pv) {
1135		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1136			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1137				if (pmap == pv->pv_pmap && va == pv->pv_va)
1138					break;
1139			}
1140		} else {
1141			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1142				if (va == pv->pv_va)
1143					break;
1144			}
1145		}
1146	}
1147
1148	if (pv) {
1149		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1150		m->md.pv_list_count--;
1151		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1152			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1153
1154		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1155		free_pv_entry(pv);
1156		return 0;
1157	} else {
1158		return ENOENT;
1159	}
1160}
1161
1162/*
1163 * Create a pv entry for page at pa for
1164 * (pmap, va).
1165 */
1166static void
1167pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1168{
1169	pv_entry_t pv;
1170
1171	pv = get_pv_entry();
1172	pv->pv_pmap = pmap;
1173	pv->pv_va = va;
1174
1175	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1176	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1177	m->md.pv_list_count++;
1178}
1179
1180/*
1181 *	Routine:	pmap_extract
1182 *	Function:
1183 *		Extract the physical page address associated
1184 *		with the given map/virtual_address pair.
1185 */
1186vm_offset_t
1187pmap_extract(pmap, va)
1188	register pmap_t pmap;
1189	vm_offset_t va;
1190{
1191	pmap_t oldpmap;
1192	vm_offset_t pa;
1193
1194	oldpmap = pmap_install(pmap);
1195	pa = ia64_tpa(va);
1196	pmap_install(oldpmap);
1197	return pa;
1198}
1199
1200/***************************************************
1201 * Low level mapping routines.....
1202 ***************************************************/
1203
1204/*
1205 * Find the kernel lpte for mapping the given virtual address, which
1206 * must be in the part of region 5 which we can cover with our kernel
1207 * 'page tables'.
1208 */
1209static struct ia64_lpte *
1210pmap_find_kpte(vm_offset_t va)
1211{
1212	KASSERT((va >> 61) == 5,
1213		("kernel mapping 0x%lx not in region 5", va));
1214	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1215		("kernel mapping 0x%lx out of range", va));
1216	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1217}
1218
1219/*
1220 * Find a pte suitable for mapping a user-space address. If one exists
1221 * in the VHPT, that one will be returned, otherwise a new pte is
1222 * allocated.
1223 */
1224static struct ia64_lpte *
1225pmap_find_pte(vm_offset_t va)
1226{
1227	struct ia64_lpte *pte;
1228
1229	if (va >= VM_MAXUSER_ADDRESS)
1230		return pmap_find_kpte(va);
1231
1232	pte = pmap_find_vhpt(va);
1233	if (!pte) {
1234		pte = uma_zalloc(ptezone, M_WAITOK);
1235		pte->pte_p = 0;
1236	}
1237	return pte;
1238}
1239
1240/*
1241 * Free a pte which is now unused. This simply returns it to the zone
1242 * allocator if it is a user mapping. For kernel mappings, clear the
1243 * valid bit to make it clear that the mapping is not currently used.
1244 */
1245static void
1246pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1247{
1248	if (va < VM_MAXUSER_ADDRESS)
1249		uma_zfree(ptezone, pte);
1250	else
1251		pte->pte_p = 0;
1252}
1253
1254/*
1255 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1256 * the pte was originally valid, then it's assumed to already be in the
1257 * VHPT.
1258 */
1259static void
1260pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1261	     int ig, int pl, int ar)
1262{
1263	int wasvalid = pte->pte_p;
1264
1265	pte->pte_p = 1;
1266	pte->pte_ma = PTE_MA_WB;
1267	if (ig & PTE_IG_MANAGED) {
1268		pte->pte_a = 0;
1269		pte->pte_d = 0;
1270	} else {
1271		pte->pte_a = 1;
1272		pte->pte_d = 1;
1273	}
1274	pte->pte_pl = pl;
1275	pte->pte_ar = ar;
1276	pte->pte_ppn = pa >> 12;
1277	pte->pte_ed = 0;
1278	pte->pte_ig = ig;
1279
1280	pte->pte_ps = PAGE_SHIFT;
1281	pte->pte_key = 0;
1282
1283	pte->pte_tag = ia64_ttag(va);
1284
1285	if (wasvalid) {
1286		pmap_update_vhpt(pte, va);
1287	} else {
1288		pmap_enter_vhpt(pte, va);
1289	}
1290}
1291
1292/*
1293 * If a pte contains a valid mapping, clear it and update the VHPT.
1294 */
1295static void
1296pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1297{
1298	if (pte->pte_p) {
1299		pmap_remove_vhpt(va);
1300		ia64_ptc_g(va, PAGE_SHIFT << 2);
1301		pte->pte_p = 0;
1302	}
1303}
1304
1305/*
1306 * Remove the (possibly managed) mapping represented by pte from the
1307 * given pmap.
1308 */
1309static int
1310pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1311		pv_entry_t pv, int freepte)
1312{
1313	int error;
1314	vm_page_t m;
1315
1316	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1317		("removing pte for non-current pmap"));
1318
1319	/*
1320	 * First remove from the VHPT.
1321	 */
1322	error = pmap_remove_vhpt(va);
1323	if (error)
1324		return error;
1325
1326	/*
1327	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1328	 */
1329	pte->pte_p = 0;
1330
1331	if (pte->pte_ig & PTE_IG_WIRED)
1332		pmap->pm_stats.wired_count -= 1;
1333
1334	pmap->pm_stats.resident_count -= 1;
1335	if (pte->pte_ig & PTE_IG_MANAGED) {
1336		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1337		if (pte->pte_d)
1338			if (pmap_track_modified(va))
1339				vm_page_dirty(m);
1340		if (pte->pte_a)
1341			vm_page_flag_set(m, PG_REFERENCED);
1342
1343		if (freepte)
1344			pmap_free_pte(pte, va);
1345		return pmap_remove_entry(pmap, m, va, pv);
1346	} else {
1347		if (freepte)
1348			pmap_free_pte(pte, va);
1349		return 0;
1350	}
1351}
1352
1353/*
1354 * Add a list of wired pages to the kva.
1355 * This routine is only used for temporary
1356 * kernel mappings that do not need to have
1357 * page modification or references recorded.
1358 * Note that old mappings are simply written
1359 * over.  The page *must* be wired.
1360 */
1361void
1362pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1363{
1364	int i;
1365	struct ia64_lpte *pte;
1366
1367	for (i = 0; i < count; i++) {
1368		vm_offset_t tva = va + i * PAGE_SIZE;
1369		int wasvalid;
1370		pte = pmap_find_kpte(tva);
1371		wasvalid = pte->pte_p;
1372		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1373			     0, PTE_PL_KERN, PTE_AR_RWX);
1374		if (wasvalid)
1375			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1376	}
1377}
1378
1379/*
1380 * this routine jerks page mappings from the
1381 * kernel -- it is meant only for temporary mappings.
1382 */
1383void
1384pmap_qremove(vm_offset_t va, int count)
1385{
1386	int i;
1387	struct ia64_lpte *pte;
1388
1389	for (i = 0; i < count; i++) {
1390		pte = pmap_find_kpte(va);
1391		pmap_clear_pte(pte, va);
1392		va += PAGE_SIZE;
1393	}
1394}
1395
1396/*
1397 * Add a wired page to the kva.
1398 */
1399void
1400pmap_kenter(vm_offset_t va, vm_offset_t pa)
1401{
1402	struct ia64_lpte *pte;
1403	int wasvalid;
1404
1405	pte = pmap_find_kpte(va);
1406	wasvalid = pte->pte_p;
1407	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1408	if (wasvalid)
1409		ia64_ptc_g(va, PAGE_SHIFT << 2);
1410}
1411
1412/*
1413 * Remove a page from the kva
1414 */
1415void
1416pmap_kremove(vm_offset_t va)
1417{
1418	struct ia64_lpte *pte;
1419
1420	pte = pmap_find_kpte(va);
1421	pmap_clear_pte(pte, va);
1422}
1423
1424/*
1425 *	Used to map a range of physical addresses into kernel
1426 *	virtual address space.
1427 *
1428 *	The value passed in '*virt' is a suggested virtual address for
1429 *	the mapping. Architectures which can support a direct-mapped
1430 *	physical to virtual region can return the appropriate address
1431 *	within that region, leaving '*virt' unchanged. Other
1432 *	architectures should map the pages starting at '*virt' and
1433 *	update '*virt' with the first usable address after the mapped
1434 *	region.
1435 */
1436vm_offset_t
1437pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1438{
1439	return IA64_PHYS_TO_RR7(start);
1440}
1441
1442/*
1443 * This routine is very drastic, but can save the system
1444 * in a pinch.
1445 */
1446void
1447pmap_collect()
1448{
1449	int i;
1450	vm_page_t m;
1451	static int warningdone = 0;
1452
1453	if (pmap_pagedaemon_waken == 0)
1454		return;
1455
1456	if (warningdone < 5) {
1457		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1458		warningdone++;
1459	}
1460
1461	for(i = 0; i < vm_page_array_size; i++) {
1462		m = &vm_page_array[i];
1463		if (m->wire_count || m->hold_count || m->busy ||
1464		    (m->flags & (PG_BUSY | PG_UNMANAGED)))
1465			continue;
1466		pmap_remove_all(m);
1467	}
1468	pmap_pagedaemon_waken = 0;
1469}
1470
1471/*
1472 * Remove a single page from a process address space
1473 */
1474static void
1475pmap_remove_page(pmap_t pmap, vm_offset_t va)
1476{
1477	struct ia64_lpte *pte;
1478
1479	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1480		("removing page for non-current pmap"));
1481
1482	pte = pmap_find_vhpt(va);
1483	if (pte) {
1484		pmap_remove_pte(pmap, pte, va, 0, 1);
1485		pmap_invalidate_page(pmap, va);
1486	}
1487	return;
1488}
1489
1490/*
1491 *	Remove the given range of addresses from the specified map.
1492 *
1493 *	It is assumed that the start and end are properly
1494 *	rounded to the page size.
1495 */
1496void
1497pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1498{
1499	pmap_t oldpmap;
1500	vm_offset_t va;
1501	pv_entry_t pv;
1502	struct ia64_lpte *pte;
1503
1504	if (pmap == NULL)
1505		return;
1506
1507	if (pmap->pm_stats.resident_count == 0)
1508		return;
1509
1510	oldpmap = pmap_install(pmap);
1511
1512	/*
1513	 * special handling of removing one page.  a very
1514	 * common operation and easy to short circuit some
1515	 * code.
1516	 */
1517	if (sva + PAGE_SIZE == eva) {
1518		pmap_remove_page(pmap, sva);
1519		pmap_install(oldpmap);
1520		return;
1521	}
1522
1523	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1524		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1525			va = pv->pv_va;
1526			if (va >= sva && va < eva) {
1527				pte = pmap_find_vhpt(va);
1528				pmap_remove_pte(pmap, pte, va, pv, 1);
1529				pmap_invalidate_page(pmap, va);
1530			}
1531		}
1532
1533	} else {
1534		for (va = sva; va < eva; va += PAGE_SIZE) {
1535			pte = pmap_find_vhpt(va);
1536			if (pte) {
1537				pmap_remove_pte(pmap, pte, va, 0, 1);
1538				pmap_invalidate_page(pmap, va);
1539			}
1540		}
1541	}
1542
1543	pmap_install(oldpmap);
1544}
1545
1546/*
1547 *	Routine:	pmap_remove_all
1548 *	Function:
1549 *		Removes this physical page from
1550 *		all physical maps in which it resides.
1551 *		Reflects back modify bits to the pager.
1552 *
1553 *	Notes:
1554 *		Original versions of this routine were very
1555 *		inefficient because they iteratively called
1556 *		pmap_remove (slow...)
1557 */
1558
1559static void
1560pmap_remove_all(vm_page_t m)
1561{
1562	pmap_t oldpmap;
1563	pv_entry_t pv;
1564	int nmodify;
1565	int s;
1566
1567	nmodify = 0;
1568#if defined(PMAP_DIAGNOSTIC)
1569	/*
1570	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1571	 * pages!
1572	 */
1573	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1574		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1575	}
1576#endif
1577
1578	s = splvm();
1579
1580	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1581		struct ia64_lpte *pte;
1582		pmap_t pmap = pv->pv_pmap;
1583		vm_offset_t va = pv->pv_va;
1584
1585		oldpmap = pmap_install(pmap);
1586		pte = pmap_find_vhpt(va);
1587		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1588			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1589		pmap_remove_pte(pmap, pte, va, pv, 1);
1590		pmap_invalidate_page(pmap, va);
1591		pmap_install(oldpmap);
1592	}
1593
1594	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1595
1596	splx(s);
1597	return;
1598}
1599
1600/*
1601 *	Set the physical protection on the
1602 *	specified range of this map as requested.
1603 */
1604void
1605pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1606{
1607	pmap_t oldpmap;
1608	struct ia64_lpte *pte;
1609	int newprot;
1610
1611	if (pmap == NULL)
1612		return;
1613
1614	oldpmap = pmap_install(pmap);
1615
1616	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1617		pmap_remove(pmap, sva, eva);
1618		pmap_install(oldpmap);
1619		return;
1620	}
1621
1622	if (prot & VM_PROT_WRITE) {
1623		pmap_install(oldpmap);
1624		return;
1625	}
1626
1627	newprot = pte_prot(pmap, prot);
1628
1629	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1630		panic("pmap_protect: unaligned addresses");
1631
1632	while (sva < eva) {
1633		/*
1634		 * If page is invalid, skip this page
1635		 */
1636		pte = pmap_find_vhpt(sva);
1637		if (!pte) {
1638			sva += PAGE_SIZE;
1639			continue;
1640		}
1641
1642		if (pmap_pte_prot(pte) != newprot) {
1643			if (pte->pte_ig & PTE_IG_MANAGED) {
1644				vm_offset_t pa = pmap_pte_pa(pte);
1645				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1646				if (pte->pte_d) {
1647					if (pmap_track_modified(sva))
1648						vm_page_dirty(m);
1649					pte->pte_d = 0;
1650				}
1651				if (pte->pte_a) {
1652					vm_page_flag_set(m, PG_REFERENCED);
1653					pte->pte_a = 0;
1654				}
1655			}
1656			pmap_pte_set_prot(pte, newprot);
1657			pmap_update_vhpt(pte, sva);
1658			pmap_invalidate_page(pmap, sva);
1659		}
1660
1661		sva += PAGE_SIZE;
1662	}
1663	pmap_install(oldpmap);
1664}
1665
1666/*
1667 *	Insert the given physical page (p) at
1668 *	the specified virtual address (v) in the
1669 *	target physical map with the protection requested.
1670 *
1671 *	If specified, the page will be wired down, meaning
1672 *	that the related pte can not be reclaimed.
1673 *
1674 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1675 *	or lose information.  That is, this routine must actually
1676 *	insert this page into the given map NOW.
1677 */
1678void
1679pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1680	   boolean_t wired)
1681{
1682	pmap_t oldpmap;
1683	vm_offset_t pa;
1684	vm_offset_t opa;
1685	struct ia64_lpte origpte;
1686	struct ia64_lpte *pte;
1687	int managed;
1688
1689	if (pmap == NULL)
1690		return;
1691
1692	pmap_ensure_rid(pmap, va);
1693
1694	oldpmap = pmap_install(pmap);
1695
1696	va &= ~PAGE_MASK;
1697#ifdef PMAP_DIAGNOSTIC
1698	if (va > VM_MAX_KERNEL_ADDRESS)
1699		panic("pmap_enter: toobig");
1700#endif
1701
1702	/*
1703	 * Find (or create) a pte for the given mapping.
1704	 */
1705	pte = pmap_find_pte(va);
1706	origpte = *pte;
1707
1708	if (origpte.pte_p)
1709		opa = pmap_pte_pa(&origpte);
1710	else
1711		opa = 0;
1712	managed = 0;
1713
1714	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1715
1716	/*
1717	 * Mapping has not changed, must be protection or wiring change.
1718	 */
1719	if (origpte.pte_p && (opa == pa)) {
1720		/*
1721		 * Wiring change, just update stats. We don't worry about
1722		 * wiring PT pages as they remain resident as long as there
1723		 * are valid mappings in them. Hence, if a user page is wired,
1724		 * the PT page will be also.
1725		 */
1726		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1727			pmap->pm_stats.wired_count++;
1728		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1729			pmap->pm_stats.wired_count--;
1730
1731		/*
1732		 * We might be turning off write access to the page,
1733		 * so we go ahead and sense modify status.
1734		 */
1735		if (origpte.pte_ig & PTE_IG_MANAGED) {
1736			if (origpte.pte_d && pmap_track_modified(va)) {
1737				vm_page_t om;
1738				om = PHYS_TO_VM_PAGE(opa);
1739				vm_page_dirty(om);
1740			}
1741		}
1742
1743		managed = origpte.pte_ig & PTE_IG_MANAGED;
1744		goto validate;
1745	}
1746	/*
1747	 * Mapping has changed, invalidate old range and fall
1748	 * through to handle validating new mapping.
1749	 */
1750	if (opa) {
1751		int error;
1752		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1753		if (error)
1754			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1755	}
1756
1757	/*
1758	 * Enter on the PV list if part of our managed memory.
1759	 */
1760	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1761		pmap_insert_entry(pmap, va, m);
1762		managed |= PTE_IG_MANAGED;
1763	}
1764
1765	/*
1766	 * Increment counters
1767	 */
1768	pmap->pm_stats.resident_count++;
1769	if (wired)
1770		pmap->pm_stats.wired_count++;
1771
1772validate:
1773
1774	/*
1775	 * Now validate mapping with desired protection/wiring. This
1776	 * adds the pte to the VHPT if necessary.
1777	 */
1778	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1779		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1780
1781	/*
1782	 * if the mapping or permission bits are different, we need
1783	 * to invalidate the page.
1784	 */
1785	if (!pmap_equal_pte(&origpte, pte))
1786		pmap_invalidate_page(pmap, va);
1787
1788	pmap_install(oldpmap);
1789}
1790
1791/*
1792 * this code makes some *MAJOR* assumptions:
1793 * 1. Current pmap & pmap exists.
1794 * 2. Not wired.
1795 * 3. Read access.
1796 * 4. No page table pages.
1797 * 5. Tlbflush is deferred to calling procedure.
1798 * 6. Page IS managed.
1799 * but is *MUCH* faster than pmap_enter...
1800 */
1801
1802static void
1803pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1804{
1805	struct ia64_lpte *pte;
1806	pmap_t oldpmap;
1807
1808	pmap_ensure_rid(pmap, va);
1809
1810	oldpmap = pmap_install(pmap);
1811
1812	pte = pmap_find_pte(va);
1813	if (pte->pte_p)
1814		return;
1815
1816	/*
1817	 * Enter on the PV list since it's part of our managed memory.
1818	 */
1819	pmap_insert_entry(pmap, va, m);
1820
1821	/*
1822	 * Increment counters
1823	 */
1824	pmap->pm_stats.resident_count++;
1825
1826	/*
1827	 * Initialise PTE with read-only protection and enter into VHPT.
1828	 */
1829	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1830		     PTE_IG_MANAGED,
1831		     PTE_PL_USER, PTE_AR_R);
1832
1833	pmap_install(oldpmap);
1834}
1835
1836/*
1837 * Make temporary mapping for a physical address. This is called
1838 * during dump.
1839 */
1840void *
1841pmap_kenter_temporary(vm_offset_t pa, int i)
1842{
1843	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1844}
1845
1846#define MAX_INIT_PT (96)
1847/*
1848 * pmap_object_init_pt preloads the ptes for a given object
1849 * into the specified pmap.  This eliminates the blast of soft
1850 * faults on process startup and immediately after an mmap.
1851 */
1852void
1853pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1854		    vm_object_t object, vm_pindex_t pindex,
1855		    vm_size_t size, int limit)
1856{
1857	pmap_t oldpmap;
1858	vm_offset_t tmpidx;
1859	int psize;
1860	vm_page_t p;
1861	int objpgs;
1862
1863	if (pmap == NULL || object == NULL)
1864		return;
1865
1866	oldpmap = pmap_install(pmap);
1867
1868	psize = ia64_btop(size);
1869
1870	if ((object->type != OBJT_VNODE) ||
1871		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1872			(object->resident_page_count > MAX_INIT_PT))) {
1873		pmap_install(oldpmap);
1874		return;
1875	}
1876
1877	if (psize + pindex > object->size) {
1878		if (object->size < pindex)
1879			return;
1880		psize = object->size - pindex;
1881	}
1882
1883	/*
1884	 * if we are processing a major portion of the object, then scan the
1885	 * entire thing.
1886	 */
1887	if (psize > (object->resident_page_count >> 2)) {
1888		objpgs = psize;
1889
1890		for (p = TAILQ_FIRST(&object->memq);
1891		    ((objpgs > 0) && (p != NULL));
1892		    p = TAILQ_NEXT(p, listq)) {
1893
1894			tmpidx = p->pindex;
1895			if (tmpidx < pindex) {
1896				continue;
1897			}
1898			tmpidx -= pindex;
1899			if (tmpidx >= psize) {
1900				continue;
1901			}
1902			/*
1903			 * don't allow an madvise to blow away our really
1904			 * free pages by allocating pv entries.
1905			 */
1906			if ((limit & MAP_PREFAULT_MADVISE) &&
1907			    cnt.v_free_count < cnt.v_free_reserved) {
1908				break;
1909			}
1910			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1911				(p->busy == 0) &&
1912			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1913				if ((p->queue - p->pc) == PQ_CACHE)
1914					vm_page_deactivate(p);
1915				vm_page_busy(p);
1916				pmap_enter_quick(pmap,
1917						 addr + ia64_ptob(tmpidx), p);
1918				vm_page_flag_set(p, PG_MAPPED);
1919				vm_page_wakeup(p);
1920			}
1921			objpgs -= 1;
1922		}
1923	} else {
1924		/*
1925		 * else lookup the pages one-by-one.
1926		 */
1927		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1928			/*
1929			 * don't allow an madvise to blow away our really
1930			 * free pages by allocating pv entries.
1931			 */
1932			if ((limit & MAP_PREFAULT_MADVISE) &&
1933			    cnt.v_free_count < cnt.v_free_reserved) {
1934				break;
1935			}
1936			p = vm_page_lookup(object, tmpidx + pindex);
1937			if (p &&
1938			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1939				(p->busy == 0) &&
1940			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1941				if ((p->queue - p->pc) == PQ_CACHE)
1942					vm_page_deactivate(p);
1943				vm_page_busy(p);
1944				pmap_enter_quick(pmap,
1945						 addr + ia64_ptob(tmpidx), p);
1946				vm_page_flag_set(p, PG_MAPPED);
1947				vm_page_wakeup(p);
1948			}
1949		}
1950	}
1951	pmap_install(oldpmap);
1952	return;
1953}
1954
1955/*
1956 * pmap_prefault provides a quick way of clustering
1957 * pagefaults into a processes address space.  It is a "cousin"
1958 * of pmap_object_init_pt, except it runs at page fault time instead
1959 * of mmap time.
1960 */
1961#define PFBAK 4
1962#define PFFOR 4
1963#define PAGEORDER_SIZE (PFBAK+PFFOR)
1964
1965static int pmap_prefault_pageorder[] = {
1966	-PAGE_SIZE, PAGE_SIZE,
1967	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1968	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1969	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1970};
1971
1972void
1973pmap_prefault(pmap, addra, entry)
1974	pmap_t pmap;
1975	vm_offset_t addra;
1976	vm_map_entry_t entry;
1977{
1978	int i;
1979	vm_offset_t starta;
1980	vm_offset_t addr;
1981	vm_pindex_t pindex;
1982	vm_page_t m, mpte;
1983	vm_object_t object;
1984
1985	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1986		return;
1987
1988	object = entry->object.vm_object;
1989
1990	starta = addra - PFBAK * PAGE_SIZE;
1991	if (starta < entry->start) {
1992		starta = entry->start;
1993	} else if (starta > addra) {
1994		starta = 0;
1995	}
1996
1997	mpte = NULL;
1998	for (i = 0; i < PAGEORDER_SIZE; i++) {
1999		vm_object_t lobject;
2000		struct ia64_lpte *pte;
2001
2002		addr = addra + pmap_prefault_pageorder[i];
2003		if (addr > addra + (PFFOR * PAGE_SIZE))
2004			addr = 0;
2005
2006		if (addr < starta || addr >= entry->end)
2007			continue;
2008
2009		pte = pmap_find_vhpt(addr);
2010		if (pte && pte->pte_p)
2011			continue;
2012
2013		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2014		lobject = object;
2015		for (m = vm_page_lookup(lobject, pindex);
2016		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2017		    lobject = lobject->backing_object) {
2018			if (lobject->backing_object_offset & PAGE_MASK)
2019				break;
2020			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2021			m = vm_page_lookup(lobject->backing_object, pindex);
2022		}
2023
2024		/*
2025		 * give-up when a page is not in memory
2026		 */
2027		if (m == NULL)
2028			break;
2029
2030		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2031			(m->busy == 0) &&
2032		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2033
2034			if ((m->queue - m->pc) == PQ_CACHE) {
2035				vm_page_deactivate(m);
2036			}
2037			vm_page_busy(m);
2038			pmap_enter_quick(pmap, addr, m);
2039			vm_page_flag_set(m, PG_MAPPED);
2040			vm_page_wakeup(m);
2041		}
2042	}
2043}
2044
2045/*
2046 *	Routine:	pmap_change_wiring
2047 *	Function:	Change the wiring attribute for a map/virtual-address
2048 *			pair.
2049 *	In/out conditions:
2050 *			The mapping must already exist in the pmap.
2051 */
2052void
2053pmap_change_wiring(pmap, va, wired)
2054	register pmap_t pmap;
2055	vm_offset_t va;
2056	boolean_t wired;
2057{
2058	pmap_t oldpmap;
2059	struct ia64_lpte *pte;
2060
2061	if (pmap == NULL)
2062		return;
2063
2064	oldpmap = pmap_install(pmap);
2065
2066	pte = pmap_find_vhpt(va);
2067
2068	if (wired && !pmap_pte_w(pte))
2069		pmap->pm_stats.wired_count++;
2070	else if (!wired && pmap_pte_w(pte))
2071		pmap->pm_stats.wired_count--;
2072
2073	/*
2074	 * Wiring is not a hardware characteristic so there is no need to
2075	 * invalidate TLB.
2076	 */
2077	pmap_pte_set_w(pte, wired);
2078
2079	pmap_install(oldpmap);
2080}
2081
2082
2083
2084/*
2085 *	Copy the range specified by src_addr/len
2086 *	from the source map to the range dst_addr/len
2087 *	in the destination map.
2088 *
2089 *	This routine is only advisory and need not do anything.
2090 */
2091
2092void
2093pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2094	  vm_offset_t src_addr)
2095{
2096}
2097
2098/*
2099 *	Routine:	pmap_kernel
2100 *	Function:
2101 *		Returns the physical map handle for the kernel.
2102 */
2103pmap_t
2104pmap_kernel()
2105{
2106	return (kernel_pmap);
2107}
2108
2109/*
2110 *	pmap_zero_page zeros the specified hardware page by
2111 *	mapping it into virtual memory and using bzero to clear
2112 *	its contents.
2113 */
2114
2115void
2116pmap_zero_page(vm_page_t m)
2117{
2118	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2119	bzero((caddr_t) va, PAGE_SIZE);
2120}
2121
2122
2123/*
2124 *	pmap_zero_page_area zeros the specified hardware page by
2125 *	mapping it into virtual memory and using bzero to clear
2126 *	its contents.
2127 *
2128 *	off and size must reside within a single page.
2129 */
2130
2131void
2132pmap_zero_page_area(vm_page_t m, int off, int size)
2133{
2134	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2135	bzero((char *)(caddr_t)va + off, size);
2136}
2137
2138/*
2139 *	pmap_copy_page copies the specified (machine independent)
2140 *	page by mapping the page into virtual memory and using
2141 *	bcopy to copy the page, one machine dependent page at a
2142 *	time.
2143 */
2144void
2145pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2146{
2147	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2148	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2149	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2150}
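
#if 0
/*
 * Illustrative sketch only: pmap_zero_page(), pmap_zero_page_area() and
 * pmap_copy_page() above all rely on the ia64 region 7 identity mapping,
 * which turns a physical address into a cacheable kernel virtual address
 * without installing a PTE.  The hypothetical helper below just restates
 * that pattern; the block is never compiled.
 */
static void *
example_page_kva(vm_page_t m)
{
	return ((void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m)));
}
#endif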
2151
2152
2153/*
2154 *	Routine:	pmap_pageable
2155 *	Function:
2156 *		Make the specified pages (by pmap, offset)
2157 *		pageable (or not) as requested.
2158 *
2159 *		A page which is not pageable may not take
2160 *		a fault; therefore, its page table entry
2161 *		must remain valid for the duration.
2162 *
2163 *		This routine is merely advisory; pmap_enter
2164 *		will specify that these pages are to be wired
2165 *		down (or not) as appropriate.
2166 */
2167void
2168pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
2169	      boolean_t pageable)
2170{
2171}
2172
2173/*
2174 * Returns true if the pmap's pv is one of the first
2175 * 16 pvs linked to from this page.  This count may
2176 * be changed upwards or downwards in the future; it
2177 * is only necessary that true be returned for a small
2178 * subset of pmaps for proper page aging.
2179 */
2180boolean_t
2181pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2182{
2183	pv_entry_t pv;
2184	int loops = 0;
2185	int s;
2186
2187	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2188		return FALSE;
2189
2190	s = splvm();
2191
2192	/*
2193	 * Check the page's current mappings, returning as soon as this pmap is found.
2194	 */
2195	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2196		if (pv->pv_pmap == pmap) {
2197			splx(s);
2198			return TRUE;
2199		}
2200		loops++;
2201		if (loops >= 16)
2202			break;
2203	}
2204	splx(s);
2205	return (FALSE);
2206}
2207
2208#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2209/*
2210 * Remove all pages from the specified address space;
2211 * this aids process exit speeds.  This code is special
2212 * cased for the current process only, but the more
2213 * generic (and slightly slower) mode can be enabled.
2214 * This is much faster than pmap_remove in the case of
2215 * running down an entire address space.
2216 */
2217void
2218pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2219{
2220	pv_entry_t pv, npv;
2221	int s;
2222
2223#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2224	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2225		printf("warning: pmap_remove_pages called with non-current pmap\n");
2226		return;
2227	}
2228#endif
2229
2230	s = splvm();
2231	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2232		pv;
2233		pv = npv) {
2234		struct ia64_lpte *pte;
2235
2236		npv = TAILQ_NEXT(pv, pv_plist);
2237
2238		if (pv->pv_va >= eva || pv->pv_va < sva) {
2239			continue;
2240		}
2241
2242		pte = pmap_find_vhpt(pv->pv_va);
2243		if (!pte)
2244		panic("pmap_remove_pages: page on pm_pvlist has no pte");
2245
2246		/*
2247		 * We cannot remove wired pages from a process'
2248		 * mapping at this time.
2249		 */
2250		if (pte->pte_ig & PTE_IG_WIRED) {
2251			continue;
2252		}
2253
2254		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2255	}
2256	splx(s);
2257
2258	pmap_invalidate_all(pmap);
2259}
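
#if 0
/*
 * Illustrative sketch only: the intended caller of pmap_remove_pages()
 * is the process-exit path, which tears down the whole user address
 * range in one call instead of using pmap_remove() page by page.  The
 * address range macros are assumptions for the example; the block is
 * never compiled.
 */
static void
example_exit_teardown(struct vmspace *vm)
{
	pmap_remove_pages(vmspace_pmap(vm),
			  VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
}
#endif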
2260
2261/*
2262 *      pmap_page_protect:
2263 *
2264 *      Lower the permission for all mappings to a given page.
2265 */
2266void
2267pmap_page_protect(vm_page_t m, vm_prot_t prot)
2268{
2269	pv_entry_t pv;
2270
2271	if ((prot & VM_PROT_WRITE) != 0)
2272		return;
2273	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2274		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2275			int newprot = pte_prot(pv->pv_pmap, prot);
2276			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2277			struct ia64_lpte *pte;
2278			pte = pmap_find_vhpt(pv->pv_va);
2279			pmap_pte_set_prot(pte, newprot);
2280			pmap_update_vhpt(pte, pv->pv_va);
2281			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2282			pmap_install(oldpmap);
2283		}
2284	} else {
2285		pmap_remove_all(m);
2286	}
2287}
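
#if 0
/*
 * Illustrative usage sketch only: callers of pmap_page_protect() pass
 * the strongest protection that should remain.  VM_PROT_READ downgrades
 * every mapping of the page to read-only, while VM_PROT_NONE removes
 * the page from all pmaps via pmap_remove_all().  Kept under #if 0.
 */
static void
example_make_page_readonly(vm_page_t m)
{
	pmap_page_protect(m, VM_PROT_READ);
}
#endif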
2288
2289vm_offset_t
2290pmap_phys_address(int ppn)
2291{
2292	return (ia64_ptob(ppn));
2293}
2294
2295/*
2296 *	pmap_ts_referenced:
2297 *
2298 *	Return a count of reference bits for a page, clearing those bits.
2299 *	It is not necessary for every reference bit to be cleared, but it
2300 *	is necessary that 0 only be returned when there are truly no
2301 *	reference bits set.
2302 *
2303 *	XXX: The exact number of bits to check and clear is a matter that
2304 *	should be tested and standardized at some point in the future for
2305 *	optimal aging of shared pages.
2306 */
2307int
2308pmap_ts_referenced(vm_page_t m)
2309{
2310	pv_entry_t pv;
2311	int count = 0;
2312
2313	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2314		return 0;
2315
2316	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2317		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2318		struct ia64_lpte *pte;
2319		pte = pmap_find_vhpt(pv->pv_va);
2320		if (pte->pte_a) {
2321			count++;
2322			pte->pte_a = 0;
2323			pmap_update_vhpt(pte, pv->pv_va);
2324			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2325		}
2326		pmap_install(oldpmap);
2327	}
2328
2329	return count;
2330}
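
#if 0
/*
 * Illustrative sketch only: a page-aging loop is expected to feed the
 * count returned by pmap_ts_referenced() into the page's activity
 * counter.  The arithmetic below is an assumption about such a caller
 * (a real caller would also clamp the counter), not code from the page
 * daemon; the block is never compiled.
 */
static void
example_age_page(vm_page_t m)
{
	int refs;

	refs = pmap_ts_referenced(m);
	if (refs > 0)
		m->act_count += refs;	/* recently used: keep it active */
	else if (m->act_count > 0)
		m->act_count--;		/* unreferenced: let it go cold */
}
#endif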
2331
2332#if 0
2333/*
2334 *	pmap_is_referenced:
2335 *
2336 *	Return whether or not the specified physical page was referenced
2337 *	in any physical maps.
2338 */
2339static boolean_t
2340pmap_is_referenced(vm_page_t m)
2341{
2342	pv_entry_t pv;
2343
2344	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2345		return FALSE;
2346
2347	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2348		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2349		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2350		pmap_install(oldpmap);
2351		if (pte->pte_a)
2352			return TRUE;
2353	}
2354
2355	return 0;
2356	return FALSE;
2357#endif
2358
2359/*
2360 *	pmap_is_modified:
2361 *
2362 *	Return whether or not the specified physical page was modified
2363 *	in any physical maps.
2364 */
2365boolean_t
2366pmap_is_modified(vm_page_t m)
2367{
2368	pv_entry_t pv;
2369
2370	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2371		return FALSE;
2372
2373	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2374		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2375		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2376		pmap_install(oldpmap);
2377		if (pte->pte_d)
2378			return TRUE;
2379	}
2380
2381	return 0;
2382	return FALSE;
2383
2384/*
2385 *	Clear the modify bits on the specified physical page.
2386 */
2387void
2388pmap_clear_modify(vm_page_t m)
2389{
2390	pv_entry_t pv;
2391
2392	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2393		return;
2394
2395	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2396		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2397		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2398		if (pte->pte_d) {
2399			pte->pte_d = 0;
2400			pmap_update_vhpt(pte, pv->pv_va);
2401			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2402		}
2403		pmap_install(oldpmap);
2404	}
2405}
2406
2407/*
2408 *	pmap_clear_reference:
2409 *
2410 *	Clear the reference bit on the specified physical page.
2411 */
2412void
2413pmap_clear_reference(vm_page_t m)
2414{
2415	pv_entry_t pv;
2416
2417	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2418		return;
2419
2420	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2421		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2422		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2423		if (pte->pte_a) {
2424			pte->pte_a = 0;
2425			pmap_update_vhpt(pte, pv->pv_va);
2426			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2427		}
2428		pmap_install(oldpmap);
2429	}
2430}
2431
2432/*
2433 * Miscellaneous support routines follow
2434 */
2435
2436static void
2437ia64_protection_init()
2438{
2439	int prot, *kp, *up;
2440
2441	kp = protection_codes[0];
2442	up = protection_codes[1];
2443
2444	for (prot = 0; prot < 8; prot++) {
2445		switch (prot) {
2446		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2447			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2448			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2449			break;
2450
2451		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2452			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2453			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2454			break;
2455
2456		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2457			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2458			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2459			break;
2460
2461		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2462			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2463			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2464			break;
2465
2466		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2467			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2468			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2469			break;
2470
2471		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2472			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2473			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2474			break;
2475
2476		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2477			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2478			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2479			break;
2480
2481		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2482			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2483			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2484			break;
2485		}
2486	}
2487}
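
#if 0
/*
 * Illustrative sketch only: the table built above is meant to be
 * indexed with one row for the kernel pmap and one for user pmaps,
 * with the three VM_PROT_* bits selecting the column.  This presumably
 * mirrors pte_prot() earlier in this file; the helper name is
 * hypothetical and the block is never compiled.
 */
static int
example_lookup_prot(pmap_t pm, vm_prot_t prot)
{
	return (protection_codes[pm == kernel_pmap ? 0 : 1][prot & 7]);
}
#endif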
2488
2489/*
2490 * Map a set of physical memory pages into the kernel virtual
2491 * address space. Return a pointer to where it is mapped. This
2492 * routine is intended to be used for mapping device memory,
2493 * NOT real memory.
2494 */
2495void *
2496pmap_mapdev(vm_offset_t pa, vm_size_t size)
2497{
2498	return (void*) IA64_PHYS_TO_RR6(pa);
2499}
2500
2501/*
2502 * 'Unmap' a range mapped by pmap_mapdev().
2503 */
2504void
2505pmap_unmapdev(vm_offset_t va, vm_size_t size)
2506{
2507	return;
2508}
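
#if 0
/*
 * Illustrative sketch only: a device driver is expected to wrap its
 * register accesses in pmap_mapdev()/pmap_unmapdev().  On ia64 the
 * "mapping" is simply the uncacheable region 6 alias of the physical
 * address, so pmap_unmapdev() has nothing to undo.  The register layout
 * below is made up for the example; the block is never compiled.
 */
static void
example_poke_device(vm_offset_t regs_pa)
{
	volatile u_int32_t *regs;

	regs = pmap_mapdev(regs_pa, PAGE_SIZE);
	regs[0] = 1;			/* hypothetical device register */
	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
}
#endif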
2509
2510/*
2511 * perform the pmap work for mincore
2512 */
2513int
2514pmap_mincore(pmap_t pmap, vm_offset_t addr)
2515{
2516	pmap_t oldpmap;
2517	struct ia64_lpte *pte;
2518	int val = 0;
2519
2520	oldpmap = pmap_install(pmap);
2521	pte = pmap_find_vhpt(addr);
2522	pmap_install(oldpmap);
2523
2524	if (!pte)
2525		return 0;
2526
2527	if (pmap_pte_v(pte)) {
2528		vm_page_t m;
2529		vm_offset_t pa;
2530
2531		val = MINCORE_INCORE;
2532		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2533			return val;
2534
2535		pa = pmap_pte_pa(pte);
2536
2537		m = PHYS_TO_VM_PAGE(pa);
2538
2539		/*
2540		 * Modified by us
2541		 */
2542		if (pte->pte_d)
2543			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2544		/*
2545		 * Modified by someone
2546		 */
2547		else if (pmap_is_modified(m))
2548			val |= MINCORE_MODIFIED_OTHER;
2549		/*
2550		 * Referenced by us
2551		 */
2552		if (pte->pte_a)
2553			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2554
2555		/*
2556		 * Referenced by someone
2557		 */
2558		else if (pmap_ts_referenced(m)) {
2559			val |= MINCORE_REFERENCED_OTHER;
2560			vm_page_flag_set(m, PG_REFERENCED);
2561		}
2562	}
2563	return val;
2564}
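
#if 0
/*
 * Illustrative sketch only: the mincore(2) path is expected to call
 * pmap_mincore() once per page and hand the MINCORE_* bits back to
 * userland.  The loop is an assumption about that caller, not code
 * from the VM system; the block is never compiled.
 */
static void
example_collect_mincore(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
			char *vec)
{
	vm_offset_t va;

	for (va = sva; va < eva; va += PAGE_SIZE)
		*vec++ = (char)pmap_mincore(pmap, va);
}
#endif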
2565
2566void
2567pmap_activate(struct thread *td)
2568{
2569	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2570}
2571
2572pmap_t
2573pmap_install(pmap_t pmap)
2574{
2575	pmap_t oldpmap;
2576	int i;
2577
2578	critical_enter();
2579
2580	oldpmap = PCPU_GET(current_pmap);
2581
2582	if (pmap == oldpmap || pmap == kernel_pmap) {
2583		critical_exit();
2584		return pmap;
2585	}
2586
2587	if (oldpmap) {
2588		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2589	}
2590
2591	PCPU_SET(current_pmap, pmap);
2592	if (!pmap) {
2593		/*
2594		 * RIDs 0..4 have no mappings, so that accesses
2595		 * generate page faults.
2596		 */
2597		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2598		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2599		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2600		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2601		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2602		critical_exit();
2603		return oldpmap;
2604	}
2605
2606	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2607
2608	for (i = 0; i < 5; i++)
2609		ia64_set_rr(IA64_RR_BASE(i),
2610			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2611
2612	critical_exit();
2613	return oldpmap;
2614}
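
#if 0
/*
 * Illustrative sketch only: the install/restore idiom used throughout
 * this file.  Code that needs to touch another pmap's translations
 * switches to it temporarily and then reinstalls the previous pmap.
 * The block is never compiled.
 */
static void
example_with_pmap(pmap_t pmap)
{
	pmap_t oldpmap;

	oldpmap = pmap_install(pmap);
	/* ... operate on mappings belonging to 'pmap' ... */
	pmap_install(oldpmap);
}
#endif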
2615
2616vm_offset_t
2617pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2618{
2619
2620	return addr;
2621}
2622
2623#include "opt_ddb.h"
2624
2625#ifdef DDB
2626
2627#include <ddb/ddb.h>
2628
2629static const char*	psnames[] = {
2630	"1B",	"2B",	"4B",	"8B",
2631	"16B",	"32B",	"64B",	"128B",
2632	"256B",	"512B",	"1K",	"2K",
2633	"4K",	"8K",	"16K",	"32K",
2634	"64K",	"128K",	"256K",	"512K",
2635	"1M",	"2M",	"4M",	"8M",
2636	"16M",	"32M",	"64M",	"128M",
2637	"256M",	"512M",	"1G",	"2G"
2638};
2639
2640static void
2641print_trs(int type)
2642{
2643	struct ia64_pal_result	res;
2644	int			i, maxtr;
2645	struct {
2646		struct ia64_pte	pte;
2647		struct ia64_itir itir;
2648		struct ia64_ifa ifa;
2649		struct ia64_rr	rr;
2650	}			buf;
2651	static const char*	manames[] = {
2652		"WB",	"bad",	"bad",	"bad",
2653		"UC",	"UCE",	"WC",	"NaT",
2655	};
2656
2657	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2658	if (res.pal_status != 0) {
2659		db_printf("Can't get VM summary\n");
2660		return;
2661	}
2662
2663	if (type == 0)
2664		maxtr = (res.pal_result[0] >> 40) & 0xff;
2665	else
2666		maxtr = (res.pal_result[0] >> 32) & 0xff;
2667
2668	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2669	for (i = 0; i <= maxtr; i++) {
2670		bzero(&buf, sizeof(buf));
2671		res = ia64_call_pal_stacked_physical
2672			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2673		if (!(res.pal_result[0] & 1))
2674			buf.pte.pte_ar = 0;
2675		if (!(res.pal_result[0] & 2))
2676			buf.pte.pte_pl = 0;
2677		if (!(res.pal_result[0] & 4))
2678			buf.pte.pte_d = 0;
2679		if (!(res.pal_result[0] & 8))
2680			buf.pte.pte_ma = 0;
2681		db_printf(
2682			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2683			buf.ifa.ifa_ig & 1,
2684			buf.rr.rr_rid,
2685			buf.ifa.ifa_vpn,
2686			buf.pte.pte_ppn,
2687			psnames[buf.itir.itir_ps],
2688			buf.pte.pte_ed,
2689			buf.pte.pte_ar,
2690			buf.pte.pte_pl,
2691			buf.pte.pte_d,
2692			buf.pte.pte_a,
2693			manames[buf.pte.pte_ma],
2694			buf.pte.pte_p,
2695			buf.itir.itir_key);
2696	}
2697}
2698
2699DB_COMMAND(itr, db_itr)
2700{
2701	print_trs(0);
2702}
2703
2704DB_COMMAND(dtr, db_dtr)
2705{
2706	print_trs(1);
2707}
2708
2709DB_COMMAND(rr, db_rr)
2710{
2711	int i;
2712	u_int64_t t;
2713	struct ia64_rr rr;
2714
2715	printf("RR RID    PgSz VE\n");
2716	for (i = 0; i < 8; i++) {
2717		__asm __volatile ("mov %0=rr[%1]"
2718				  : "=r"(t)
2719				  : "r"(IA64_RR_BASE(i)));
2720		*(u_int64_t *) &rr = t;
2721		printf("%d  %06x %4s %d\n",
2722		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2723	}
2724}
2725
2726DB_COMMAND(thash, db_thash)
2727{
2728	if (!have_addr)
2729		return;
2730
2731	db_printf("%p\n", (void *) ia64_thash(addr));
2732}
2733
2734DB_COMMAND(ttag, db_ttag)
2735{
2736	if (!have_addr)
2737		return;
2738
2739	db_printf("0x%lx\n", ia64_ttag(addr));
2740}
2741
2742#endif
2743