1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 92870 2002-03-21 09:50:11Z dfr $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 * this module may delay invalidation or reduced-protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120
121#include <sys/user.h>
122
123#include <machine/pal.h>
124#include <machine/md_var.h>
125
126MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
127
128#ifndef PMAP_SHPGPERPROC
129#define PMAP_SHPGPERPROC 200
130#endif
131
132#if defined(DIAGNOSTIC)
133#define PMAP_DIAGNOSTIC
134#endif
135
136#define MINPV 2048
137
138#if 0
139#define PMAP_DIAGNOSTIC
140#define PMAP_DEBUG
141#endif
142
143#if !defined(PMAP_DIAGNOSTIC)
144#define PMAP_INLINE __inline
145#else
146#define PMAP_INLINE
147#endif
148
149/*
150 * Get PDEs and PTEs for user/kernel address space
151 */
152#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
153#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
154#define pmap_pte_v(pte)		((pte)->pte_p)
155#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
156#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
157
158#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
159				:((pte)->pte_ig &= ~PTE_IG_WIRED))
160#define pmap_pte_set_prot(pte, v) do {		\
161    (pte)->pte_ar = v >> 2;			\
162    (pte)->pte_pl = v & 3;			\
163} while (0)
164
165/*
166 * Given a map and a machine independent protection code,
167 * convert to an ia64 protection code.
168 */
169#define pte_prot(m, p)		(protection_codes[m == pmap_kernel() ? 0 : 1][p])
170#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
171#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
172int	protection_codes[2][8];
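/*
 * Each entry packs an ia64 access-rights value and privilege level as
 * (ar << 2) | pl and is indexed by [kernel(0)/user(1)][VM_PROT_* mask];
 * the table is filled in by ia64_protection_init() from pmap_bootstrap().
 */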
173
174/*
175 * Return non-zero if this pmap is currently active
176 */
177#define pmap_isactive(pmap)	(pmap->pm_active)
178
179/*
180 * Statically allocated kernel pmap
181 */
182static struct pmap kernel_pmap_store;
183pmap_t kernel_pmap;
184
185vm_offset_t avail_start;	/* PA of first available physical page */
186vm_offset_t avail_end;		/* PA of last available physical page */
187vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
188vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
189static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
190
191vm_offset_t vhpt_base, vhpt_size;
192
193/*
194 * We use an object to own the kernel's 'page tables'. For simplicity,
195 * we use one page directory to index a set of pages containing
196 * ia64_lptes. This gives us up to 2GB of kernel virtual space.
197 */
198static vm_object_t kptobj;
199static int nkpt;
200static struct ia64_lpte **kptdir;
201#define KPTE_DIR_INDEX(va) \
202	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
203#define KPTE_PTE_INDEX(va) \
204	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
205#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
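/*
 * Index arithmetic for the macros above: each leaf page holds
 * NKPTEPG = PAGE_SIZE / 32 = 2^(PAGE_SHIFT-5) ia64_lptes (the shift
 * counts assume the 32-byte struct ia64_lpte) and each kptdir page
 * holds 2^(PAGE_SHIFT-3) pointers, so KPTE_PTE_INDEX() extracts VA
 * bits PAGE_SHIFT..2*PAGE_SHIFT-6 and KPTE_DIR_INDEX() the next
 * PAGE_SHIFT-3 bits.  With 8KB pages (PAGE_SHIFT == 13), for example,
 * that works out to 1024 directory slots * 256 ptes * 8KB = 2GB of
 * kernel virtual space, matching the comment above.
 */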
206
207vm_offset_t kernel_vm_end;
208
209/*
210 * Values for ptc.e. XXX values for SKI.
211 */
212static u_int64_t pmap_ptc_e_base = 0x100000000;
213static u_int64_t pmap_ptc_e_count1 = 3;
214static u_int64_t pmap_ptc_e_count2 = 2;
215static u_int64_t pmap_ptc_e_stride1 = 0x2000;
216static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
217
218/*
219 * Data for the RID allocator
220 */
221static u_int64_t *pmap_ridbusy;
222static int pmap_ridmax, pmap_ridcount;
223struct mtx pmap_ridmutex;
224
225/*
226 * Data for the pv entry allocation mechanism
227 */
228static uma_zone_t pvzone;
229static struct vm_object pvzone_obj;
230static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
231static int pmap_pagedaemon_waken = 0;
232static struct pv_entry *pvbootentries;
233static int pvbootnext, pvbootmax;
234
235/*
236 * Data for allocating PTEs for user processes.
237 */
238static uma_zone_t ptezone;
239static struct vm_object ptezone_obj;
240#if 0
241static struct ia64_lpte *pteinit;
242#endif
243
244/*
245 * VHPT instrumentation.
246 */
247static int pmap_vhpt_inserts;
248static int pmap_vhpt_collisions;
249static int pmap_vhpt_resident;
250SYSCTL_DECL(_vm_stats);
251SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
252SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
253	   &pmap_vhpt_inserts, 0, "");
254SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
255	   &pmap_vhpt_collisions, 0, "");
256SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
257	   &pmap_vhpt_resident, 0, "");
258
259static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
260static pv_entry_t get_pv_entry(void);
261static void	ia64_protection_init(void);
262
263static void	pmap_invalidate_all(pmap_t pmap);
264static void	pmap_remove_all(vm_page_t m);
265static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
266static void	*pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
267
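/*
 * Boot-time allocator: carve 'size' bytes (rounded up to a page) from
 * the start of the first phys_avail[] bank that is large enough,
 * discarding banks that are too small, and return the chunk zeroed,
 * through its region 7 (cacheable direct-mapped) address.
 */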
268vm_offset_t
269pmap_steal_memory(vm_size_t size)
270{
271	vm_size_t bank_size;
272	vm_offset_t pa, va;
273
274	size = round_page(size);
275
276	bank_size = phys_avail[1] - phys_avail[0];
277	while (size > bank_size) {
278		int i;
279		for (i = 0; phys_avail[i+2]; i+= 2) {
280			phys_avail[i] = phys_avail[i+2];
281			phys_avail[i+1] = phys_avail[i+3];
282		}
283		phys_avail[i] = 0;
284		phys_avail[i+1] = 0;
285		if (!phys_avail[0])
286			panic("pmap_steal_memory: out of memory");
287		bank_size = phys_avail[1] - phys_avail[0];
288	}
289
290	pa = phys_avail[0];
291	phys_avail[0] += size;
292
293	va = IA64_PHYS_TO_RR7(pa);
294	bzero((caddr_t) va, size);
295	return va;
296}
297
298/*
299 *	Bootstrap the system enough to run with virtual memory.
300 */
301void
302pmap_bootstrap()
303{
304	int i, j, count, ridbits;
305	struct ia64_pal_result res;
306
307	/*
308	 * Query the PAL Code to find the loop parameters for the
309	 * ptc.e instruction.
310	 */
311	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
312	if (res.pal_status != 0)
313		panic("Can't configure ptc.e parameters");
314	pmap_ptc_e_base = res.pal_result[0];
315	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
316	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
317	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
318	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
319	if (bootverbose)
320		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
321		       "stride1=0x%lx, stride2=0x%lx\n",
322		       pmap_ptc_e_base,
323		       pmap_ptc_e_count1,
324		       pmap_ptc_e_count2,
325		       pmap_ptc_e_stride1,
326		       pmap_ptc_e_stride2);
327
328	/*
329	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
330	 */
331	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
332	if (res.pal_status != 0) {
333		if (bootverbose)
334			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
335		ridbits = 18; /* guaranteed minimum */
336	} else {
337		ridbits = (res.pal_result[1] >> 8) & 0xff;
338		if (bootverbose)
339			printf("Processor supports %d Region ID bits\n",
340			       ridbits);
341	}
342	pmap_ridmax = (1 << ridbits);
343	pmap_ridcount = 8;
344	pmap_ridbusy = (u_int64_t *)
345		pmap_steal_memory(pmap_ridmax / 8);
346	bzero(pmap_ridbusy, pmap_ridmax / 8);
347	pmap_ridbusy[0] |= 0xff;
348	mtx_init(&pmap_ridmutex, "RID allocator lock", MTX_DEF);
349
350	/*
351	 * Allocate some memory for initial kernel 'page tables'.
352	 */
353	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
354	for (i = 0; i < NKPT; i++) {
355		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
356	}
357	nkpt = NKPT;
358
359	avail_start = phys_avail[0];
360	for (i = 0; phys_avail[i+2]; i+= 2) ;
361	avail_end = phys_avail[i+1];
362	count = i+2;
363
364	/*
365	 * Figure out a useful size for the VHPT, based on the size of
366	 * physical memory and try to locate a region which is large
367	 * enough to contain the VHPT (which must be a power of two in
368	 * size and aligned to a natural boundary).
369	 */
370	vhpt_size = 15;
371	while ((1<<vhpt_size) < ia64_btop(avail_end - avail_start) * 32)
372		vhpt_size++;
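	/*
	 * The loop above sizes the VHPT to have at least one 32-byte
	 * long-format entry per physical page, with a floor of 2^15
	 * bytes; vhpt_size is kept as log2 of the table size so that
	 * it can be programmed directly into cr.pta below.
	 */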
373
374	vhpt_base = 0;
375	while (!vhpt_base) {
376		vm_offset_t mask;
377		if (bootverbose)
378			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
379		mask = (1L << vhpt_size) - 1;
380		for (i = 0; i < count; i += 2) {
381			vm_offset_t base, limit;
382			base = (phys_avail[i] + mask) & ~mask;
383			limit = base + (1L << vhpt_size);
384			if (limit <= phys_avail[i+1])
385				/*
386				 * VHPT can fit in this region
387				 */
388				break;
389		}
390		if (!phys_avail[i]) {
391			/*
392			 * Can't fit, try next smaller size.
393			 */
394			vhpt_size--;
395		} else {
396			vhpt_base = (phys_avail[i] + mask) & ~mask;
397		}
398	}
399	if (vhpt_size < 15)
400		panic("Can't find space for VHPT");
401
402	if (bootverbose)
403		printf("Putting VHPT at %p\n", (void *) vhpt_base);
404	if (vhpt_base != phys_avail[i]) {
405		/*
406		 * Split this region.
407		 */
408		if (bootverbose)
409			printf("Splitting [%p-%p]\n",
410			       (void *) phys_avail[i],
411			       (void *) phys_avail[i+1]);
412		for (j = count; j > i; j -= 2) {
413			phys_avail[j] = phys_avail[j-2];
414			phys_avail[j+1] = phys_avail[j-2+1];
415		}
416		phys_avail[count+2] = 0;
417		phys_avail[count+3] = 0;
418		phys_avail[i+1] = vhpt_base;
419		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
420	} else {
421		phys_avail[i] = vhpt_base + (1L << vhpt_size);
422	}
423
424	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
425	bzero((void *) vhpt_base, (1L << vhpt_size));
426	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
427			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
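	/*
	 * The value loaded into cr.pta packs the VHPT base address with
	 * PTA.vf (bit 8) selecting the long format, PTA.size (bits 7:2)
	 * holding log2 of the table size and PTA.ve (bit 0) enabling
	 * the VHPT walker.
	 */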
428
429	virtual_avail = IA64_RR_BASE(5);
430	virtual_end = IA64_RR_BASE(6)-1;
431
432	/*
433	 * Initialize protection array.
434	 */
435	ia64_protection_init();
436
437	/*
438	 * The kernel's pmap is statically allocated so we don't have to use
439	 * pmap_create, which is unlikely to work correctly at this part of
440	 * the boot sequence (XXX and which no longer exists).
441	 */
442	kernel_pmap = &kernel_pmap_store;
443	for (i = 0; i < 5; i++)
444		kernel_pmap->pm_rid[i] = 0;
445	kernel_pmap->pm_count = 1;
446	kernel_pmap->pm_active = 1;
447	TAILQ_INIT(&kernel_pmap->pm_pvlist);
448
449	/*
450	 * Region 5 is mapped via the vhpt.
451	 */
452	ia64_set_rr(IA64_RR_BASE(5),
453		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
454
455	/*
456	 * Region 6 is direct mapped UC and region 7 is direct mapped
457	 * WB. The details of this are controlled by the Alt {I,D}TLB
458	 * handlers. Here we just make sure that they have the largest
459	 * possible page size to minimise TLB usage.
460	 */
461	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
462	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
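	/*
	 * A region register packs the RID into bits 31:8, log2 of the
	 * preferred page size into bits 7:2 and the VHPT-walker enable
	 * bit into bit 0.  Regions 6 and 7 use 256MB (2^28)
	 * translations and leave the walker disabled since the Alt TLB
	 * handlers synthesize their mappings directly.
	 */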
463
464	/*
465	 * Set up proc0's PCB.
466	 */
467#if 0
468	thread0.td_pcb->pcb_hw.apcb_asn = 0;
469#endif
470
471	/*
472	 * Reserve some memory for allocating pvs while bootstrapping
473	 * the pv allocator. We need to have enough to cover mapping
474	 * the kmem_alloc region used to allocate the initial_pvs in
475	 * pmap_init. In general, the size of this region is
476	 * approximately (# physical pages) * (size of pv entry).
477	 */
478	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
479	pvbootentries = (struct pv_entry *)
480		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
481	pvbootnext = 0;
482
483	/*
484	 * Clear out any random TLB entries left over from booting.
485	 */
486	pmap_invalidate_all(kernel_pmap);
487}
488
489static void *
490pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
491{
492	*flags = UMA_SLAB_PRIV;
493	return (void *)kmem_alloc(kernel_map, bytes);
494}
495
496/*
497 *	Initialize the pmap module.
498 *	Called by vm_init, to initialize any structures that the pmap
499 *	system needs to map virtual memory.
500 *	pmap_init has been enhanced to support discontiguous physical
501 *	memory in a fairly consistent way.
502 */
503void
504pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
505{
506	int i;
507	int initial_pvs;
508
509	/*
510	 * Allocate memory for random pmap data structures.  Includes the
511	 * pv_head_table.
512	 */
513
514	for(i = 0; i < vm_page_array_size; i++) {
515		vm_page_t m;
516
517		m = &vm_page_array[i];
518		TAILQ_INIT(&m->md.pv_list);
519		m->md.pv_list_count = 0;
520 	}
521
522	/*
523	 * Init the pv free list and the PTE free list.
524	 */
525	initial_pvs = vm_page_array_size;
526	if (initial_pvs < MINPV)
527		initial_pvs = MINPV;
528	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
529	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
530	uma_zone_set_allocf(pvzone, pmap_allocf);
531	uma_prealloc(pvzone, initial_pvs);
532
533	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
534	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
535	uma_zone_set_allocf(ptezone, pmap_allocf);
536	uma_prealloc(ptezone, initial_pvs);
537
538	/*
539	 * Create the object for the kernel's page tables.
540	 */
541	kptobj = vm_object_allocate(OBJT_DEFAULT, MAXKPT);
542
543	/*
544	 * Now it is safe to enable pv_table recording.
545	 */
546	pmap_initialized = TRUE;
547}
548
549/*
550 * Initialize the address space (zone) for the pv_entries.  Set a
551 * high water mark so that the system can recover from excessive
552 * numbers of pv entries.
553 */
554void
555pmap_init2()
556{
557	int shpgperproc = PMAP_SHPGPERPROC;
558
559	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
560	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
561	pv_entry_high_water = 9 * (pv_entry_max / 10);
562	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
563	uma_zone_set_obj(ptezone, &ptezone_obj, pv_entry_max);
564}
565
566
567/***************************************************
568 * Manipulate TLBs for a pmap
569 ***************************************************/
570
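/*
 * Purge a single translation.  ptc.g is a global purge which is
 * broadcast to every processor in the coherence domain, so no IPI is
 * needed here (contrast with pmap_invalidate_all() below, which uses
 * the purely local ptc.e).
 */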
571static void
572pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
573{
574	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
575		("invalidating TLB for non-current pmap"));
576	ia64_ptc_g(va, PAGE_SHIFT << 2);
577}
578
579static void
580pmap_invalidate_all_1(void *arg)
581{
582	u_int64_t addr;
583	int i, j;
584	register_t psr;
585
586	psr = intr_disable();
587	addr = pmap_ptc_e_base;
588	for (i = 0; i < pmap_ptc_e_count1; i++) {
589		for (j = 0; j < pmap_ptc_e_count2; j++) {
590			ia64_ptc_e(addr);
591			addr += pmap_ptc_e_stride2;
592		}
593		addr += pmap_ptc_e_stride1;
594	}
595	intr_restore(psr);
596}
597
598static void
599pmap_invalidate_all(pmap_t pmap)
600{
601	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
602		("invalidating TLB for non-current pmap"));
603
604
605#ifdef SMP
606	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
607#else
608	pmap_invalidate_all_1(0);
609#endif
610}
611
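/*
 * Pick an unused region ID at random.  The bitmap and count are
 * updated without locking here; the caller is expected to hold
 * pmap_ridmutex, as pmap_ensure_rid() does.
 */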
612static u_int32_t
613pmap_allocate_rid(void)
614{
615	int rid;
616
617	if (pmap_ridcount == pmap_ridmax)
618		panic("pmap_allocate_rid: All Region IDs used");
619
620	do {
621		rid = arc4random() & (pmap_ridmax - 1);
622	} while (pmap_ridbusy[rid / 64] & (1L << (rid & 63)));
623	pmap_ridbusy[rid / 64] |= (1L << (rid & 63));
624	pmap_ridcount++;
625
626	return rid;
627}
628
629static void
630pmap_free_rid(u_int32_t rid)
631{
632	mtx_lock(&pmap_ridmutex);
633	pmap_ridbusy[rid / 64] &= ~(1L << (rid & 63));
634	pmap_ridcount--;
635	mtx_unlock(&pmap_ridmutex);
636}
637
638static void
639pmap_ensure_rid(pmap_t pmap, vm_offset_t va)
640{
641	int rr;
642
643	rr = va >> 61;
644	if (pmap->pm_rid[rr])
645		return;
646
647	mtx_lock(&pmap_ridmutex);
648	pmap->pm_rid[rr] = pmap_allocate_rid();
649	if (pmap == PCPU_GET(current_pmap))
650		ia64_set_rr(IA64_RR_BASE(rr),
651			    (pmap->pm_rid[rr] << 8)|(PAGE_SHIFT << 2)|1);
652	mtx_unlock(&pmap_ridmutex);
653}
654
655/***************************************************
656 * Low level helper routines.....
657 ***************************************************/
658
659/*
660 * Install a pte into the VHPT
661 */
662static PMAP_INLINE void
663pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
664{
665	u_int64_t *vhp, *p;
666
667	/* invalidate the pte */
668	atomic_set_64(&vhpte->pte_tag, 1L << 63);
669	ia64_mf();			/* make sure everyone sees */
670
671	vhp = (u_int64_t *) vhpte;
672	p = (u_int64_t *) pte;
673
674	vhp[0] = p[0];
675	vhp[1] = p[1];
676	vhp[2] = p[2];			/* sets ti to one */
677
678	ia64_mf();
679}
680
681/*
682 * Compare essential parts of pte.
683 */
684static PMAP_INLINE int
685pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
686{
687	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
688}
689
690/*
691 * this routine defines the region(s) of memory that should
692 * not be tested for the modified bit.
693 */
694static PMAP_INLINE int
695pmap_track_modified(vm_offset_t va)
696{
697	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
698		return 1;
699	else
700		return 0;
701}
702
703/*
704 * Create the U area for a new process.
705 * This routine directly affects the fork perf for a process.
706 */
707void
708pmap_new_proc(struct proc *p)
709{
710	struct user *up;
711
712	/*
713	 * Use contigmalloc for the user area so that we can use a
714	 * region 7 address for it, which makes the mapping impossible
715	 * to accidentally lose when recording a trapframe.
716	 */
717	up = contigmalloc(UAREA_PAGES * PAGE_SIZE, M_PMAP,
718			  M_WAITOK,
719			  0ul,
720			  256*1024*1024 - 1,
721			  PAGE_SIZE,
722			  256*1024*1024);
723
724	p->p_md.md_uservirt = up;
725	p->p_uarea = (struct user *)
726		IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t) up));
727}
728
729/*
730 * Dispose the U area for a process that has exited.
731 * This routine directly impacts the exit perf of a process.
732 */
733void
734pmap_dispose_proc(struct proc *p)
735{
736	contigfree(p->p_md.md_uservirt, UAREA_PAGES * PAGE_SIZE, M_PMAP);
737	p->p_md.md_uservirt = 0;
738	p->p_uarea = 0;
739}
740
741/*
742 * Allow the U area for a process to be prejudicially paged out.
743 */
744void
745pmap_swapout_proc(struct proc *p)
746{
747}
748
749/*
750 * Bring the U area for a specified process back in.
751 */
752void
753pmap_swapin_proc(struct proc *p)
754{
755}
756
757/*
758 * Create the KSTACK for a new thread.
759 * This routine directly affects the fork perf for a process/thread.
760 */
761void
762pmap_new_thread(struct thread *td)
763{
764	vm_offset_t *ks;
765
766	/*
767	 * Use contigmalloc for the kernel stack so that we can use a
768	 * region 7 address for it, which makes the mapping impossible
769	 * to accidentally lose when recording a trapframe.
770	 */
771	ks = contigmalloc(KSTACK_PAGES * PAGE_SIZE, M_PMAP,
772			  M_WAITOK,
773			  0ul,
774			  256*1024*1024 - 1,
775			  PAGE_SIZE,
776			  256*1024*1024);
777
778	td->td_md.md_kstackvirt = ks;
779	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
780}
781
782/*
783 * Dispose the KSTACK for a thread that has exited.
784 * This routine directly impacts the exit perf of a process/thread.
785 */
786void
787pmap_dispose_thread(struct thread *td)
788{
789	contigfree(td->td_md.md_kstackvirt, KSTACK_PAGES * PAGE_SIZE, M_PMAP);
790	td->td_md.md_kstackvirt = 0;
791	td->td_kstack = 0;
792}
793
794/*
795 * Allow the KSTACK for a thread to be prejudicially paged out.
796 */
797void
798pmap_swapout_thread(struct thread *td)
799{
800}
801
802/*
803 * Bring the KSTACK for a specified thread back in.
804 */
805void
806pmap_swapin_thread(struct thread *td)
807{
808}
809
810/***************************************************
811 * Page table page management routines.....
812 ***************************************************/
813
814void
815pmap_pinit0(struct pmap *pmap)
816{
817	int i;
818
819	/*
820	 * kernel_pmap is the same as any other pmap.
821	 */
822	pmap_pinit(pmap);
823	pmap->pm_flags = 0;
824	for (i = 0; i < 5; i++)
825		pmap->pm_rid[i] = 0;
826	pmap->pm_count = 1;
827	pmap->pm_ptphint = NULL;
828	pmap->pm_active = 0;
829	TAILQ_INIT(&pmap->pm_pvlist);
830	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
831}
832
833/*
834 * Initialize a preallocated and zeroed pmap structure,
835 * such as one in a vmspace structure.
836 */
837void
838pmap_pinit(struct pmap *pmap)
839{
840	int i;
841
842	pmap->pm_flags = 0;
843	for (i = 0; i < 5; i++)
844		pmap->pm_rid[i] = 0;
845	pmap->pm_count = 1;
846	pmap->pm_ptphint = NULL;
847	pmap->pm_active = 0;
848	TAILQ_INIT(&pmap->pm_pvlist);
849	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
850}
851
852/*
853 * Wire in kernel global address entries.  To avoid a race condition
854 * between pmap initialization and pmap_growkernel, this procedure
855 * should be called after the vmspace is attached to the process
856 * but before this pmap is activated.
857 */
858void
859pmap_pinit2(struct pmap *pmap)
860{
861}
862
863/***************************************************
864 * Pmap allocation/deallocation routines.
865 ***************************************************/
866
867/*
868 * Release any resources held by the given physical map.
869 * Called when a pmap initialized by pmap_pinit is being released.
870 * Should only be called if the map contains no valid mappings.
871 */
872void
873pmap_release(pmap_t pmap)
874{
875	int i;
876
877	for (i = 0; i < 5; i++)
878		if (pmap->pm_rid[i])
879			pmap_free_rid(pmap->pm_rid[i]);
880}
881
882/*
883 * grow the number of kernel page table entries, if needed
884 */
885void
886pmap_growkernel(vm_offset_t addr)
887{
888	struct ia64_lpte *ptepage;
889	vm_page_t nkpg;
890
891	if (kernel_vm_end == 0) {
892		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
893			+ IA64_RR_BASE(5);
894	}
895	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
896	while (kernel_vm_end < addr) {
897		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
898			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
899				& ~(PAGE_SIZE * NKPTEPG - 1);
900			continue;
901		}
902
903		/*
904		 * We could handle more by increasing the size of kptdir.
905		 */
906		if (nkpt == MAXKPT)
907			panic("pmap_growkernel: out of kernel address space");
908
909		/*
910		 * This index is bogus, but out of the way
911		 */
912		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
913		if (!nkpg)
914			panic("pmap_growkernel: no memory to grow kernel");
915
916		nkpt++;
917
918		vm_page_wire(nkpg);
919		ptepage = (struct ia64_lpte *)
920			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
921		bzero(ptepage, PAGE_SIZE);
922		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
923
924		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
925	}
926}
927
928/*
929 *	Retire the given physical map from service.
930 *	Should only be called if the map contains
931 *	no valid mappings.
932 */
933void
934pmap_destroy(pmap_t pmap)
935{
936	int count;
937
938	if (pmap == NULL)
939		return;
940
941	count = --pmap->pm_count;
942	if (count == 0) {
943		pmap_release(pmap);
944		panic("destroying a pmap is not yet implemented");
945	}
946}
947
948/*
949 *	Add a reference to the specified pmap.
950 */
951void
952pmap_reference(pmap_t pmap)
953{
954	if (pmap != NULL) {
955		pmap->pm_count++;
956	}
957}
958
959/***************************************************
960 * Page management routines.
961 ***************************************************/
962
963/*
964 * free the pv_entry back to the free list
965 */
966static PMAP_INLINE void
967free_pv_entry(pv_entry_t pv)
968{
969	pv_entry_count--;
970	uma_zfree(pvzone, pv);
971}
972
973/*
974 * get a new pv_entry, allocating a block from the system
975 * when needed.
976 * the memory allocation is performed bypassing the malloc code
977 * because of the possibility of allocations at interrupt time.
978 */
979static pv_entry_t
980get_pv_entry(void)
981{
982	pv_entry_count++;
983	if (pv_entry_high_water &&
984		(pv_entry_count > pv_entry_high_water) &&
985		(pmap_pagedaemon_waken == 0)) {
986		pmap_pagedaemon_waken = 1;
987		wakeup (&vm_pages_needed);
988	}
989	return uma_zalloc(pvzone, M_WAITOK);
990}
991
992/*
993 * Add an ia64_lpte to the VHPT.
994 */
995static void
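/*
 * Collision chains are linked through pte_chain using physical
 * addresses (hence the IA64_PHYS_TO_RR7() when they are walked) and
 * new entries are pushed at the head; the VHPT slot itself is only
 * loaded when it does not already hold a present translation.
 */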
996pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
997{
998	struct ia64_lpte *vhpte;
999
1000	pmap_vhpt_inserts++;
1001	pmap_vhpt_resident++;
1002
1003	vhpte = (struct ia64_lpte *) ia64_thash(va);
1004
1005	if (vhpte->pte_chain)
1006		pmap_vhpt_collisions++;
1007
1008	pte->pte_chain = vhpte->pte_chain;
1009	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
1010
1011	if (!vhpte->pte_p && pte->pte_p)
1012		pmap_install_pte(vhpte, pte);
1013	else
1014		ia64_mf();
1015}
1016
1017/*
1018 * Update VHPT after a pte has changed.
1019 */
1020static void
1021pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1022{
1023	struct ia64_lpte *vhpte;
1024
1025	vhpte = (struct ia64_lpte *) ia64_thash(va);
1026
1027	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
1028	    && pte->pte_p)
1029		pmap_install_pte(vhpte, pte);
1030}
1031
1032/*
1033 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1034 * worked or an appropriate error code otherwise.
1035 */
1036static int
1037pmap_remove_vhpt(vm_offset_t va)
1038{
1039	struct ia64_lpte *pte;
1040	struct ia64_lpte *lpte;
1041	struct ia64_lpte *vhpte;
1042	u_int64_t tag;
1043	int error = ENOENT;
1044
1045	vhpte = (struct ia64_lpte *) ia64_thash(va);
1046
1047	/*
1048	 * If the VHPTE is invalid, there can't be a collision chain.
1049	 */
1050	if (!vhpte->pte_p) {
1051		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1052		printf("can't remove vhpt entry for 0x%lx\n", va);
1053		goto done;
1054	}
1055
1056	lpte = vhpte;
1057	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1058	tag = ia64_ttag(va);
1059
1060	while (pte->pte_tag != tag) {
1061		lpte = pte;
1062		if (pte->pte_chain)
1063			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1064		else {
1065			printf("can't remove vhpt entry for 0x%lx\n", va);
1066			goto done;
1067		}
1068	}
1069
1070	/*
1071	 * Snip this pv_entry out of the collision chain.
1072	 */
1073	lpte->pte_chain = pte->pte_chain;
1074
1075	/*
1076	 * If the VHPTE matches as well, change it to map the first
1077	 * element from the chain if there is one.
1078	 */
1079	if (vhpte->pte_tag == tag) {
1080		if (vhpte->pte_chain) {
1081			pte = (struct ia64_lpte *)
1082				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1083			pmap_install_pte(vhpte, pte);
1084		} else {
1085			vhpte->pte_p = 0;
1086			ia64_mf();
1087		}
1088	}
1089
1090	pmap_vhpt_resident--;
1091	error = 0;
1092 done:
1093	return error;
1094}
1095
1096/*
1097 * Find the ia64_lpte for the given va, if any.
1098 */
1099static struct ia64_lpte *
1100pmap_find_vhpt(vm_offset_t va)
1101{
1102	struct ia64_lpte *pte;
1103	u_int64_t tag;
1104
1105	pte = (struct ia64_lpte *) ia64_thash(va);
1106	if (!pte->pte_chain) {
1107		pte = 0;
1108		goto done;
1109	}
1110
1111	tag = ia64_ttag(va);
1112	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1113
1114	while (pte->pte_tag != tag) {
1115		if (pte->pte_chain) {
1116			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1117		} else {
1118			pte = 0;
1119			break;
1120		}
1121	}
1122
1123 done:
1124	return pte;
1125}
1126
1127/*
1128 * Remove an entry from the list of managed mappings.
1129 */
1130static int
1131pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1132{
1133	if (!pv) {
1134		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1135			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1136				if (pmap == pv->pv_pmap && va == pv->pv_va)
1137					break;
1138			}
1139		} else {
1140			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1141				if (va == pv->pv_va)
1142					break;
1143			}
1144		}
1145	}
1146
1147	if (pv) {
1148		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1149		m->md.pv_list_count--;
1150		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1151			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1152
1153		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1154		free_pv_entry(pv);
1155		return 0;
1156	} else {
1157		return ENOENT;
1158	}
1159}
1160
1161/*
1162 * Create a pv entry for page at pa for
1163 * (pmap, va).
1164 */
1165static void
1166pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1167{
1168	pv_entry_t pv;
1169
1170	pv = get_pv_entry();
1171	pv->pv_pmap = pmap;
1172	pv->pv_va = va;
1173
1174	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1175	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1176	m->md.pv_list_count++;
1177}
1178
1179/*
1180 *	Routine:	pmap_extract
1181 *	Function:
1182 *		Extract the physical page address associated
1183 *		with the given map/virtual_address pair.
1184 */
1185vm_offset_t
1186pmap_extract(pmap, va)
1187	register pmap_t pmap;
1188	vm_offset_t va;
1189{
1190	pmap_t oldpmap;
1191	vm_offset_t pa;
1192
1193	oldpmap = pmap_install(pmap);
1194	pa = ia64_tpa(va);
1195	pmap_install(oldpmap);
1196	return pa;
1197}
1198
1199/***************************************************
1200 * Low level mapping routines.....
1201 ***************************************************/
1202
1203/*
1204 * Find the kernel lpte for mapping the given virtual address, which
1205 * must be in the part of region 5 which we can cover with our kernel
1206 * 'page tables'.
1207 */
1208static struct ia64_lpte *
1209pmap_find_kpte(vm_offset_t va)
1210{
1211	KASSERT((va >> 61) == 5,
1212		("kernel mapping 0x%lx not in region 5", va));
1213	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1214		("kernel mapping 0x%lx out of range", va));
1215	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1216}
1217
1218/*
1219 * Find a pte suitable for mapping a user-space address. If one exists
1220 * in the VHPT, that one will be returned, otherwise a new pte is
1221 * allocated.
1222 */
1223static struct ia64_lpte *
1224pmap_find_pte(vm_offset_t va)
1225{
1226	struct ia64_lpte *pte;
1227
1228	if (va >= VM_MAXUSER_ADDRESS)
1229		return pmap_find_kpte(va);
1230
1231	pte = pmap_find_vhpt(va);
1232	if (!pte) {
1233		pte = uma_zalloc(ptezone, M_WAITOK);
1234		pte->pte_p = 0;
1235	}
1236	return pte;
1237}
1238
1239/*
1240 * Free a pte which is now unused. This simply returns it to the zone
1241 * allocator if it is a user mapping. For kernel mappings, clear the
1242 * valid bit to make it clear that the mapping is not currently used.
1243 */
1244static void
1245pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1246{
1247	if (va < VM_MAXUSER_ADDRESS)
1248		uma_zfree(ptezone, pte);
1249	else
1250		pte->pte_p = 0;
1251}
1252
1253/*
1254 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1255 * the pte was originally valid, then it's assumed to already be in the
1256 * VHPT.
1257 */
1258static void
1259pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1260	     int ig, int pl, int ar)
1261{
1262	int wasvalid = pte->pte_p;
1263
1264	pte->pte_p = 1;
1265	pte->pte_ma = PTE_MA_WB;
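	/*
	 * For managed mappings, leave the accessed and dirty bits clear
	 * so that the first reference and the first write each fault,
	 * allowing them to be recorded in the vm_page; unmanaged
	 * mappings preset both bits to avoid those extra faults.
	 */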
1266	if (ig & PTE_IG_MANAGED) {
1267		pte->pte_a = 0;
1268		pte->pte_d = 0;
1269	} else {
1270		pte->pte_a = 1;
1271		pte->pte_d = 1;
1272	}
1273	pte->pte_pl = pl;
1274	pte->pte_ar = ar;
1275	pte->pte_ppn = pa >> 12;
1276	pte->pte_ed = 0;
1277	pte->pte_ig = ig;
1278
1279	pte->pte_ps = PAGE_SHIFT;
1280	pte->pte_key = 0;
1281
1282	pte->pte_tag = ia64_ttag(va);
1283
1284	if (wasvalid) {
1285		pmap_update_vhpt(pte, va);
1286	} else {
1287		pmap_enter_vhpt(pte, va);
1288	}
1289}
1290
1291/*
1292 * If a pte contains a valid mapping, clear it and update the VHPT.
1293 */
1294static void
1295pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1296{
1297	if (pte->pte_p) {
1298		pmap_remove_vhpt(va);
1299		ia64_ptc_g(va, PAGE_SHIFT << 2);
1300		pte->pte_p = 0;
1301	}
1302}
1303
1304/*
1305 * Remove the (possibly managed) mapping represented by pte from the
1306 * given pmap.
1307 */
1308static int
1309pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1310		pv_entry_t pv, int freepte)
1311{
1312	int error;
1313	vm_page_t m;
1314
1315	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1316		("removing pte for non-current pmap"));
1317
1318	/*
1319	 * First remove from the VHPT.
1320	 */
1321	error = pmap_remove_vhpt(va);
1322	if (error)
1323		return error;
1324
1325	/*
1326	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1327	 */
1328	pte->pte_p = 0;
1329
1330	if (pte->pte_ig & PTE_IG_WIRED)
1331		pmap->pm_stats.wired_count -= 1;
1332
1333	pmap->pm_stats.resident_count -= 1;
1334	if (pte->pte_ig & PTE_IG_MANAGED) {
1335		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1336		if (pte->pte_d)
1337			if (pmap_track_modified(va))
1338				vm_page_dirty(m);
1339		if (pte->pte_a)
1340			vm_page_flag_set(m, PG_REFERENCED);
1341
1342		if (freepte)
1343			pmap_free_pte(pte, va);
1344		return pmap_remove_entry(pmap, m, va, pv);
1345	} else {
1346		if (freepte)
1347			pmap_free_pte(pte, va);
1348		return 0;
1349	}
1350}
1351
1352/*
1353 * Add a list of wired pages to the kva.  This
1354 * routine is only used for temporary kernel
1355 * mappings that do not need to have page
1356 * modification or references recorded.  Note
1357 * that old mappings are simply written over.
1358 * The page *must* be wired.
1359 */
1360void
1361pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1362{
1363	int i;
1364	struct ia64_lpte *pte;
1365
1366	for (i = 0; i < count; i++) {
1367		vm_offset_t tva = va + i * PAGE_SIZE;
1368		int wasvalid;
1369		pte = pmap_find_kpte(tva);
1370		wasvalid = pte->pte_p;
1371		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1372			     0, PTE_PL_KERN, PTE_AR_RWX);
1373		if (wasvalid)
1374			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1375	}
1376}
1377
1378/*
1379 * this routine jerks page mappings from the
1380 * kernel -- it is meant only for temporary mappings.
1381 */
1382void
1383pmap_qremove(vm_offset_t va, int count)
1384{
1385	int i;
1386	struct ia64_lpte *pte;
1387
1388	for (i = 0; i < count; i++) {
1389		pte = pmap_find_kpte(va);
1390		pmap_clear_pte(pte, va);
1391		va += PAGE_SIZE;
1392	}
1393}
1394
1395/*
1396 * Add a wired page to the kva.
1397 */
1398void
1399pmap_kenter(vm_offset_t va, vm_offset_t pa)
1400{
1401	struct ia64_lpte *pte;
1402	int wasvalid;
1403
1404	pte = pmap_find_kpte(va);
1405	wasvalid = pte->pte_p;
1406	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1407	if (wasvalid)
1408		ia64_ptc_g(va, PAGE_SHIFT << 2);
1409}
1410
1411/*
1412 * Remove a page from the kva
1413 */
1414void
1415pmap_kremove(vm_offset_t va)
1416{
1417	struct ia64_lpte *pte;
1418
1419	pte = pmap_find_kpte(va);
1420	pmap_clear_pte(pte, va);
1421}
1422
1423/*
1424 *	Used to map a range of physical addresses into kernel
1425 *	virtual address space.
1426 *
1427 *	The value passed in '*virt' is a suggested virtual address for
1428 *	the mapping. Architectures which can support a direct-mapped
1429 *	physical to virtual region can return the appropriate address
1430 *	within that region, leaving '*virt' unchanged. Other
1431 *	architectures should map the pages starting at '*virt' and
1432 *	update '*virt' with the first usable address after the mapped
1433 *	region.
1434 */
1435vm_offset_t
1436pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1437{
1438	return IA64_PHYS_TO_RR7(start);
1439}
1440
1441/*
1442 * This routine is very drastic, but can save the system
1443 * in a pinch.
1444 */
1445void
1446pmap_collect()
1447{
1448	int i;
1449	vm_page_t m;
1450	static int warningdone = 0;
1451
1452	if (pmap_pagedaemon_waken == 0)
1453		return;
1454
1455	if (warningdone < 5) {
1456		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1457		warningdone++;
1458	}
1459
1460	for(i = 0; i < vm_page_array_size; i++) {
1461		m = &vm_page_array[i];
1462		if (m->wire_count || m->hold_count || m->busy ||
1463		    (m->flags & (PG_BUSY | PG_UNMANAGED)))
1464			continue;
1465		pmap_remove_all(m);
1466	}
1467	pmap_pagedaemon_waken = 0;
1468}
1469
1470/*
1471 * Remove a single page from a process address space
1472 */
1473static void
1474pmap_remove_page(pmap_t pmap, vm_offset_t va)
1475{
1476	struct ia64_lpte *pte;
1477
1478	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1479		("removing page for non-current pmap"));
1480
1481	pte = pmap_find_vhpt(va);
1482	if (pte) {
1483		pmap_remove_pte(pmap, pte, va, 0, 1);
1484		pmap_invalidate_page(pmap, va);
1485	}
1486	return;
1487}
1488
1489/*
1490 *	Remove the given range of addresses from the specified map.
1491 *
1492 *	It is assumed that the start and end are properly
1493 *	rounded to the page size.
1494 */
1495void
1496pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1497{
1498	pmap_t oldpmap;
1499	vm_offset_t va;
1500	pv_entry_t pv;
1501	struct ia64_lpte *pte;
1502
1503	if (pmap == NULL)
1504		return;
1505
1506	if (pmap->pm_stats.resident_count == 0)
1507		return;
1508
1509	oldpmap = pmap_install(pmap);
1510
1511	/*
1512	 * Special handling for removing a single page: a
1513	 * very common operation for which it is easy to
1514	 * short-circuit some code.
1515	 */
1516	if (sva + PAGE_SIZE == eva) {
1517		pmap_remove_page(pmap, sva);
1518		pmap_install(oldpmap);
1519		return;
1520	}
1521
1522	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1523		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1524			va = pv->pv_va;
1525			if (va >= sva && va < eva) {
1526				pte = pmap_find_vhpt(va);
1527				pmap_remove_pte(pmap, pte, va, pv, 1);
1528				pmap_invalidate_page(pmap, va);
1529			}
1530		}
1531
1532	} else {
1533		for (va = sva; va < eva; va += PAGE_SIZE) {
1534			pte = pmap_find_vhpt(va);
1535			if (pte) {
1536				pmap_remove_pte(pmap, pte, va, 0, 1);
1537				pmap_invalidate_page(pmap, va);
1538			}
1539		}
1540	}
1541
1542	pmap_install(oldpmap);
1543}
1544
1545/*
1546 *	Routine:	pmap_remove_all
1547 *	Function:
1548 *		Removes this physical page from
1549 *		all physical maps in which it resides.
1550 *		Reflects back modify bits to the pager.
1551 *
1552 *	Notes:
1553 *		Original versions of this routine were very
1554 *		inefficient because they iteratively called
1555 *		pmap_remove (slow...)
1556 */
1557
1558static void
1559pmap_remove_all(vm_page_t m)
1560{
1561	pmap_t oldpmap;
1562	pv_entry_t pv;
1563	int nmodify;
1564	int s;
1565
1566	nmodify = 0;
1567#if defined(PMAP_DIAGNOSTIC)
1568	/*
1569	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1570	 * pages!
1571	 */
1572	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1573		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1574	}
1575#endif
1576
1577	s = splvm();
1578
1579	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1580		struct ia64_lpte *pte;
1581		pmap_t pmap = pv->pv_pmap;
1582		vm_offset_t va = pv->pv_va;
1583
1584		oldpmap = pmap_install(pmap);
1585		pte = pmap_find_vhpt(va);
1586		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1587			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1588		pmap_remove_pte(pmap, pte, va, pv, 1);
1589		pmap_invalidate_page(pmap, va);
1590		pmap_install(oldpmap);
1591	}
1592
1593	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1594
1595	splx(s);
1596	return;
1597}
1598
1599/*
1600 *	Set the physical protection on the
1601 *	specified range of this map as requested.
1602 */
1603void
1604pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1605{
1606	pmap_t oldpmap;
1607	struct ia64_lpte *pte;
1608	int newprot;
1609
1610	if (pmap == NULL)
1611		return;
1612
1613	oldpmap = pmap_install(pmap);
1614
1615	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1616		pmap_remove(pmap, sva, eva);
1617		pmap_install(oldpmap);
1618		return;
1619	}
1620
1621	if (prot & VM_PROT_WRITE) {
1622		pmap_install(oldpmap);
1623		return;
1624	}
1625
1626	newprot = pte_prot(pmap, prot);
1627
1628	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1629		panic("pmap_protect: unaligned addresses");
1630
1631	while (sva < eva) {
1632		/*
1633		 * If page is invalid, skip this page
1634		 */
1635		pte = pmap_find_vhpt(sva);
1636		if (!pte) {
1637			sva += PAGE_SIZE;
1638			continue;
1639		}
1640
1641		if (pmap_pte_prot(pte) != newprot) {
1642			if (pte->pte_ig & PTE_IG_MANAGED) {
1643				vm_offset_t pa = pmap_pte_pa(pte);
1644				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1645				if (pte->pte_d) {
1646					if (pmap_track_modified(sva))
1647						vm_page_dirty(m);
1648					pte->pte_d = 0;
1649				}
1650				if (pte->pte_a) {
1651					vm_page_flag_set(m, PG_REFERENCED);
1652					pte->pte_a = 0;
1653				}
1654			}
1655			pmap_pte_set_prot(pte, newprot);
1656			pmap_update_vhpt(pte, sva);
1657			pmap_invalidate_page(pmap, sva);
1658		}
1659
1660		sva += PAGE_SIZE;
1661	}
1662	pmap_install(oldpmap);
1663}
1664
1665/*
1666 *	Insert the given physical page (p) at
1667 *	the specified virtual address (v) in the
1668 *	target physical map with the protection requested.
1669 *
1670 *	If specified, the page will be wired down, meaning
1671 *	that the related pte can not be reclaimed.
1672 *
1673 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1674 *	or lose information.  That is, this routine must actually
1675 *	insert this page into the given map NOW.
1676 */
1677void
1678pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1679	   boolean_t wired)
1680{
1681	pmap_t oldpmap;
1682	vm_offset_t pa;
1683	vm_offset_t opa;
1684	struct ia64_lpte origpte;
1685	struct ia64_lpte *pte;
1686	int managed;
1687
1688	if (pmap == NULL)
1689		return;
1690
1691	pmap_ensure_rid(pmap, va);
1692
1693	oldpmap = pmap_install(pmap);
1694
1695	va &= ~PAGE_MASK;
1696#ifdef PMAP_DIAGNOSTIC
1697	if (va > VM_MAX_KERNEL_ADDRESS)
1698		panic("pmap_enter: toobig");
1699#endif
1700
1701	/*
1702	 * Find (or create) a pte for the given mapping.
1703	 */
1704	pte = pmap_find_pte(va);
1705	origpte = *pte;
1706
1707	if (origpte.pte_p)
1708		opa = pmap_pte_pa(&origpte);
1709	else
1710		opa = 0;
1711	managed = 0;
1712
1713	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1714
1715	/*
1716	 * Mapping has not changed, must be protection or wiring change.
1717	 */
1718	if (origpte.pte_p && (opa == pa)) {
1719		/*
1720		 * Wiring change, just update stats. We don't worry about
1721		 * wiring PT pages as they remain resident as long as there
1722		 * are valid mappings in them. Hence, if a user page is wired,
1723		 * the PT page will be also.
1724		 */
1725		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1726			pmap->pm_stats.wired_count++;
1727		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1728			pmap->pm_stats.wired_count--;
1729
1730		/*
1731		 * We might be turning off write access to the page,
1732		 * so we go ahead and sense modify status.
1733		 */
1734		if (origpte.pte_ig & PTE_IG_MANAGED) {
1735			if (origpte.pte_d && pmap_track_modified(va)) {
1736				vm_page_t om;
1737				om = PHYS_TO_VM_PAGE(opa);
1738				vm_page_dirty(om);
1739			}
1740		}
1741
1742		managed = origpte.pte_ig & PTE_IG_MANAGED;
1743		goto validate;
1744	}
1745	/*
1746	 * Mapping has changed, invalidate old range and fall
1747	 * through to handle validating new mapping.
1748	 */
1749	if (opa) {
1750		int error;
1751		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1752		if (error)
1753			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1754	}
1755
1756	/*
1757	 * Enter on the PV list if part of our managed memory.
1758	 */
1759	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1760		pmap_insert_entry(pmap, va, m);
1761		managed |= PTE_IG_MANAGED;
1762	}
1763
1764	/*
1765	 * Increment counters
1766	 */
1767	pmap->pm_stats.resident_count++;
1768	if (wired)
1769		pmap->pm_stats.wired_count++;
1770
1771validate:
1772
1773	/*
1774	 * Now validate mapping with desired protection/wiring. This
1775	 * adds the pte to the VHPT if necessary.
1776	 */
1777	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1778		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1779
1780	/*
1781	 * if the mapping or permission bits are different, we need
1782	 * to invalidate the page.
1783	 */
1784	if (!pmap_equal_pte(&origpte, pte))
1785		pmap_invalidate_page(pmap, va);
1786
1787	pmap_install(oldpmap);
1788}
1789
1790/*
1791 * this code makes some *MAJOR* assumptions:
1792 * 1. Current pmap & pmap exists.
1793 * 2. Not wired.
1794 * 3. Read access.
1795 * 4. No page table pages.
1796 * 5. Tlbflush is deferred to calling procedure.
1797 * 6. Page IS managed.
1798 * but is *MUCH* faster than pmap_enter...
1799 */
1800
1801static void
1802pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1803{
1804	struct ia64_lpte *pte;
1805	pmap_t oldpmap;
1806
1807	pmap_ensure_rid(pmap, va);
1808
1809	oldpmap = pmap_install(pmap);
1810
1811	pte = pmap_find_pte(va);
1812	if (pte->pte_p)
1813		return;
1814
1815	/*
1816	 * Enter on the PV list since it's part of our managed memory.
1817	 */
1818	pmap_insert_entry(pmap, va, m);
1819
1820	/*
1821	 * Increment counters
1822	 */
1823	pmap->pm_stats.resident_count++;
1824
1825	/*
1826	 * Initialise PTE with read-only protection and enter into VHPT.
1827	 */
1828	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1829		     PTE_IG_MANAGED,
1830		     PTE_PL_USER, PTE_AR_R);
1831
1832	pmap_install(oldpmap);
1833}
1834
1835/*
1836 * Make temporary mapping for a physical address. This is called
1837 * during dump.
1838 */
1839void *
1840pmap_kenter_temporary(vm_offset_t pa, int i)
1841{
1842	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1843}
1844
1845#define MAX_INIT_PT (96)
1846/*
1847 * pmap_object_init_pt preloads the ptes for a given object
1848 * into the specified pmap.  This eliminates the blast of soft
1849 * faults on process startup and immediately after an mmap.
1850 */
1851void
1852pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1853		    vm_object_t object, vm_pindex_t pindex,
1854		    vm_size_t size, int limit)
1855{
1856	pmap_t oldpmap;
1857	vm_offset_t tmpidx;
1858	int psize;
1859	vm_page_t p;
1860	int objpgs;
1861
1862	if (pmap == NULL || object == NULL)
1863		return;
1864
1865	oldpmap = pmap_install(pmap);
1866
1867	psize = ia64_btop(size);
1868
1869	if ((object->type != OBJT_VNODE) ||
1870		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1871			(object->resident_page_count > MAX_INIT_PT))) {
1872		pmap_install(oldpmap);
1873		return;
1874	}
1875
1876	if (psize + pindex > object->size) {
1877		if (object->size < pindex)
1878			return;
1879		psize = object->size - pindex;
1880	}
1881
1882	/*
1883	 * if we are processing a major portion of the object, then scan the
1884	 * entire thing.
1885	 */
1886	if (psize > (object->resident_page_count >> 2)) {
1887		objpgs = psize;
1888
1889		for (p = TAILQ_FIRST(&object->memq);
1890		    ((objpgs > 0) && (p != NULL));
1891		    p = TAILQ_NEXT(p, listq)) {
1892
1893			tmpidx = p->pindex;
1894			if (tmpidx < pindex) {
1895				continue;
1896			}
1897			tmpidx -= pindex;
1898			if (tmpidx >= psize) {
1899				continue;
1900			}
1901			/*
1902			 * don't allow an madvise to blow away our really
1903			 * free pages by allocating pv entries.
1904			 */
1905			if ((limit & MAP_PREFAULT_MADVISE) &&
1906			    cnt.v_free_count < cnt.v_free_reserved) {
1907				break;
1908			}
1909			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1910				(p->busy == 0) &&
1911			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1912				if ((p->queue - p->pc) == PQ_CACHE)
1913					vm_page_deactivate(p);
1914				vm_page_busy(p);
1915				pmap_enter_quick(pmap,
1916						 addr + ia64_ptob(tmpidx), p);
1917				vm_page_flag_set(p, PG_MAPPED);
1918				vm_page_wakeup(p);
1919			}
1920			objpgs -= 1;
1921		}
1922	} else {
1923		/*
1924		 * else lookup the pages one-by-one.
1925		 */
1926		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1927			/*
1928			 * don't allow an madvise to blow away our really
1929			 * free pages by allocating pv entries.
1930			 */
1931			if ((limit & MAP_PREFAULT_MADVISE) &&
1932			    cnt.v_free_count < cnt.v_free_reserved) {
1933				break;
1934			}
1935			p = vm_page_lookup(object, tmpidx + pindex);
1936			if (p &&
1937			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1938				(p->busy == 0) &&
1939			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1940				if ((p->queue - p->pc) == PQ_CACHE)
1941					vm_page_deactivate(p);
1942				vm_page_busy(p);
1943				pmap_enter_quick(pmap,
1944						 addr + ia64_ptob(tmpidx), p);
1945				vm_page_flag_set(p, PG_MAPPED);
1946				vm_page_wakeup(p);
1947			}
1948		}
1949	}
1950	pmap_install(oldpmap);
1951	return;
1952}
1953
1954/*
1955 * pmap_prefault provides a quick way of clustering
1956 * pagefaults into a processes address space.  It is a "cousin"
1957 * of pmap_object_init_pt, except it runs at page fault time instead
1958 * of mmap time.
1959 */
1960#define PFBAK 4
1961#define PFFOR 4
1962#define PAGEORDER_SIZE (PFBAK+PFFOR)
1963
1964static int pmap_prefault_pageorder[] = {
1965	-PAGE_SIZE, PAGE_SIZE,
1966	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1967	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1968	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1969};
1970
1971void
1972pmap_prefault(pmap, addra, entry)
1973	pmap_t pmap;
1974	vm_offset_t addra;
1975	vm_map_entry_t entry;
1976{
1977	int i;
1978	vm_offset_t starta;
1979	vm_offset_t addr;
1980	vm_pindex_t pindex;
1981	vm_page_t m, mpte;
1982	vm_object_t object;
1983
1984	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1985		return;
1986
1987	object = entry->object.vm_object;
1988
1989	starta = addra - PFBAK * PAGE_SIZE;
1990	if (starta < entry->start) {
1991		starta = entry->start;
1992	} else if (starta > addra) {
1993		starta = 0;
1994	}
1995
1996	mpte = NULL;
1997	for (i = 0; i < PAGEORDER_SIZE; i++) {
1998		vm_object_t lobject;
1999		struct ia64_lpte *pte;
2000
2001		addr = addra + pmap_prefault_pageorder[i];
2002		if (addr > addra + (PFFOR * PAGE_SIZE))
2003			addr = 0;
2004
2005		if (addr < starta || addr >= entry->end)
2006			continue;
2007
2008		pte = pmap_find_vhpt(addr);
2009		if (pte && pte->pte_p)
2010			continue;
2011
2012		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2013		lobject = object;
2014		for (m = vm_page_lookup(lobject, pindex);
2015		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2016		    lobject = lobject->backing_object) {
2017			if (lobject->backing_object_offset & PAGE_MASK)
2018				break;
2019			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2020			m = vm_page_lookup(lobject->backing_object, pindex);
2021		}
2022
2023		/*
2024		 * give up when a page is not in memory
2025		 */
2026		if (m == NULL)
2027			break;
2028
2029		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2030			(m->busy == 0) &&
2031		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2032
2033			if ((m->queue - m->pc) == PQ_CACHE) {
2034				vm_page_deactivate(m);
2035			}
2036			vm_page_busy(m);
2037			pmap_enter_quick(pmap, addr, m);
2038			vm_page_flag_set(m, PG_MAPPED);
2039			vm_page_wakeup(m);
2040		}
2041	}
2042}
2043
2044/*
2045 *	Routine:	pmap_change_wiring
2046 *	Function:	Change the wiring attribute for a map/virtual-address
2047 *			pair.
2048 *	In/out conditions:
2049 *			The mapping must already exist in the pmap.
2050 */
2051void
2052pmap_change_wiring(pmap, va, wired)
2053	register pmap_t pmap;
2054	vm_offset_t va;
2055	boolean_t wired;
2056{
2057	pmap_t oldpmap;
2058	struct ia64_lpte *pte;
2059
2060	if (pmap == NULL)
2061		return;
2062
2063	oldpmap = pmap_install(pmap);
2064
2065	pte = pmap_find_vhpt(va);
2066
2067	if (wired && !pmap_pte_w(pte))
2068		pmap->pm_stats.wired_count++;
2069	else if (!wired && pmap_pte_w(pte))
2070		pmap->pm_stats.wired_count--;
2071
2072	/*
2073	 * Wiring is not a hardware characteristic so there is no need to
2074	 * invalidate TLB.
2075	 */
2076	pmap_pte_set_w(pte, wired);
2077
2078	pmap_install(oldpmap);
2079}
2080
2081
2082
2083/*
2084 *	Copy the range specified by src_addr/len
2085 *	from the source map to the range dst_addr/len
2086 *	in the destination map.
2087 *
2088 *	This routine is only advisory and need not do anything.
2089 */
2090
2091void
2092pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2093	  vm_offset_t src_addr)
2094{
2095}
2096
2097/*
2098 *	Routine:	pmap_kernel
2099 *	Function:
2100 *		Returns the physical map handle for the kernel.
2101 */
2102pmap_t
2103pmap_kernel()
2104{
2105	return (kernel_pmap);
2106}
2107
2108/*
2109 *	pmap_zero_page zeros the specified hardware page by
2110 *	mapping it into virtual memory and using bzero to clear
2111 *	its contents.
2112 */
2113
2114void
2115pmap_zero_page(vm_offset_t pa)
2116{
2117	vm_offset_t va = IA64_PHYS_TO_RR7(pa);
2118	bzero((caddr_t) va, PAGE_SIZE);
2119}
2120
2121
2122/*
2123 *	pmap_zero_page_area zeros the specified region of a hardware
2124 *	page by mapping it into virtual memory and using bzero to
2125 *	clear its contents.
2126 *
2127 *	off and size must reside within a single page.
2128 */
2129
2130void
2131pmap_zero_page_area(vm_offset_t pa, int off, int size)
2132{
2133	vm_offset_t va = IA64_PHYS_TO_RR7(pa);
2134	bzero((char *)(caddr_t)va + off, size);
2135}
2136
2137/*
2138 *	pmap_copy_page copies the specified (machine independent)
2139 *	page by mapping the page into virtual memory and using
2140 *	bcopy to copy the page, one machine dependent page at a
2141 *	time.
2142 */
2143void
2144pmap_copy_page(vm_offset_t src, vm_offset_t dst)
2145{
2146	src = IA64_PHYS_TO_RR7(src);
2147	dst = IA64_PHYS_TO_RR7(dst);
2148	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2149}
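
/*
 * The zero and copy routines above rely on the direct-mapped region 7
 * window (IA64_PHYS_TO_RR7), so no temporary kernel mappings are needed.
 */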
2150
2151
2152/*
2153 *	Routine:	pmap_pageable
2154 *	Function:
2155 *		Make the specified pages (by pmap, offset)
2156 *		pageable (or not) as requested.
2157 *
2158 *		A page which is not pageable may not take
2159 *		a fault; therefore, its page table entry
2160 *		must remain valid for the duration.
2161 *
2162 *		This routine is merely advisory; pmap_enter
2163 *		will specify that these pages are to be wired
2164 *		down (or not) as appropriate.
2165 */
2166void
2167pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
2168	      boolean_t pageable)
2169{
2170}
2171
2172/*
2173 * Returns true if the pmap's pv is one of the first
2174 * 16 pvs linked to from this page.  This count may
2175 * be changed upwards or downwards in the future; it
2176 * is only necessary that true be returned for a small
2177 * subset of pmaps for proper page aging.
2178 */
2179boolean_t
2180pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2181{
2182	pv_entry_t pv;
2183	int loops = 0;
2184	int s;
2185
2186	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2187		return FALSE;
2188
2189	s = splvm();
2190
2191	/*
2192	 * Check current mappings, returning immediately if one for this pmap is found.
2193	 */
2194	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2195		if (pv->pv_pmap == pmap) {
2196			splx(s);
2197			return TRUE;
2198		}
2199		loops++;
2200		if (loops >= 16)
2201			break;
2202	}
2203	splx(s);
2204	return (FALSE);
2205}
2206
2207#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2208/*
2209 * Remove all pages from the specified address space;
2210 * this aids process exit speeds.  Also, this code is
2211 * special cased for the current process only, but the
2212 * more generic (and slightly slower) mode can be
2213 * enabled.  This is much faster than pmap_remove in
2214 * the case of running down an entire address space.
2215 */
2216void
2217pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2218{
2219	pv_entry_t pv, npv;
2220	int s;
2221
2222#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2223	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2224		printf("warning: pmap_remove_pages called with non-current pmap\n");
2225		return;
2226	}
2227#endif
2228
2229	s = splvm();
2230	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2231		pv;
2232		pv = npv) {
2233		struct ia64_lpte *pte;
2234
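		/*
		 * Fetch the next entry up front; the current pv may be
		 * freed by pmap_remove_pte() below.
		 */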
2235		npv = TAILQ_NEXT(pv, pv_plist);
2236
2237		if (pv->pv_va >= eva || pv->pv_va < sva) {
2238			continue;
2239		}
2240
2241		pte = pmap_find_vhpt(pv->pv_va);
2242		if (!pte)
2243			panic("pmap_remove_pages: page on pm_pvlist has no pte\n");
2244
2245
2246		/*
2247		 * We cannot remove wired pages from a process' mapping at this time.
2248		 */
2249		if (pte->pte_ig & PTE_IG_WIRED) {
2250			continue;
2251		}
2252
2253		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2254	}
2255	splx(s);
2256
2257	pmap_invalidate_all(pmap);
2258}
2259
2260/*
2261 *      pmap_page_protect:
2262 *
2263 *      Lower the permission for all mappings to a given page.
2264 */
2265void
2266pmap_page_protect(vm_page_t m, vm_prot_t prot)
2267{
2268	pv_entry_t pv;
2269
2270	if ((prot & VM_PROT_WRITE) != 0)
2271		return;
2272	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2273		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2274			int newprot = pte_prot(pv->pv_pmap, prot);
2275			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2276			struct ia64_lpte *pte;
2277			pte = pmap_find_vhpt(pv->pv_va);
2278			pmap_pte_set_prot(pte, newprot);
2279			pmap_update_vhpt(pte, pv->pv_va);
2280			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2281			pmap_install(oldpmap);
2282		}
2283	} else {
2284		pmap_remove_all(m);
2285	}
2286}
2287
2288vm_offset_t
2289pmap_phys_address(int ppn)
2290{
2291	return (ia64_ptob(ppn));
2292}
2293
2294/*
2295 *	pmap_ts_referenced:
2296 *
2297 *	Return a count of reference bits for a page, clearing those bits.
2298 *	It is not necessary for every reference bit to be cleared, but it
2299 *	is necessary that 0 only be returned when there are truly no
2300 *	reference bits set.
2301 *
2302 *	XXX: The exact number of bits to check and clear is a matter that
2303 *	should be tested and standardized at some point in the future for
2304 *	optimal aging of shared pages.
2305 */
2306int
2307pmap_ts_referenced(vm_page_t m)
2308{
2309	pv_entry_t pv;
2310	int count = 0;
2311
2312	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2313		return 0;
2314
2315	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2316		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2317		struct ia64_lpte *pte;
2318		pte = pmap_find_vhpt(pv->pv_va);
2319		if (pte->pte_a) {
2320			count++;
2321			pte->pte_a = 0;
2322			pmap_update_vhpt(pte, pv->pv_va);
2323			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2324		}
2325		pmap_install(oldpmap);
2326	}
2327
2328	return count;
2329}
2330
2331#if 0
2332/*
2333 *	pmap_is_referenced:
2334 *
2335 *	Return whether or not the specified physical page was referenced
2336 *	in any physical maps.
2337 */
2338static boolean_t
2339pmap_is_referenced(vm_page_t m)
2340{
2341	pv_entry_t pv;
2342
2343	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2344		return FALSE;
2345
2346	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2347		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2348		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2349		pmap_install(oldpmap);
2350		if (pte->pte_a)
2351			return 1;
2352	}
2353
2354	return 0;
2355}
2356#endif
2357
2358/*
2359 *	pmap_is_modified:
2360 *
2361 *	Return whether or not the specified physical page was modified
2362 *	in any physical maps.
2363 */
2364boolean_t
2365pmap_is_modified(vm_page_t m)
2366{
2367	pv_entry_t pv;
2368
2369	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2370		return FALSE;
2371
2372	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2373		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2374		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2375		pmap_install(oldpmap);
2376		if (pte->pte_d)
2377			return 1;
2378	}
2379
2380	return 0;
2381}
2382
2383/*
2384 *	Clear the modify bits on the specified physical page.
2385 */
2386void
2387pmap_clear_modify(vm_page_t m)
2388{
2389	pv_entry_t pv;
2390
2391	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2392		return;
2393
2394	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2395		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2396		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2397		if (pte->pte_d) {
2398			pte->pte_d = 0;
2399			pmap_update_vhpt(pte, pv->pv_va);
2400			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2401		}
2402		pmap_install(oldpmap);
2403	}
2404}
2405
2406/*
2407 *	pmap_clear_reference:
2408 *
2409 *	Clear the reference bit on the specified physical page.
2410 */
2411void
2412pmap_clear_reference(vm_page_t m)
2413{
2414	pv_entry_t pv;
2415
2416	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2417		return;
2418
2419	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2420		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2421		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2422		if (pte->pte_a) {
2423			pte->pte_a = 0;
2424			pmap_update_vhpt(pte, pv->pv_va);
2425			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2426		}
2427		pmap_install(oldpmap);
2428	}
2429}
2430
2431/*
2432 * Miscellaneous support routines follow
2433 */
2434
2435static void
2436ia64_protection_init()
2437{
2438	int prot, *kp, *up;
2439
2440	kp = protection_codes[0];
2441	up = protection_codes[1];
2442
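	/*
	 * prot runs over all eight combinations of VM_PROT_READ,
	 * VM_PROT_WRITE and VM_PROT_EXECUTE (encoded in its low three
	 * bits); build the matching ia64 access-rights/privilege-level
	 * codes for kernel and user mappings.
	 */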
2443	for (prot = 0; prot < 8; prot++) {
2444		switch (prot) {
2445		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2446			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2447			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2448			break;
2449
2450		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2451			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2452			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2453			break;
2454
2455		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2456			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2457			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2458			break;
2459
2460		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2461			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2462			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2463			break;
2464
2465		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2466			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2467			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2468			break;
2469
2470		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2471			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2472			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2473			break;
2474
2475		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2476			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2477			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2478			break;
2479
2480		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2481			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2482			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2483			break;
2484		}
2485	}
2486}
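
#if 0
/*
 * Illustrative sketch only -- not part of the original file.  It shows
 * how the protection_codes[][] table built above could be consulted:
 * index by privilege level (0 = kernel, 1 = user) and by the low three
 * VM_PROT_* bits.  The name example_prot_lookup is hypothetical; the
 * real lookup is performed by pte_prot(), used by pmap_page_protect().
 */
static int
example_prot_lookup(pmap_t pm, vm_prot_t prot)
{
	return (protection_codes[pm == kernel_pmap ? 0 : 1][prot & 7]);
}
#endif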
2487
2488/*
2489 * Map a set of physical memory pages into the kernel virtual
2490 * address space. Return a pointer to where it is mapped. This
2491 * routine is intended to be used for mapping device memory,
2492 * NOT real memory.
2493 */
2494void *
2495pmap_mapdev(vm_offset_t pa, vm_size_t size)
2496{
2497	return (void*) IA64_PHYS_TO_RR6(pa);
2498}
2499
2500/*
2501 * 'Unmap' a range mapped by pmap_mapdev().
2502 */
2503void
2504pmap_unmapdev(vm_offset_t va, vm_size_t size)
2505{
2506	return;
2507}
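
#if 0
/*
 * Illustrative sketch only -- not part of the original file.  A
 * hypothetical caller of pmap_mapdev(): since the routine simply hands
 * back an address in the uncached, direct-mapped region 6 window, no
 * resources are allocated and pmap_unmapdev() has nothing to release.
 */
static u_int32_t
example_read_devreg(vm_offset_t pa)
{
	volatile u_int32_t *reg;

	reg = (volatile u_int32_t *) pmap_mapdev(pa, sizeof(*reg));
	return (*reg);
}
#endif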
2508
2509/*
2510 * perform the pmap work for mincore
2511 */
2512int
2513pmap_mincore(pmap_t pmap, vm_offset_t addr)
2514{
2515	pmap_t oldpmap;
2516	struct ia64_lpte *pte;
2517	int val = 0;
2518
2519	oldpmap = pmap_install(pmap);
2520	pte = pmap_find_vhpt(addr);
2521	pmap_install(oldpmap);
2522
2523	if (!pte)
2524		return 0;
2525
2526	if (pmap_pte_v(pte)) {
2527		vm_page_t m;
2528		vm_offset_t pa;
2529
2530		val = MINCORE_INCORE;
2531		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2532			return val;
2533
2534		pa = pmap_pte_pa(pte);
2535
2536		m = PHYS_TO_VM_PAGE(pa);
2537
2538		/*
2539		 * Modified by us
2540		 */
2541		if (pte->pte_d)
2542			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2543		/*
2544		 * Modified by someone
2545		 */
2546		else if (pmap_is_modified(m))
2547			val |= MINCORE_MODIFIED_OTHER;
2548		/*
2549		 * Referenced by us
2550		 */
2551		if (pte->pte_a)
2552			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2553
2554		/*
2555		 * Referenced by someone
2556		 */
2557		else if (pmap_ts_referenced(m)) {
2558			val |= MINCORE_REFERENCED_OTHER;
2559			vm_page_flag_set(m, PG_REFERENCED);
2560		}
2561	}
2562	return val;
2563}
2564
2565void
2566pmap_activate(struct thread *td)
2567{
2568	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2569}
2570
2571pmap_t
2572pmap_install(pmap_t pmap)
2573{
2574	pmap_t oldpmap;
2575	int i;
2576
2577	critical_enter();
2578
2579	oldpmap = PCPU_GET(current_pmap);
2580
2581	if (pmap == oldpmap || pmap == kernel_pmap) {
2582		critical_exit();
2583		return pmap;
2584	}
2585
2586	if (oldpmap) {
2587		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2588	}
2589
2590	PCPU_SET(current_pmap, pmap);
2591	if (!pmap) {
2592		/*
2593		 * RIDs 0..4 have no mappings, so any access through
2594		 * regions 0..4 will generate a page fault.
2595		 */
2596		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2597		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2598		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2599		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2600		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2601		critical_exit();
2602		return oldpmap;
2603	}
2604
2605	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2606
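	/*
	 * Install this pmap's region IDs in region registers 0..4.  The
	 * region register layout is: VHPT walker enable in bit 0, the
	 * preferred page size in bits 2..7 and the region ID from bit 8 up.
	 */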
2607	for (i = 0; i < 5; i++)
2608		ia64_set_rr(IA64_RR_BASE(i),
2609			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2610
2611	critical_exit();
2612	return oldpmap;
2613}
2614
2615vm_offset_t
2616pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2617{
2618
2619	return addr;
2620}
2621
2622#include "opt_ddb.h"
2623
2624#ifdef DDB
2625
2626#include <ddb/ddb.h>
2627
2628static const char*	psnames[] = {
2629	"1B",	"2B",	"4B",	"8B",
2630	"16B",	"32B",	"64B",	"128B",
2631	"256B",	"512B",	"1K",	"2K",
2632	"4K",	"8K",	"16K",	"32K",
2633	"64K",	"128K",	"256K",	"512K",
2634	"1M",	"2M",	"4M",	"8M",
2635	"16M",	"32M",	"64M",	"128M",
2636	"256M",	"512M",	"1G",	"2G"
2637};
2638
2639static void
2640print_trs(int type)
2641{
2642	struct ia64_pal_result	res;
2643	int			i, maxtr;
2644	struct {
2645		struct ia64_pte	pte;
2646		struct ia64_itir itir;
2647		struct ia64_ifa ifa;
2648		struct ia64_rr	rr;
2649	}			buf;
2650	static const char*	manames[] = {
2651		"WB",	"bad",	"bad",	"bad",
2652		"UC",	"UCE",	"WC",	"NaT",
2654	};
2655
2656	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2657	if (res.pal_status != 0) {
2658		db_printf("Can't get VM summary\n");
2659		return;
2660	}
2661
2662	if (type == 0)
2663		maxtr = (res.pal_result[0] >> 40) & 0xff;
2664	else
2665		maxtr = (res.pal_result[0] >> 32) & 0xff;
2666
2667	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2668	for (i = 0; i <= maxtr; i++) {
2669		bzero(&buf, sizeof(buf));
2670		res = ia64_call_pal_stacked_physical
2671			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
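		/*
		 * The low bits of pal_result[0] indicate which of the
		 * returned fields are valid; clear any field the call
		 * did not report as valid.
		 */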
2672		if (!(res.pal_result[0] & 1))
2673			buf.pte.pte_ar = 0;
2674		if (!(res.pal_result[0] & 2))
2675			buf.pte.pte_pl = 0;
2676		if (!(res.pal_result[0] & 4))
2677			buf.pte.pte_d = 0;
2678		if (!(res.pal_result[0] & 8))
2679			buf.pte.pte_ma = 0;
2680		db_printf(
2681			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2682			buf.ifa.ifa_ig & 1,
2683			buf.rr.rr_rid,
2684			buf.ifa.ifa_vpn,
2685			buf.pte.pte_ppn,
2686			psnames[buf.itir.itir_ps],
2687			buf.pte.pte_ed,
2688			buf.pte.pte_ar,
2689			buf.pte.pte_pl,
2690			buf.pte.pte_d,
2691			buf.pte.pte_a,
2692			manames[buf.pte.pte_ma],
2693			buf.pte.pte_p,
2694			buf.itir.itir_key);
2695	}
2696}
2697
2698DB_COMMAND(itr, db_itr)
2699{
2700	print_trs(0);
2701}
2702
2703DB_COMMAND(dtr, db_dtr)
2704{
2705	print_trs(1);
2706}
2707
2708DB_COMMAND(rr, db_rr)
2709{
2710	int i;
2711	u_int64_t t;
2712	struct ia64_rr rr;
2713
2714	printf("RR RID    PgSz VE\n");
2715	for (i = 0; i < 8; i++) {
2716		__asm __volatile ("mov %0=rr[%1]"
2717				  : "=r"(t)
2718				  : "r"(IA64_RR_BASE(i)));
2719		*(u_int64_t *) &rr = t;
2720		printf("%d  %06x %4s %d\n",
2721		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2722	}
2723}
2724
2725DB_COMMAND(thash, db_thash)
2726{
2727	if (!have_addr)
2728		return;
2729
2730	db_printf("%p\n", (void *) ia64_thash(addr));
2731}
2732
2733DB_COMMAND(ttag, db_ttag)
2734{
2735	if (!have_addr)
2736		return;
2737
2738	db_printf("0x%lx\n", ia64_ttag(addr));
2739}
2740
2741#endif
2742