pmap.c revision 100000
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 100000 2002-07-14 23:31:04Z alc $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120
121#include <sys/user.h>
122
123#include <machine/pal.h>
124#include <machine/md_var.h>
125
126MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
127
128#ifndef PMAP_SHPGPERPROC
129#define PMAP_SHPGPERPROC 200
130#endif
131
132#if defined(DIAGNOSTIC)
133#define PMAP_DIAGNOSTIC
134#endif
135
136#define MINPV 2048
137
138#if 0
139#define PMAP_DIAGNOSTIC
140#define PMAP_DEBUG
141#endif
142
143#if !defined(PMAP_DIAGNOSTIC)
144#define PMAP_INLINE __inline
145#else
146#define PMAP_INLINE
147#endif
148
149/*
150 * Get PDEs and PTEs for user/kernel address space
151 */
152#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
153#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
154#define pmap_pte_v(pte)		((pte)->pte_p)
155#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
156#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
157
158#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
159				:((pte)->pte_ig &= ~PTE_IG_WIRED))
160#define pmap_pte_set_prot(pte, v) do {		\
161    (pte)->pte_ar = (v) >> 2;			\
162    (pte)->pte_pl = (v) & 3;			\
163} while (0)
164
165/*
166 * Given a map and a machine independent protection code,
167 * convert to an ia64 protection code.
168 */
169#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
170#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
171#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
172int	protection_codes[2][8];
173
174/*
175 * Return non-zero if this pmap is currently active
176 */
177#define pmap_isactive(pmap)	(pmap->pm_active)
178
179/*
180 * Statically allocated kernel pmap
181 */
182struct pmap kernel_pmap_store;
183
184vm_offset_t avail_start;	/* PA of first available physical page */
185vm_offset_t avail_end;		/* PA of last available physical page */
186vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
187vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
188static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
189
190vm_offset_t vhpt_base, vhpt_size;
191
192/*
193 * We use an object to own the kernel's 'page tables'. For simplicity,
194 * we use one page directory to index a set of pages containing
195 * ia64_lptes. This gives us up to 2Gb of kernel virtual space.
196 */
197static vm_object_t kptobj;
198static int nkpt;
199static struct ia64_lpte **kptdir;
200#define KPTE_DIR_INDEX(va) \
201	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
202#define KPTE_PTE_INDEX(va) \
203	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
204#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
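/*
 * A rough sketch of the indexing, assuming 8KB pages (PAGE_SHIFT == 13)
 * and a 32-byte struct ia64_lpte: each PTE page then holds 256 ia64_lptes
 * (NKPTEPG) and the single directory page holds 1024 pointers, so the two
 * macros above split a region 5 offset into a 10-bit directory index and
 * an 8-bit PTE index, covering 1024 * 256 * 8KB = 2GB of kernel VA.
 */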
205
206vm_offset_t kernel_vm_end;
207
208/*
209 * Values for ptc.e. XXX values for SKI.
210 */
211static u_int64_t pmap_ptc_e_base = 0x100000000;
212static u_int64_t pmap_ptc_e_count1 = 3;
213static u_int64_t pmap_ptc_e_count2 = 2;
214static u_int64_t pmap_ptc_e_stride1 = 0x2000;
215static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
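/*
 * On real hardware pmap_bootstrap() replaces the SKI placeholders above
 * with the loop geometry returned by the PAL_PTCE_INFO call.
 */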
216
217/*
218 * Data for the RID allocator
219 */
220static u_int64_t *pmap_ridbusy;
221static int pmap_ridmax, pmap_ridcount;
222struct mtx pmap_ridmutex;
223
224/*
225 * Data for the pv entry allocation mechanism
226 */
227static uma_zone_t pvzone;
228static struct vm_object pvzone_obj;
229static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
230static int pmap_pagedaemon_waken = 0;
231static struct pv_entry *pvbootentries;
232static int pvbootnext, pvbootmax;
233
234/*
235 * Data for allocating PTEs for user processes.
236 */
237static uma_zone_t ptezone;
238static struct vm_object ptezone_obj;
239#if 0
240static struct ia64_lpte *pteinit;
241#endif
242
243/*
244 * VHPT instrumentation.
245 */
246static int pmap_vhpt_inserts;
247static int pmap_vhpt_collisions;
248static int pmap_vhpt_resident;
249SYSCTL_DECL(_vm_stats);
250SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
251SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
252	   &pmap_vhpt_inserts, 0, "");
253SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
254	   &pmap_vhpt_collisions, 0, "");
255SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
256	   &pmap_vhpt_resident, 0, "");
257
258static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
259static pv_entry_t get_pv_entry(void);
260static void	ia64_protection_init(void);
261
262static void	pmap_invalidate_all(pmap_t pmap);
263static void	pmap_remove_all(vm_page_t m);
264static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
265static void	*pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
266
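/*
 * Grab physical memory from the start of the first phys_avail[] bank that
 * is large enough, before the VM system is up.  Banks too small for the
 * request are discarded, and the stolen range is returned as a zeroed
 * region 7 (direct-mapped, cacheable) virtual address.
 */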
267vm_offset_t
268pmap_steal_memory(vm_size_t size)
269{
270	vm_size_t bank_size;
271	vm_offset_t pa, va;
272
273	size = round_page(size);
274
275	bank_size = phys_avail[1] - phys_avail[0];
276	while (size > bank_size) {
277		int i;
278		for (i = 0; phys_avail[i+2]; i+= 2) {
279			phys_avail[i] = phys_avail[i+2];
280			phys_avail[i+1] = phys_avail[i+3];
281		}
282		phys_avail[i] = 0;
283		phys_avail[i+1] = 0;
284		if (!phys_avail[0])
285			panic("pmap_steal_memory: out of memory");
286		bank_size = phys_avail[1] - phys_avail[0];
287	}
288
289	pa = phys_avail[0];
290	phys_avail[0] += size;
291
292	va = IA64_PHYS_TO_RR7(pa);
293	bzero((caddr_t) va, size);
294	return va;
295}
296
297/*
298 *	Bootstrap the system enough to run with virtual memory.
299 */
300void
301pmap_bootstrap()
302{
303	int i, j, count, ridbits;
304	struct ia64_pal_result res;
305
306	/*
307	 * Query the PAL Code to find the loop parameters for the
308	 * ptc.e instruction.
309	 */
310	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
311	if (res.pal_status != 0)
312		panic("Can't configure ptc.e parameters");
313	pmap_ptc_e_base = res.pal_result[0];
314	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
315	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
316	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
317	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
318	if (bootverbose)
319		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
320		       "stride1=0x%lx, stride2=0x%lx\n",
321		       pmap_ptc_e_base,
322		       pmap_ptc_e_count1,
323		       pmap_ptc_e_count2,
324		       pmap_ptc_e_stride1,
325		       pmap_ptc_e_stride2);
326
327	/*
328	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
329	 */
330	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
331	if (res.pal_status != 0) {
332		if (bootverbose)
333			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
334		ridbits = 18; /* guaranteed minimum */
335	} else {
336		ridbits = (res.pal_result[1] >> 8) & 0xff;
337		if (bootverbose)
338			printf("Processor supports %d Region ID bits\n",
339			       ridbits);
340	}
341	pmap_ridmax = (1 << ridbits);
342	pmap_ridcount = 8;
343	pmap_ridbusy = (u_int64_t *)
344		pmap_steal_memory(pmap_ridmax / 8);
345	bzero(pmap_ridbusy, pmap_ridmax / 8);
346	pmap_ridbusy[0] |= 0xff;
347	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
348
349	/*
350	 * Allocate some memory for initial kernel 'page tables'.
351	 */
352	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
353	for (i = 0; i < NKPT; i++) {
354		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
355	}
356	nkpt = NKPT;
357
358	avail_start = phys_avail[0];
359	for (i = 0; phys_avail[i+2]; i+= 2) ;
360	avail_end = phys_avail[i+1];
361	count = i+2;
362
363	/*
364	 * Figure out a useful size for the VHPT, based on the size of
365	 * physical memory and try to locate a region which is large
366	 * enough to contain the VHPT (which must be a power of two in
367	 * size and aligned to a natural boundary).
368	 */
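	/*
	 * The loop below sizes the VHPT at roughly 32 bytes (one long
	 * format entry) per physical page, with a 32KB (2^15) minimum.
	 */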
369	vhpt_size = 15;
370	while ((1<<vhpt_size) < ia64_btop(avail_end - avail_start) * 32)
371		vhpt_size++;
372
373	vhpt_base = 0;
374	while (!vhpt_base) {
375		vm_offset_t mask;
376		if (bootverbose)
377			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
378		mask = (1L << vhpt_size) - 1;
379		for (i = 0; i < count; i += 2) {
380			vm_offset_t base, limit;
381			base = (phys_avail[i] + mask) & ~mask;
382			limit = base + (1L << vhpt_size);
383			if (limit <= phys_avail[i+1])
384				/*
385				 * VHPT can fit in this region
386				 */
387				break;
388		}
389		if (!phys_avail[i]) {
390			/*
391			 * Can't fit, try next smaller size.
392			 */
393			vhpt_size--;
394		} else {
395			vhpt_base = (phys_avail[i] + mask) & ~mask;
396		}
397	}
398	if (vhpt_size < 15)
399		panic("Can't find space for VHPT");
400
401	if (bootverbose)
402		printf("Putting VHPT at %p\n", (void *) vhpt_base);
403	if (vhpt_base != phys_avail[i]) {
404		/*
405		 * Split this region.
406		 */
407		if (bootverbose)
408			printf("Splitting [%p-%p]\n",
409			       (void *) phys_avail[i],
410			       (void *) phys_avail[i+1]);
411		for (j = count; j > i; j -= 2) {
412			phys_avail[j] = phys_avail[j-2];
413			phys_avail[j+1] = phys_avail[j-2+1];
414		}
415		phys_avail[count+2] = 0;
416		phys_avail[count+3] = 0;
417		phys_avail[i+1] = vhpt_base;
418		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
419	} else {
420		phys_avail[i] = vhpt_base + (1L << vhpt_size);
421	}
422
423	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
424	bzero((void *) vhpt_base, (1L << vhpt_size));
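	/*
	 * Program cr.pta with the new table.  As we read the PTA layout,
	 * the value below sets the VHPT base, bit 0 to enable the walker,
	 * bits 7:2 to the log2 of the size and bit 8 to select the long
	 * format.
	 */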
425	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
426			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
427
428	virtual_avail = IA64_RR_BASE(5);
429	virtual_end = IA64_RR_BASE(6)-1;
430
431	/*
432	 * Initialize protection array.
433	 */
434	ia64_protection_init();
435
436	/*
437	 * Initialize the kernel pmap (which is statically allocated).
438	 */
439	for (i = 0; i < 5; i++)
440		kernel_pmap->pm_rid[i] = 0;
441	kernel_pmap->pm_active = 1;
442	TAILQ_INIT(&kernel_pmap->pm_pvlist);
443	PCPU_SET(current_pmap, kernel_pmap);
444
445	/*
446	 * Region 5 is mapped via the vhpt.
447	 */
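	/*
	 * Region register layout, as we read it: region ID in bits 31:8,
	 * preferred page size (log2) in bits 7:2 and the VHPT-walker
	 * enable bit in bit 0.
	 */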
448	ia64_set_rr(IA64_RR_BASE(5),
449		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
450
451	/*
452	 * Region 6 is direct mapped UC and region 7 is direct mapped
453	 * WB (cacheable). The details of this are controlled by the Alt {I,D}TLB
454	 * handlers. Here we just make sure that they have the largest
455	 * possible page size to minimise TLB usage.
456	 */
457	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
458	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
459
460	/*
461	 * Set up proc0's PCB.
462	 */
463#if 0
464	thread0.td_pcb->pcb_hw.apcb_asn = 0;
465#endif
466
467	/*
468	 * Reserve some memory for allocating pvs while bootstrapping
469	 * the pv allocator. We need to have enough to cover mapping
470	 * the kmem_alloc region used to allocate the initial_pvs in
471	 * pmap_init. In general, the size of this region is
472	 * approximately (# physical pages) * (size of pv entry).
473	 */
474	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
475	pvbootentries = (struct pv_entry *)
476		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
477	pvbootnext = 0;
478
479	/*
480	 * Clear out any random TLB entries left over from booting.
481	 */
482	pmap_invalidate_all(kernel_pmap);
483}
484
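/*
 * UMA backend allocator for the pv and pte zones: take wired kernel
 * memory and hand back its region 7 (direct-mapped) address, presumably
 * so that touching pv/pte structures never itself requires a VHPT entry.
 */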
485static void *
486pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
487{
488	*flags = UMA_SLAB_PRIV;
489	return (void *)IA64_PHYS_TO_RR7(ia64_tpa(kmem_alloc(kernel_map, bytes)));
490}
491
492/*
493 *	Initialize the pmap module.
494 *	Called by vm_init, to initialize any structures that the pmap
495 *	system needs to map virtual memory.
496 *	pmap_init has been enhanced to support, in a fairly consistent
497 *	way, discontiguous physical memory.
498 */
499void
500pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
501{
502	int i;
503	int initial_pvs;
504
505	/*
506	 * Allocate memory for random pmap data structures.  Includes the
507	 * pv_head_table.
508	 */
509
510	for(i = 0; i < vm_page_array_size; i++) {
511		vm_page_t m;
512
513		m = &vm_page_array[i];
514		TAILQ_INIT(&m->md.pv_list);
515		m->md.pv_list_count = 0;
516 	}
517
518	/*
519	 * Init the pv free list and the PTE free list.
520	 */
521	initial_pvs = vm_page_array_size;
522	if (initial_pvs < MINPV)
523		initial_pvs = MINPV;
524	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
525	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
526	uma_zone_set_allocf(pvzone, pmap_allocf);
527	uma_prealloc(pvzone, initial_pvs);
528
529	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
530	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
531	uma_zone_set_allocf(ptezone, pmap_allocf);
532	uma_prealloc(ptezone, initial_pvs);
533
534	/*
535	 * Create the object for the kernel's page tables.
536	 */
537	kptobj = vm_object_allocate(OBJT_DEFAULT, MAXKPT);
538
539	/*
540	 * Now it is safe to enable pv_table recording.
541	 */
542	pmap_initialized = TRUE;
543}
544
545/*
546 * Initialize the address space (zone) for the pv_entries.  Set a
547 * high water mark so that the system can recover from excessive
548 * numbers of pv entries.
549 */
550void
551pmap_init2()
552{
553	int shpgperproc = PMAP_SHPGPERPROC;
554
555	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
556	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
557	pv_entry_high_water = 9 * (pv_entry_max / 10);
558	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
559	uma_zone_set_obj(ptezone, &ptezone_obj, pv_entry_max);
560}
561
562
563/***************************************************
564 * Manipulate TLBs for a pmap
565 ***************************************************/
566
567static void
568pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
569{
570	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
571		("invalidating TLB for non-current pmap"));
572	ia64_ptc_g(va, PAGE_SHIFT << 2);
573}
574
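/*
 * Flush the entire TLB of the calling CPU by stepping ptc.e through the
 * count/stride loop geometry described by the pmap_ptc_e_* variables.
 */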
575static void
576pmap_invalidate_all_1(void *arg)
577{
578	u_int64_t addr;
579	int i, j;
580	register_t psr;
581
582	psr = intr_disable();
583	addr = pmap_ptc_e_base;
584	for (i = 0; i < pmap_ptc_e_count1; i++) {
585		for (j = 0; j < pmap_ptc_e_count2; j++) {
586			ia64_ptc_e(addr);
587			addr += pmap_ptc_e_stride2;
588		}
589		addr += pmap_ptc_e_stride1;
590	}
591	intr_restore(psr);
592}
593
594static void
595pmap_invalidate_all(pmap_t pmap)
596{
597	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
598		("invalidating TLB for non-current pmap"));
599
600
601#ifdef SMP
602	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
603#else
604	pmap_invalidate_all_1(0);
605#endif
606}
607
608static u_int32_t
609pmap_allocate_rid(void)
610{
611	int rid;
612
613	if (pmap_ridcount == pmap_ridmax)
614		panic("pmap_allocate_rid: All Region IDs used");
615
616	do {
617		rid = arc4random() & (pmap_ridmax - 1);
618	} while (pmap_ridbusy[rid / 64] & (1L << (rid & 63)));
619	pmap_ridbusy[rid / 64] |= (1L << (rid & 63));
620	pmap_ridcount++;
621
622	return rid;
623}
624
625static void
626pmap_free_rid(u_int32_t rid)
627{
628	mtx_lock(&pmap_ridmutex);
629	pmap_ridbusy[rid / 64] &= ~(1L << (rid & 63));
630	pmap_ridcount--;
631	mtx_unlock(&pmap_ridmutex);
632}
633
634static void
635pmap_ensure_rid(pmap_t pmap, vm_offset_t va)
636{
637	int rr;
638
639	rr = va >> 61;
640
641	/*
642	 * We get called for virtual addresses that may just as well be
643	 * kernel addresses (ie region 5, 6 or 7). Since the pm_rid field
644	 * only holds region IDs for user regions, we have to make sure
645	 * the region is within bounds.
646	 */
647	if (rr >= 5)
648		return;
649
650	if (pmap->pm_rid[rr])
651		return;
652
653	mtx_lock(&pmap_ridmutex);
654	pmap->pm_rid[rr] = pmap_allocate_rid();
655	if (pmap == PCPU_GET(current_pmap))
656		ia64_set_rr(IA64_RR_BASE(rr),
657			    (pmap->pm_rid[rr] << 8)|(PAGE_SHIFT << 2)|1);
658	mtx_unlock(&pmap_ridmutex);
659}
660
661/***************************************************
662 * Low level helper routines.....
663 ***************************************************/
664
665/*
666 * Install a pte into the VHPT
667 */
668static PMAP_INLINE void
669pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
670{
671	u_int64_t *vhp, *p;
672
673	/* invalidate the pte */
674	atomic_set_64(&vhpte->pte_tag, 1L << 63);
675	ia64_mf();			/* make sure everyone sees */
676
677	vhp = (u_int64_t *) vhpte;
678	p = (u_int64_t *) pte;
679
680	vhp[0] = p[0];
681	vhp[1] = p[1];
682	vhp[2] = p[2];			/* sets ti to one */
683
684	ia64_mf();
685}
686
687/*
688 * Compare essential parts of pte.
689 */
690static PMAP_INLINE int
691pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
692{
693	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
694}
695
696/*
697 * this routine defines the region(s) of memory that should
698 * not be tested for the modified bit.
699 */
700static PMAP_INLINE int
701pmap_track_modified(vm_offset_t va)
702{
703	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
704		return 1;
705	else
706		return 0;
707}
708
709/*
710 * Create the KSTACK for a new thread.
711 * This routine directly affects the fork perf for a process/thread.
712 */
713void
714pmap_new_thread(struct thread *td)
715{
716	vm_offset_t *ks;
717
718	/*
719	 * Use contigmalloc for user area so that we can use a region
720	 * 7 address for it which makes it impossible to accidentally
721	 * lose when recording a trapframe.
722	 */
723	ks = contigmalloc(KSTACK_PAGES * PAGE_SIZE, M_PMAP,
724			  M_WAITOK,
725			  0ul,
726			  256*1024*1024 - 1,
727			  PAGE_SIZE,
728			  256*1024*1024);
729
730	if (ks == NULL)
731		panic("pmap_new_thread: could not contigmalloc %d pages\n",
732		    KSTACK_PAGES);
733	td->td_md.md_kstackvirt = ks;
734	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
735}
736
737/*
738 * Dispose the KSTACK for a thread that has exited.
739 * This routine directly impacts the exit perf of a process/thread.
740 */
741void
742pmap_dispose_thread(struct thread *td)
743{
744	contigfree(td->td_md.md_kstackvirt, KSTACK_PAGES * PAGE_SIZE, M_PMAP);
745	td->td_md.md_kstackvirt = 0;
746	td->td_kstack = 0;
747}
748
749/*
750 * Allow the KSTACK for a thread to be prejudicially paged out.
751 */
752void
753pmap_swapout_thread(struct thread *td)
754{
755}
756
757/*
758 * Bring the KSTACK for a specified thread back in.
759 */
760void
761pmap_swapin_thread(struct thread *td)
762{
763}
764
765/***************************************************
766 * Page table page management routines.....
767 ***************************************************/
768
769void
770pmap_pinit0(struct pmap *pmap)
771{
772	/* kernel_pmap is the same as any other pmap. */
773	pmap_pinit(pmap);
774}
775
776/*
777 * Initialize a preallocated and zeroed pmap structure,
778 * such as one in a vmspace structure.
779 */
780void
781pmap_pinit(struct pmap *pmap)
782{
783	int i;
784
785	pmap->pm_flags = 0;
786	for (i = 0; i < 5; i++)
787		pmap->pm_rid[i] = 0;
788	pmap->pm_ptphint = NULL;
789	pmap->pm_active = 0;
790	TAILQ_INIT(&pmap->pm_pvlist);
791	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
792}
793
794/*
795 * Wire in kernel global address entries.  To avoid a race condition
796 * between pmap initialization and pmap_growkernel, this procedure
797 * should be called after the vmspace is attached to the process
798 * but before this pmap is activated.
799 */
800void
801pmap_pinit2(struct pmap *pmap)
802{
803}
804
805/***************************************************
806 * Pmap allocation/deallocation routines.
807 ***************************************************/
808
809/*
810 * Release any resources held by the given physical map.
811 * Called when a pmap initialized by pmap_pinit is being released.
812 * Should only be called if the map contains no valid mappings.
813 */
814void
815pmap_release(pmap_t pmap)
816{
817	int i;
818
819	for (i = 0; i < 5; i++)
820		if (pmap->pm_rid[i])
821			pmap_free_rid(pmap->pm_rid[i]);
822}
823
824/*
825 * grow the number of kernel page table entries, if needed
826 */
827void
828pmap_growkernel(vm_offset_t addr)
829{
830	struct ia64_lpte *ptepage;
831	vm_page_t nkpg;
832
833	if (kernel_vm_end == 0) {
834		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
835			+ IA64_RR_BASE(5);
836	}
837	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
838	while (kernel_vm_end < addr) {
839		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
840			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
841				& ~(PAGE_SIZE * NKPTEPG - 1);
842			continue;
843		}
844
845		/*
846		 * We could handle more by increasing the size of kptdir.
847		 */
848		if (nkpt == MAXKPT)
849			panic("pmap_growkernel: out of kernel address space");
850
851		/*
852		 * This index is bogus, but out of the way
853		 */
854		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
855		if (!nkpg)
856			panic("pmap_growkernel: no memory to grow kernel");
857
858		nkpt++;
859
860		vm_page_lock_queues();
861		vm_page_wire(nkpg);
862		vm_page_unlock_queues();
863		ptepage = (struct ia64_lpte *)
864			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
865		bzero(ptepage, PAGE_SIZE);
866		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
867
868		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
869	}
870}
871
872/***************************************************
873 * page management routines.
874 ***************************************************/
875
876/*
877 * free the pv_entry back to the free list
878 */
879static PMAP_INLINE void
880free_pv_entry(pv_entry_t pv)
881{
882	pv_entry_count--;
883	uma_zfree(pvzone, pv);
884}
885
886/*
887 * get a new pv_entry, allocating a block from the system
888 * when needed.
889 * the memory allocation is performed bypassing the malloc code
890 * because of the possibility of allocations at interrupt time.
891 */
892static pv_entry_t
893get_pv_entry(void)
894{
895	pv_entry_count++;
896	if (pv_entry_high_water &&
897		(pv_entry_count > pv_entry_high_water) &&
898		(pmap_pagedaemon_waken == 0)) {
899		pmap_pagedaemon_waken = 1;
900		wakeup (&vm_pages_needed);
901	}
902	return uma_zalloc(pvzone, M_WAITOK);
903}
904
905/*
906 * Add an ia64_lpte to the VHPT.
907 */
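/*
 * Each VHPT bucket heads a singly linked collision chain through the
 * pte_chain fields (physical addresses of ia64_lptes).  The new pte is
 * pushed onto the front of the chain and, if the bucket itself was free
 * (and the new pte is valid), also copied into the bucket so that the
 * hardware walker can see it.
 */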
908static void
909pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
910{
911	struct ia64_lpte *vhpte;
912
913	pmap_vhpt_inserts++;
914	pmap_vhpt_resident++;
915
916	vhpte = (struct ia64_lpte *) ia64_thash(va);
917
918	if (vhpte->pte_chain)
919		pmap_vhpt_collisions++;
920
921	pte->pte_chain = vhpte->pte_chain;
922	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
923
924	if (!vhpte->pte_p && pte->pte_p)
925		pmap_install_pte(vhpte, pte);
926	else
927		ia64_mf();
928}
929
930/*
931 * Update VHPT after a pte has changed.
932 */
933static void
934pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
935{
936	struct ia64_lpte *vhpte;
937
938	vhpte = (struct ia64_lpte *) ia64_thash(va);
939
940	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
941	    && pte->pte_p)
942		pmap_install_pte(vhpte, pte);
943}
944
945/*
946 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
947 * worked or an appropriate error code otherwise.
948 */
949static int
950pmap_remove_vhpt(vm_offset_t va)
951{
952	struct ia64_lpte *pte;
953	struct ia64_lpte *lpte;
954	struct ia64_lpte *vhpte;
955	u_int64_t tag;
956	int error = ENOENT;
957
958	vhpte = (struct ia64_lpte *) ia64_thash(va);
959
960	/*
961	 * If the VHPTE is invalid, there can't be a collision chain.
962	 */
963	if (!vhpte->pte_p) {
964		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
965		printf("can't remove vhpt entry for 0x%lx\n", va);
966		goto done;
967	}
968
969	lpte = vhpte;
970	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
971	tag = ia64_ttag(va);
972
973	while (pte->pte_tag != tag) {
974		lpte = pte;
975		if (pte->pte_chain)
976			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
977		else {
978			printf("can't remove vhpt entry for 0x%lx\n", va);
979			goto done;
980		}
981	}
982
983	/*
984	 * Snip this pv_entry out of the collision chain.
985	 */
986	lpte->pte_chain = pte->pte_chain;
987
988	/*
989	 * If the VHPTE matches as well, change it to map the first
990	 * element from the chain if there is one.
991	 */
992	if (vhpte->pte_tag == tag) {
993		if (vhpte->pte_chain) {
994			pte = (struct ia64_lpte *)
995				IA64_PHYS_TO_RR7(vhpte->pte_chain);
996			pmap_install_pte(vhpte, pte);
997		} else {
998			vhpte->pte_p = 0;
999			ia64_mf();
1000		}
1001	}
1002
1003	pmap_vhpt_resident--;
1004	error = 0;
1005 done:
1006	return error;
1007}
1008
1009/*
1010 * Find the ia64_lpte for the given va, if any.
1011 */
1012static struct ia64_lpte *
1013pmap_find_vhpt(vm_offset_t va)
1014{
1015	struct ia64_lpte *pte;
1016	u_int64_t tag;
1017
1018	pte = (struct ia64_lpte *) ia64_thash(va);
1019	if (!pte->pte_chain) {
1020		pte = 0;
1021		goto done;
1022	}
1023
1024	tag = ia64_ttag(va);
1025	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1026
1027	while (pte->pte_tag != tag) {
1028		if (pte->pte_chain) {
1029			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1030		} else {
1031			pte = 0;
1032			break;
1033		}
1034	}
1035
1036 done:
1037	return pte;
1038}
1039
1040/*
1041 * Remove an entry from the list of managed mappings.
1042 */
1043static int
1044pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1045{
1046	if (!pv) {
1047		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1048			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1049				if (pmap == pv->pv_pmap && va == pv->pv_va)
1050					break;
1051			}
1052		} else {
1053			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1054				if (va == pv->pv_va)
1055					break;
1056			}
1057		}
1058	}
1059
1060	if (pv) {
1061		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1062		m->md.pv_list_count--;
1063		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1064			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1065
1066		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1067		free_pv_entry(pv);
1068		return 0;
1069	} else {
1070		return ENOENT;
1071	}
1072}
1073
1074/*
1075 * Create a pv entry for page at pa for
1076 * (pmap, va).
1077 */
1078static void
1079pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1080{
1081	pv_entry_t pv;
1082
1083	pv = get_pv_entry();
1084	pv->pv_pmap = pmap;
1085	pv->pv_va = va;
1086
1087	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1088	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1089	m->md.pv_list_count++;
1090}
1091
1092/*
1093 *	Routine:	pmap_extract
1094 *	Function:
1095 *		Extract the physical page address associated
1096 *		with the given map/virtual_address pair.
1097 */
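/*
 *	Note: ia64_tpa() appears to translate through the hardware
 *	(TLB/VHPT), which is why the target pmap is installed around
 *	the call below.
 */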
1098vm_offset_t
1099pmap_extract(pmap, va)
1100	register pmap_t pmap;
1101	vm_offset_t va;
1102{
1103	pmap_t oldpmap;
1104	vm_offset_t pa;
1105
1106	oldpmap = pmap_install(pmap);
1107	pa = ia64_tpa(va);
1108	pmap_install(oldpmap);
1109	return pa;
1110}
1111
1112/***************************************************
1113 * Low level mapping routines.....
1114 ***************************************************/
1115
1116/*
1117 * Find the kernel lpte for mapping the given virtual address, which
1118 * must be in the part of region 5 which we can cover with our kernel
1119 * 'page tables'.
1120 */
1121static struct ia64_lpte *
1122pmap_find_kpte(vm_offset_t va)
1123{
1124	KASSERT((va >> 61) == 5,
1125		("kernel mapping 0x%lx not in region 5", va));
1126	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1127		("kernel mapping 0x%lx out of range", va));
1128	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1129}
1130
1131/*
1132 * Find a pte suitable for mapping a user-space address. If one exists
1133 * in the VHPT, that one will be returned, otherwise a new pte is
1134 * allocated.
1135 */
1136static struct ia64_lpte *
1137pmap_find_pte(vm_offset_t va)
1138{
1139	struct ia64_lpte *pte;
1140
1141	if (va >= VM_MAXUSER_ADDRESS)
1142		return pmap_find_kpte(va);
1143
1144	pte = pmap_find_vhpt(va);
1145	if (!pte) {
1146		pte = uma_zalloc(ptezone, M_WAITOK);
1147		pte->pte_p = 0;
1148	}
1149	return pte;
1150}
1151
1152/*
1153 * Free a pte which is now unused. This simply returns it to the zone
1154 * allocator if it is a user mapping. For kernel mappings, clear the
1155 * valid bit to make it clear that the mapping is not currently used.
1156 */
1157static void
1158pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1159{
1160	if (va < VM_MAXUSER_ADDRESS)
1161		uma_zfree(ptezone, pte);
1162	else
1163		pte->pte_p = 0;
1164}
1165
1166/*
1167 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1168 * the pte was originally valid, then it's assumed to already be in the
1169 * VHPT.
1170 */
1171static void
1172pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1173	     int ig, int pl, int ar)
1174{
1175	int wasvalid = pte->pte_p;
1176
1177	pte->pte_p = 1;
1178	pte->pte_ma = PTE_MA_WB;
1179	if (ig & PTE_IG_MANAGED) {
1180		pte->pte_a = 0;
1181		pte->pte_d = 0;
1182	} else {
1183		pte->pte_a = 1;
1184		pte->pte_d = 1;
1185	}
1186	pte->pte_pl = pl;
1187	pte->pte_ar = ar;
1188	pte->pte_ppn = pa >> 12;
1189	pte->pte_ed = 0;
1190	pte->pte_ig = ig;
1191
1192	pte->pte_ps = PAGE_SHIFT;
1193	pte->pte_key = 0;
1194
1195	pte->pte_tag = ia64_ttag(va);
1196
1197	if (wasvalid) {
1198		pmap_update_vhpt(pte, va);
1199	} else {
1200		pmap_enter_vhpt(pte, va);
1201	}
1202}
1203
1204/*
1205 * If a pte contains a valid mapping, clear it and update the VHPT.
1206 */
1207static void
1208pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1209{
1210	if (pte->pte_p) {
1211		pmap_remove_vhpt(va);
1212		ia64_ptc_g(va, PAGE_SHIFT << 2);
1213		pte->pte_p = 0;
1214	}
1215}
1216
1217/*
1218 * Remove the (possibly managed) mapping represented by pte from the
1219 * given pmap.
1220 */
1221static int
1222pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1223		pv_entry_t pv, int freepte)
1224{
1225	int error;
1226	vm_page_t m;
1227
1228	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1229		("removing pte for non-current pmap"));
1230
1231	/*
1232	 * First remove from the VHPT.
1233	 */
1234	error = pmap_remove_vhpt(va);
1235	if (error)
1236		return error;
1237
1238	/*
1239	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1240	 */
1241	pte->pte_p = 0;
1242
1243	if (pte->pte_ig & PTE_IG_WIRED)
1244		pmap->pm_stats.wired_count -= 1;
1245
1246	pmap->pm_stats.resident_count -= 1;
1247	if (pte->pte_ig & PTE_IG_MANAGED) {
1248		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1249		if (pte->pte_d)
1250			if (pmap_track_modified(va))
1251				vm_page_dirty(m);
1252		if (pte->pte_a)
1253			vm_page_flag_set(m, PG_REFERENCED);
1254
1255		if (freepte)
1256			pmap_free_pte(pte, va);
1257		return pmap_remove_entry(pmap, m, va, pv);
1258	} else {
1259		if (freepte)
1260			pmap_free_pte(pte, va);
1261		return 0;
1262	}
1263}
1264
1265/*
1266 * Add a list of wired pages to the kva
1267 * Add a list of wired pages to the kva.
1268 * This routine is only used for temporary
1269 * page modification or references recorded.
1270 * Note that old mappings are simply written
1271 * over.  The page *must* be wired.
1272 */
1273void
1274pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1275{
1276	int i;
1277	struct ia64_lpte *pte;
1278
1279	for (i = 0; i < count; i++) {
1280		vm_offset_t tva = va + i * PAGE_SIZE;
1281		int wasvalid;
1282		pte = pmap_find_kpte(tva);
1283		wasvalid = pte->pte_p;
1284		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1285			     0, PTE_PL_KERN, PTE_AR_RWX);
1286		if (wasvalid)
1287			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1288	}
1289}
1290
1291/*
1292 * this routine jerks page mappings from the
1293 * kernel -- it is meant only for temporary mappings.
1294 */
1295void
1296pmap_qremove(vm_offset_t va, int count)
1297{
1298	int i;
1299	struct ia64_lpte *pte;
1300
1301	for (i = 0; i < count; i++) {
1302		pte = pmap_find_kpte(va);
1303		pmap_clear_pte(pte, va);
1304		va += PAGE_SIZE;
1305	}
1306}
1307
1308/*
1309 * Add a wired page to the kva.
1310 */
1311void
1312pmap_kenter(vm_offset_t va, vm_offset_t pa)
1313{
1314	struct ia64_lpte *pte;
1315	int wasvalid;
1316
1317	pte = pmap_find_kpte(va);
1318	wasvalid = pte->pte_p;
1319	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1320	if (wasvalid)
1321		ia64_ptc_g(va, PAGE_SHIFT << 2);
1322}
1323
1324/*
1325 * Remove a page from the kva
1326 */
1327void
1328pmap_kremove(vm_offset_t va)
1329{
1330	struct ia64_lpte *pte;
1331
1332	pte = pmap_find_kpte(va);
1333	pmap_clear_pte(pte, va);
1334}
1335
1336/*
1337 *	Used to map a range of physical addresses into kernel
1338 *	virtual address space.
1339 *
1340 *	The value passed in '*virt' is a suggested virtual address for
1341 *	the mapping. Architectures which can support a direct-mapped
1342 *	physical to virtual region can return the appropriate address
1343 *	within that region, leaving '*virt' unchanged. Other
1344 *	architectures should map the pages starting at '*virt' and
1345 *	update '*virt' with the first usable address after the mapped
1346 *	region.
1347 */
1348vm_offset_t
1349pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1350{
1351	return IA64_PHYS_TO_RR7(start);
1352}
1353
1354/*
1355 * This routine is very drastic, but can save the system
1356 * in a pinch.
1357 */
1358void
1359pmap_collect()
1360{
1361	int i;
1362	vm_page_t m;
1363	static int warningdone = 0;
1364
1365	if (pmap_pagedaemon_waken == 0)
1366		return;
1367
1368	if (warningdone < 5) {
1369		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1370		warningdone++;
1371	}
1372
1373	for(i = 0; i < vm_page_array_size; i++) {
1374		m = &vm_page_array[i];
1375		if (m->wire_count || m->hold_count || m->busy ||
1376		    (m->flags & (PG_BUSY | PG_UNMANAGED)))
1377			continue;
1378		pmap_remove_all(m);
1379	}
1380	pmap_pagedaemon_waken = 0;
1381}
1382
1383/*
1384 * Remove a single page from a process address space
1385 */
1386static void
1387pmap_remove_page(pmap_t pmap, vm_offset_t va)
1388{
1389	struct ia64_lpte *pte;
1390
1391	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1392		("removing page for non-current pmap"));
1393
1394	pte = pmap_find_vhpt(va);
1395	if (pte) {
1396		pmap_remove_pte(pmap, pte, va, 0, 1);
1397		pmap_invalidate_page(pmap, va);
1398	}
1399	return;
1400}
1401
1402/*
1403 *	Remove the given range of addresses from the specified map.
1404 *
1405 *	It is assumed that the start and end are properly
1406 *	rounded to the page size.
1407 */
1408void
1409pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1410{
1411	pmap_t oldpmap;
1412	vm_offset_t va;
1413	pv_entry_t pv;
1414	struct ia64_lpte *pte;
1415
1416	if (pmap == NULL)
1417		return;
1418
1419	if (pmap->pm_stats.resident_count == 0)
1420		return;
1421
1422	oldpmap = pmap_install(pmap);
1423
1424	/*
1425	 * Special handling for removing a single page: a very
1426	 * common operation for which it is easy to short-circuit
1427	 * some code.
1428	 */
1429	if (sva + PAGE_SIZE == eva) {
1430		pmap_remove_page(pmap, sva);
1431		pmap_install(oldpmap);
1432		return;
1433	}
1434
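	/*
	 * If there are fewer resident mappings than pages in the range,
	 * it is cheaper to walk the pmap's pv list; otherwise probe the
	 * VHPT for every page in the range.
	 */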
1435	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1436		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1437			va = pv->pv_va;
1438			if (va >= sva && va < eva) {
1439				pte = pmap_find_vhpt(va);
1440				pmap_remove_pte(pmap, pte, va, pv, 1);
1441				pmap_invalidate_page(pmap, va);
1442			}
1443		}
1444
1445	} else {
1446		for (va = sva; va < eva; va += PAGE_SIZE) {
1447			pte = pmap_find_vhpt(va);
1448			if (pte) {
1449				pmap_remove_pte(pmap, pte, va, 0, 1);
1450				pmap_invalidate_page(pmap, va);
1451			}
1452		}
1453	}
1454
1455	pmap_install(oldpmap);
1456}
1457
1458/*
1459 *	Routine:	pmap_remove_all
1460 *	Function:
1461 *		Removes this physical page from
1462 *		all physical maps in which it resides.
1463 *		Reflects back modify bits to the pager.
1464 *
1465 *	Notes:
1466 *		Original versions of this routine were very
1467 *		inefficient because they iteratively called
1468 *		pmap_remove (slow...)
1469 */
1470
1471static void
1472pmap_remove_all(vm_page_t m)
1473{
1474	pmap_t oldpmap;
1475	pv_entry_t pv;
1476	int nmodify;
1477	int s;
1478
1479	nmodify = 0;
1480#if defined(PMAP_DIAGNOSTIC)
1481	/*
1482	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1483	 * pages!
1484	 */
1485	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1486		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1487	}
1488#endif
1489
1490	s = splvm();
1491
1492	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1493		struct ia64_lpte *pte;
1494		pmap_t pmap = pv->pv_pmap;
1495		vm_offset_t va = pv->pv_va;
1496
1497		oldpmap = pmap_install(pmap);
1498		pte = pmap_find_vhpt(va);
1499		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1500			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1501		pmap_remove_pte(pmap, pte, va, pv, 1);
1502		pmap_invalidate_page(pmap, va);
1503		pmap_install(oldpmap);
1504	}
1505
1506	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1507
1508	splx(s);
1509	return;
1510}
1511
1512/*
1513 *	Set the physical protection on the
1514 *	specified range of this map as requested.
1515 */
1516void
1517pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1518{
1519	pmap_t oldpmap;
1520	struct ia64_lpte *pte;
1521	int newprot;
1522
1523	if (pmap == NULL)
1524		return;
1525
1526	oldpmap = pmap_install(pmap);
1527
1528	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1529		pmap_remove(pmap, sva, eva);
1530		pmap_install(oldpmap);
1531		return;
1532	}
1533
1534	if (prot & VM_PROT_WRITE) {
1535		pmap_install(oldpmap);
1536		return;
1537	}
1538
1539	newprot = pte_prot(pmap, prot);
1540
1541	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1542		panic("pmap_protect: unaligned addresses");
1543
1544	while (sva < eva) {
1545		/*
1546		 * If page is invalid, skip this page
1547		 */
1548		pte = pmap_find_vhpt(sva);
1549		if (!pte) {
1550			sva += PAGE_SIZE;
1551			continue;
1552		}
1553
1554		if (pmap_pte_prot(pte) != newprot) {
1555			if (pte->pte_ig & PTE_IG_MANAGED) {
1556				vm_offset_t pa = pmap_pte_pa(pte);
1557				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1558				if (pte->pte_d) {
1559					if (pmap_track_modified(sva))
1560						vm_page_dirty(m);
1561					pte->pte_d = 0;
1562				}
1563				if (pte->pte_a) {
1564					vm_page_flag_set(m, PG_REFERENCED);
1565					pte->pte_a = 0;
1566				}
1567			}
1568			pmap_pte_set_prot(pte, newprot);
1569			pmap_update_vhpt(pte, sva);
1570			pmap_invalidate_page(pmap, sva);
1571		}
1572
1573		sva += PAGE_SIZE;
1574	}
1575	pmap_install(oldpmap);
1576}
1577
1578/*
1579 *	Insert the given physical page (p) at
1580 *	the specified virtual address (v) in the
1581 *	target physical map with the protection requested.
1582 *
1583 *	If specified, the page will be wired down, meaning
1584 *	that the related pte can not be reclaimed.
1585 *
1586 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1587 *	or lose information.  That is, this routine must actually
1588 *	insert this page into the given map NOW.
1589 */
1590void
1591pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1592	   boolean_t wired)
1593{
1594	pmap_t oldpmap;
1595	vm_offset_t pa;
1596	vm_offset_t opa;
1597	struct ia64_lpte origpte;
1598	struct ia64_lpte *pte;
1599	int managed;
1600
1601	if (pmap == NULL)
1602		return;
1603
1604	pmap_ensure_rid(pmap, va);
1605
1606	oldpmap = pmap_install(pmap);
1607
1608	va &= ~PAGE_MASK;
1609#ifdef PMAP_DIAGNOSTIC
1610	if (va > VM_MAX_KERNEL_ADDRESS)
1611		panic("pmap_enter: toobig");
1612#endif
1613
1614	/*
1615	 * Find (or create) a pte for the given mapping.
1616	 */
1617	pte = pmap_find_pte(va);
1618	origpte = *pte;
1619
1620	if (origpte.pte_p)
1621		opa = pmap_pte_pa(&origpte);
1622	else
1623		opa = 0;
1624	managed = 0;
1625
1626	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1627
1628	/*
1629	 * Mapping has not changed, must be protection or wiring change.
1630	 */
1631	if (origpte.pte_p && (opa == pa)) {
1632		/*
1633		 * Wiring change, just update stats. We don't worry about
1634		 * wiring PT pages as they remain resident as long as there
1635		 * are valid mappings in them. Hence, if a user page is wired,
1636		 * the PT page will be also.
1637		 */
1638		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1639			pmap->pm_stats.wired_count++;
1640		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1641			pmap->pm_stats.wired_count--;
1642
1643		/*
1644		 * We might be turning off write access to the page,
1645		 * so we go ahead and sense modify status.
1646		 */
1647		if (origpte.pte_ig & PTE_IG_MANAGED) {
1648			if (origpte.pte_d && pmap_track_modified(va)) {
1649				vm_page_t om;
1650				om = PHYS_TO_VM_PAGE(opa);
1651				vm_page_dirty(om);
1652			}
1653		}
1654
1655		managed = origpte.pte_ig & PTE_IG_MANAGED;
1656		goto validate;
1657	}
1658	/*
1659	 * Mapping has changed, invalidate old range and fall
1660	 * through to handle validating new mapping.
1661	 */
1662	if (opa) {
1663		int error;
1664		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1665		if (error)
1666			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1667	}
1668
1669	/*
1670	 * Enter on the PV list if part of our managed memory.
1671	 */
1672	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1673		pmap_insert_entry(pmap, va, m);
1674		managed |= PTE_IG_MANAGED;
1675	}
1676
1677	/*
1678	 * Increment counters
1679	 */
1680	pmap->pm_stats.resident_count++;
1681	if (wired)
1682		pmap->pm_stats.wired_count++;
1683
1684validate:
1685
1686	/*
1687	 * Now validate mapping with desired protection/wiring. This
1688	 * adds the pte to the VHPT if necessary.
1689	 */
1690	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1691		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1692
1693	/*
1694	 * if the mapping or permission bits are different, we need
1695	 * to invalidate the page.
1696	 */
1697	if (!pmap_equal_pte(&origpte, pte))
1698		pmap_invalidate_page(pmap, va);
1699
1700	pmap_install(oldpmap);
1701}
1702
1703/*
1704 * this code makes some *MAJOR* assumptions:
1705 * 1. Current pmap & pmap exists.
1706 * 2. Not wired.
1707 * 3. Read access.
1708 * 4. No page table pages.
1709 * 5. Tlbflush is deferred to calling procedure.
1710 * 6. Page IS managed.
1711 * but is *MUCH* faster than pmap_enter...
1712 */
1713
1714static void
1715pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1716{
1717	struct ia64_lpte *pte;
1718	pmap_t oldpmap;
1719
1720	pmap_ensure_rid(pmap, va);
1721
1722	oldpmap = pmap_install(pmap);
1723
1724	pte = pmap_find_pte(va);
1725	if (pte->pte_p)
1726	if (pte->pte_p) {
		pmap_install(oldpmap);
1727		return;
	}
1728	/*
1729	 * Enter on the PV list since its part of our managed memory.
1730	 * Enter on the PV list since it's part of our managed memory.
1731	pmap_insert_entry(pmap, va, m);
1732
1733	/*
1734	 * Increment counters
1735	 */
1736	pmap->pm_stats.resident_count++;
1737
1738	/*
1739	 * Initialise PTE with read-only protection and enter into VHPT.
1740	 */
1741	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1742		     PTE_IG_MANAGED,
1743		     PTE_PL_USER, PTE_AR_R);
1744
1745	pmap_install(oldpmap);
1746}
1747
1748/*
1749 * Make temporary mapping for a physical address. This is called
1750 * during dump.
1751 */
1752void *
1753pmap_kenter_temporary(vm_offset_t pa, int i)
1754{
1755	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1756}
1757
1758#define MAX_INIT_PT (96)
1759/*
1760 * pmap_object_init_pt preloads the ptes for a given object
1761 * into the specified pmap.  This eliminates the blast of soft
1762 * faults on process startup and immediately after an mmap.
1763 */
1764void
1765pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1766		    vm_object_t object, vm_pindex_t pindex,
1767		    vm_size_t size, int limit)
1768{
1769	pmap_t oldpmap;
1770	vm_offset_t tmpidx;
1771	int psize;
1772	vm_page_t p;
1773	int objpgs;
1774
1775	if (pmap == NULL || object == NULL)
1776		return;
1777
1778	oldpmap = pmap_install(pmap);
1779
1780	psize = ia64_btop(size);
1781
1782	if ((object->type != OBJT_VNODE) ||
1783		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1784			(object->resident_page_count > MAX_INIT_PT))) {
1785		pmap_install(oldpmap);
1786		return;
1787	}
1788
1789	if (psize + pindex > object->size) {
1790		if (object->size < pindex) {
			pmap_install(oldpmap);
1791			return;
		}
1792		psize = object->size - pindex;
1793	}
1794
1795	/*
1796	 * if we are processing a major portion of the object, then scan the
1797	 * entire thing.
1798	 */
1799	if (psize > (object->resident_page_count >> 2)) {
1800		objpgs = psize;
1801
1802		for (p = TAILQ_FIRST(&object->memq);
1803		    ((objpgs > 0) && (p != NULL));
1804		    p = TAILQ_NEXT(p, listq)) {
1805
1806			tmpidx = p->pindex;
1807			if (tmpidx < pindex) {
1808				continue;
1809			}
1810			tmpidx -= pindex;
1811			if (tmpidx >= psize) {
1812				continue;
1813			}
1814			/*
1815			 * don't allow an madvise to blow away our really
1816			 * free pages allocating pv entries.
1817			 */
1818			if ((limit & MAP_PREFAULT_MADVISE) &&
1819			    cnt.v_free_count < cnt.v_free_reserved) {
1820				break;
1821			}
1822			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1823				(p->busy == 0) &&
1824			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1825				if ((p->queue - p->pc) == PQ_CACHE)
1826					vm_page_deactivate(p);
1827				vm_page_busy(p);
1828				pmap_enter_quick(pmap,
1829						 addr + ia64_ptob(tmpidx), p);
1830				vm_page_flag_set(p, PG_MAPPED);
1831				vm_page_wakeup(p);
1832			}
1833			objpgs -= 1;
1834		}
1835	} else {
1836		/*
1837		 * else lookup the pages one-by-one.
1838		 */
1839		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1840			/*
1841			 * don't allow an madvise to blow away our really
1842			 * free pages allocating pv entries.
1843			 */
1844			if ((limit & MAP_PREFAULT_MADVISE) &&
1845			    cnt.v_free_count < cnt.v_free_reserved) {
1846				break;
1847			}
1848			p = vm_page_lookup(object, tmpidx + pindex);
1849			if (p &&
1850			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1851				(p->busy == 0) &&
1852			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1853				if ((p->queue - p->pc) == PQ_CACHE)
1854					vm_page_deactivate(p);
1855				vm_page_busy(p);
1856				pmap_enter_quick(pmap,
1857						 addr + ia64_ptob(tmpidx), p);
1858				vm_page_flag_set(p, PG_MAPPED);
1859				vm_page_wakeup(p);
1860			}
1861		}
1862	}
1863	pmap_install(oldpmap);
1864	return;
1865}
1866
1867/*
1868 * pmap_prefault provides a quick way of clustering
1869 * pagefaults into a processes address space.  It is a "cousin"
1870 * of pmap_object_init_pt, except it runs at page fault time instead
1871 * of mmap time.
1872 */
1873#define PFBAK 4
1874#define PFFOR 4
1875#define PAGEORDER_SIZE (PFBAK+PFFOR)
1876
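/*
 * Offsets, relative to the faulting address, that pmap_prefault probes:
 * one to four pages on either side, nearest pages first.
 */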
1877static int pmap_prefault_pageorder[] = {
1878	-PAGE_SIZE, PAGE_SIZE,
1879	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1880	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1881	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1882};
1883
1884void
1885pmap_prefault(pmap, addra, entry)
1886	pmap_t pmap;
1887	vm_offset_t addra;
1888	vm_map_entry_t entry;
1889{
1890	int i;
1891	vm_offset_t starta;
1892	vm_offset_t addr;
1893	vm_pindex_t pindex;
1894	vm_page_t m, mpte;
1895	vm_object_t object;
1896
1897	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1898		return;
1899
1900	object = entry->object.vm_object;
1901
1902	starta = addra - PFBAK * PAGE_SIZE;
1903	if (starta < entry->start) {
1904		starta = entry->start;
1905	} else if (starta > addra) {
1906		starta = 0;
1907	}
1908
1909	mpte = NULL;
1910	for (i = 0; i < PAGEORDER_SIZE; i++) {
1911		vm_object_t lobject;
1912		struct ia64_lpte *pte;
1913
1914		addr = addra + pmap_prefault_pageorder[i];
1915		if (addr > addra + (PFFOR * PAGE_SIZE))
1916			addr = 0;
1917
1918		if (addr < starta || addr >= entry->end)
1919			continue;
1920
1921		pte = pmap_find_vhpt(addr);
1922		if (pte && pte->pte_p)
1923			continue;
1924
1925		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
1926		lobject = object;
1927		for (m = vm_page_lookup(lobject, pindex);
1928		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
1929		    lobject = lobject->backing_object) {
1930			if (lobject->backing_object_offset & PAGE_MASK)
1931				break;
1932			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
1933			m = vm_page_lookup(lobject->backing_object, pindex);
1934		}
1935
1936		/*
1937		 * give-up when a page is not in memory
1938		 */
1939		if (m == NULL)
1940			break;
1941
1942		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1943			(m->busy == 0) &&
1944		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1945
1946			if ((m->queue - m->pc) == PQ_CACHE) {
1947				vm_page_deactivate(m);
1948			}
1949			vm_page_busy(m);
1950			pmap_enter_quick(pmap, addr, m);
1951			vm_page_flag_set(m, PG_MAPPED);
1952			vm_page_wakeup(m);
1953		}
1954	}
1955}
1956
1957/*
1958 *	Routine:	pmap_change_wiring
1959 *	Function:	Change the wiring attribute for a map/virtual-address
1960 *			pair.
1961 *	In/out conditions:
1962 *			The mapping must already exist in the pmap.
1963 */
1964void
1965pmap_change_wiring(pmap, va, wired)
1966	register pmap_t pmap;
1967	vm_offset_t va;
1968	boolean_t wired;
1969{
1970	pmap_t oldpmap;
1971	struct ia64_lpte *pte;
1972
1973	if (pmap == NULL)
1974		return;
1975
1976	oldpmap = pmap_install(pmap);
1977
1978	pte = pmap_find_vhpt(va);
1979
1980	if (wired && !pmap_pte_w(pte))
1981		pmap->pm_stats.wired_count++;
1982	else if (!wired && pmap_pte_w(pte))
1983		pmap->pm_stats.wired_count--;
1984
1985	/*
1986	 * Wiring is not a hardware characteristic so there is no need to
1987	 * invalidate TLB.
1988	 */
1989	pmap_pte_set_w(pte, wired);
1990
1991	pmap_install(oldpmap);
1992}
1993
1994
1995
1996/*
1997 *	Copy the range specified by src_addr/len
1998 *	from the source map to the range dst_addr/len
1999 *	in the destination map.
2000 *
2001 *	This routine is only advisory and need not do anything.
2002 */
2003
2004void
2005pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2006	  vm_offset_t src_addr)
2007{
2008}
2009
2010
2011/*
2012 *	pmap_zero_page zeros the specified hardware page by
2013 *	mapping it into virtual memory and using bzero to clear
2014 *	its contents.
2015 */
2016
2017void
2018pmap_zero_page(vm_page_t m)
2019{
2020	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2021	bzero((caddr_t) va, PAGE_SIZE);
2022}
2023
2024
2025/*
2026 *	pmap_zero_page_area zeros the specified hardware page by
2027 *	mapping it into virtual memory and using bzero to clear
2028 *	its contents.
2029 *
2030 *	off and size must reside within a single page.
2031 */
2032
2033void
2034pmap_zero_page_area(vm_page_t m, int off, int size)
2035{
2036	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2037	bzero((char *)(caddr_t)va + off, size);
2038}
2039
2040
2041/*
2042 *	pmap_zero_page_idle zeros the specified hardware page by
2043 *	mapping it into virtual memory and using bzero to clear
2044 *	its contents.  This is for the vm_idlezero process.
2045 */
2046
2047void
2048pmap_zero_page_idle(vm_page_t m)
2049{
2050	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2051	bzero((caddr_t) va, PAGE_SIZE);
2052}
2053
2054
2055/*
2057 *	pmap_copy_page copies the specified (machine independent)
2058 *	page by mapping the page into virtual memory and using
2059 *	bcopy to copy the page, one machine dependent page at a
2060 *	time.
2061 */
2062void
2063pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2064{
2065	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2066	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2067	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2068}
2069
2070
2071/*
2072 *	Routine:	pmap_pageable
2073 *	Function:
2074 *		Make the specified pages (by pmap, offset)
2075 *		pageable (or not) as requested.
2076 *
2077 *		A page which is not pageable may not take
2078 *		a fault; therefore, its page table entry
2079 *		must remain valid for the duration.
2080 *
2081 *		This routine is merely advisory; pmap_enter
2082 *		will specify that these pages are to be wired
2083 *		down (or not) as appropriate.
2084 */
2085void
2086pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
2087	      boolean_t pageable)
2088{
2089}
2090
2091/*
2092 * Returns true if the pmap's pv is one of the first
2093 * 16 pvs linked to from this page.  This count may
2094 * be changed upwards or downwards in the future; it
2095 * is only necessary that true be returned for a small
2096 * subset of pmaps for proper page aging.
2097 */
2098boolean_t
2099pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2100{
2101	pv_entry_t pv;
2102	int loops = 0;
2103	int s;
2104
2105	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2106		return FALSE;
2107
2108	s = splvm();
2109
2110	/*
2111	 * Check the page's current mappings, returning immediately if found.
2112	 */
2113	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2114		if (pv->pv_pmap == pmap) {
2115			splx(s);
2116			return TRUE;
2117		}
2118		loops++;
2119		if (loops >= 16)
2120			break;
2121	}
2122	splx(s);
2123	return (FALSE);
2124}
2125
2126#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2127/*
2128 * Remove all pages from the specified address space; this aids
2129 * process exit speeds.  Also, this code is special-cased for the
2130 * current process only, but can have the more generic (and
2131 * slightly slower) mode enabled.  This is much faster than
2132 * pmap_remove in the case of running down an entire address space.
2134 */
2135void
2136pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2137{
2138	pv_entry_t pv, npv;
2139	int s;
2140
2141#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2142	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2143		printf("warning: pmap_remove_pages called with non-current pmap\n");
2144		return;
2145	}
2146#endif
2147
2148	s = splvm();
2149	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2150		pv;
2151		pv = npv) {
2152		struct ia64_lpte *pte;
2153
2154		npv = TAILQ_NEXT(pv, pv_plist);
2155
2156		if (pv->pv_va >= eva || pv->pv_va < sva) {
2157			continue;
2158		}
2159
2160		pte = pmap_find_vhpt(pv->pv_va);
2161		if (!pte)
2162			panic("pmap_remove_pages: page on pm_pvlist has no pte\n");
2163
2164		/*
2165		 * We cannot remove wired pages from a process' mapping
2166		 * at this time.
2167		 */
2168		if (pte->pte_ig & PTE_IG_WIRED) {
2169			continue;
2170		}
2171
2172		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2173	}
2174	splx(s);
2175
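	/*
	 * Flush this pmap's translations once at the end instead of
	 * invalidating each removed mapping individually.
	 */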
2176	pmap_invalidate_all(pmap);
2177}
2178
2179/*
2180 *	pmap_page_protect:
2181 *
2182 *	Lower the permission for all mappings to a given page.
2183 */
2184void
2185pmap_page_protect(vm_page_t m, vm_prot_t prot)
2186{
2187	pv_entry_t pv;
2188
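	/*
	 * If write permission is retained there is nothing to do.
	 * Otherwise either downgrade every mapping of the page to the
	 * requested protection or, for VM_PROT_NONE, remove them all.
	 */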
2189	if ((prot & VM_PROT_WRITE) != 0)
2190		return;
2191	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2192		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2193			int newprot = pte_prot(pv->pv_pmap, prot);
2194			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2195			struct ia64_lpte *pte;
2196			pte = pmap_find_vhpt(pv->pv_va);
2197			pmap_pte_set_prot(pte, newprot);
2198			pmap_update_vhpt(pte, pv->pv_va);
2199			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2200			pmap_install(oldpmap);
2201		}
2202	} else {
2203		pmap_remove_all(m);
2204	}
2205}
2206
2207vm_offset_t
2208pmap_phys_address(int ppn)
2209{
2210	return (ia64_ptob(ppn));
2211}
2212
2213/*
2214 *	pmap_ts_referenced:
2215 *
2216 *	Return a count of reference bits for a page, clearing those bits.
2217 *	It is not necessary for every reference bit to be cleared, but it
2218 *	is necessary that 0 only be returned when there are truly no
2219 *	reference bits set.
2220 *
2221 *	XXX: The exact number of bits to check and clear is a matter that
2222 *	should be tested and standardized at some point in the future for
2223 *	optimal aging of shared pages.
2224 */
2225int
2226pmap_ts_referenced(vm_page_t m)
2227{
2228	pv_entry_t pv;
2229	int count = 0;
2230
2231	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2232		return 0;
2233
2234	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2235		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2236		struct ia64_lpte *pte;
2237		pte = pmap_find_vhpt(pv->pv_va);
2238		if (pte->pte_a) {
2239			count++;
2240			pte->pte_a = 0;
2241			pmap_update_vhpt(pte, pv->pv_va);
2242			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2243		}
2244		pmap_install(oldpmap);
2245	}
2246
2247	return count;
2248}
2249
2250#if 0
2251/*
2252 *	pmap_is_referenced:
2253 *
2254 *	Return whether or not the specified physical page was referenced
2255 *	in any physical maps.
2256 */
2257static boolean_t
2258pmap_is_referenced(vm_page_t m)
2259{
2260	pv_entry_t pv;
2261
2262	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2263		return FALSE;
2264
2265	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2266		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2267		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2268		pmap_install(oldpmap);
2269		if (pte->pte_a)
2270			return 1;
2271	}
2272
2273	return 0;
2274}
2275#endif
2276
2277/*
2278 *	pmap_is_modified:
2279 *
2280 *	Return whether or not the specified physical page was modified
2281 *	in any physical maps.
2282 */
2283boolean_t
2284pmap_is_modified(vm_page_t m)
2285{
2286	pv_entry_t pv;
2287
2288	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2289		return FALSE;
2290
2291	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2292		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2293		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2294		pmap_install(oldpmap);
2295		if (pte->pte_d)
2296			return 1;
2297	}
2298
2299	return 0;
2300}
2301
2302/*
2303 *	Clear the modify bits on the specified physical page.
2304 */
2305void
2306pmap_clear_modify(vm_page_t m)
2307{
2308	pv_entry_t pv;
2309
2310	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2311		return;
2312
2313	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2314		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2315		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2316		if (pte->pte_d) {
2317			pte->pte_d = 0;
2318			pmap_update_vhpt(pte, pv->pv_va);
2319			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2320		}
2321		pmap_install(oldpmap);
2322	}
2323}
2324
2325/*
2326 *	pmap_clear_reference:
2327 *
2328 *	Clear the reference bit on the specified physical page.
2329 */
2330void
2331pmap_clear_reference(vm_page_t m)
2332{
2333	pv_entry_t pv;
2334
2335	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2336		return;
2337
2338	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2339		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2340		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2341		if (pte->pte_a) {
2342			pte->pte_a = 0;
2343			pmap_update_vhpt(pte, pv->pv_va);
2344			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2345		}
2346		pmap_install(oldpmap);
2347	}
2348}
2349
2350/*
2351 * Miscellaneous support routines follow
2352 */
2353
2354static void
2355ia64_protection_init()
2356{
2357	int prot, *kp, *up;
2358
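	/*
	 * protection_codes[0] holds the kernel-mode PTE access rights
	 * and protection_codes[1] the user-mode ones, indexed by the
	 * three-bit VM_PROT_{READ,WRITE,EXECUTE} combination.
	 */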
2359	kp = protection_codes[0];
2360	up = protection_codes[1];
2361
2362	for (prot = 0; prot < 8; prot++) {
2363		switch (prot) {
2364		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2365			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2366			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2367			break;
2368
2369		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2370			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2371			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2372			break;
2373
2374		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2375			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2376			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2377			break;
2378
2379		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2380			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2381			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2382			break;
2383
2384		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2385			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2386			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2387			break;
2388
2389		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2390			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2391			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2392			break;
2393
2394		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2395			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2396			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2397			break;
2398
2399		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2400			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2401			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2402			break;
2403		}
2404	}
2405}
2406
2407/*
2408 * Map a set of physical memory pages into the kernel virtual
2409 * address space. Return a pointer to where it is mapped. This
2410 * routine is intended to be used for mapping device memory,
2411 * NOT real memory.
2412 */
2413void *
2414pmap_mapdev(vm_offset_t pa, vm_size_t size)
2415{
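	/*
	 * Region 6 is an uncached identity mapping of physical memory,
	 * which is appropriate for device registers, so no page table
	 * entries need to be created here.
	 */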
2416	return (void*) IA64_PHYS_TO_RR6(pa);
2417}
2418
2419/*
2420 * 'Unmap' a range mapped by pmap_mapdev().
2421 */
2422void
2423pmap_unmapdev(vm_offset_t va, vm_size_t size)
2424{
2425	return;
2426}
2427
2428/*
2429 * Perform the pmap work for mincore().
2430 */
2431int
2432pmap_mincore(pmap_t pmap, vm_offset_t addr)
2433{
2434	pmap_t oldpmap;
2435	struct ia64_lpte *pte;
2436	int val = 0;
2437
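	/*
	 * Temporarily install the target pmap so the VHPT lookup is
	 * done in the right address space, then restore the old one.
	 */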
2438	oldpmap = pmap_install(pmap);
2439	pte = pmap_find_vhpt(addr);
2440	pmap_install(oldpmap);
2441
2442	if (!pte)
2443		return 0;
2444
2445	if (pmap_pte_v(pte)) {
2446		vm_page_t m;
2447		vm_offset_t pa;
2448
2449		val = MINCORE_INCORE;
2450		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2451			return val;
2452
2453		pa = pmap_pte_pa(pte);
2454
2455		m = PHYS_TO_VM_PAGE(pa);
2456
2457		/*
2458		 * Modified by us
2459		 */
2460		if (pte->pte_d)
2461			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2462		/*
2463		 * Modified by someone
2464		 */
2465		else if (pmap_is_modified(m))
2466			val |= MINCORE_MODIFIED_OTHER;
2467		/*
2468		 * Referenced by us
2469		 */
2470		if (pte->pte_a)
2471			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2472
2473		/*
2474		 * Referenced by someone
2475		 */
2476		else if (pmap_ts_referenced(m)) {
2477			val |= MINCORE_REFERENCED_OTHER;
2478			vm_page_flag_set(m, PG_REFERENCED);
2479		}
2480	}
2481	return val;
2482}
2483
2484void
2485pmap_activate(struct thread *td)
2486{
2487	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2488}
2489
2490pmap_t
2491pmap_install(pmap_t pmap)
2492{
2493	pmap_t oldpmap;
2494	int i;
2495
2496	critical_enter();
2497
2498	oldpmap = PCPU_GET(current_pmap);
2499
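	/*
	 * Reinstalling the current pmap, or installing the kernel pmap
	 * (whose mappings live in the globally shared regions), requires
	 * no region register update.
	 */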
2500	if (pmap == oldpmap || pmap == kernel_pmap) {
2501		critical_exit();
2502		return pmap;
2503	}
2504
2505	if (oldpmap) {
2506		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2507	}
2508
2509	PCPU_SET(current_pmap, pmap);
2510	if (!pmap) {
2511		/*
2512		 * RIDs 0..4 have no mappings, which ensures that any
2513		 * access generates a page fault.
2514		 */
2515		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2516		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2517		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2518		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2519		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2520		critical_exit();
2521		return oldpmap;
2522	}
2523
2524	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2525
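	/*
	 * Load this pmap's RIDs into region registers 0-4.  Each value
	 * packs the region ID (bits 8 and up), the preferred page size
	 * (bits 2-7) and the VHPT walker enable bit (bit 0).
	 */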
2526	for (i = 0; i < 5; i++)
2527		ia64_set_rr(IA64_RR_BASE(i),
2528			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2529
2530	critical_exit();
2531	return oldpmap;
2532}
2533
2534vm_offset_t
2535pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2536{
2537
2538	return addr;
2539}
2540
2541#include "opt_ddb.h"
2542
2543#ifdef DDB
2544
2545#include <ddb/ddb.h>
2546
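/*
 * Page size names indexed by the 'ps' field of the itir and region
 * registers, which holds log2 of the page size in bytes.
 */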
2547static const char*	psnames[] = {
2548	"1B",	"2B",	"4B",	"8B",
2549	"16B",	"32B",	"64B",	"128B",
2550	"256B",	"512B",	"1K",	"2K",
2551	"4K",	"8K",	"16K",	"32K",
2552	"64K",	"128K",	"256K",	"512K",
2553	"1M",	"2M",	"4M",	"8M",
2554	"16M",	"32M",	"64M",	"128M",
2555	"256M",	"512M",	"1G",	"2G"
2556};
2557
2558static void
2559print_trs(int type)
2560{
2561	struct ia64_pal_result	res;
2562	int			i, maxtr;
2563	struct {
2564		struct ia64_pte	pte;
2565		struct ia64_itir itir;
2566		struct ia64_ifa ifa;
2567		struct ia64_rr	rr;
2568	}			buf;
2569	static const char*	manames[] = {
2570		"WB",	"bad",	"bad",	"bad",
2571		"UC",	"UCE",	"WC",	"NaT",
2572
2573	};
2574
2575	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2576	if (res.pal_status != 0) {
2577		db_printf("Can't get VM summary\n");
2578		return;
2579	}
2580
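	/*
	 * PAL_VM_SUMMARY reports the index of the highest implemented
	 * translation register for each type; type 0 selects the
	 * instruction TRs and type 1 the data TRs.
	 */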
2581	if (type == 0)
2582		maxtr = (res.pal_result[0] >> 40) & 0xff;
2583	else
2584		maxtr = (res.pal_result[0] >> 32) & 0xff;
2585
2586	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2587	for (i = 0; i <= maxtr; i++) {
2588		bzero(&buf, sizeof(buf));
2589		res = ia64_call_pal_stacked_physical
2590			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2591		if (!(res.pal_result[0] & 1))
2592			buf.pte.pte_ar = 0;
2593		if (!(res.pal_result[0] & 2))
2594			buf.pte.pte_pl = 0;
2595		if (!(res.pal_result[0] & 4))
2596			buf.pte.pte_d = 0;
2597		if (!(res.pal_result[0] & 8))
2598			buf.pte.pte_ma = 0;
2599		db_printf(
2600			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2601			buf.ifa.ifa_ig & 1,
2602			buf.rr.rr_rid,
2603			buf.ifa.ifa_vpn,
2604			buf.pte.pte_ppn,
2605			psnames[buf.itir.itir_ps],
2606			buf.pte.pte_ed,
2607			buf.pte.pte_ar,
2608			buf.pte.pte_pl,
2609			buf.pte.pte_d,
2610			buf.pte.pte_a,
2611			manames[buf.pte.pte_ma],
2612			buf.pte.pte_p,
2613			buf.itir.itir_key);
2614	}
2615}
2616
2617DB_COMMAND(itr, db_itr)
2618{
2619	print_trs(0);
2620}
2621
2622DB_COMMAND(dtr, db_dtr)
2623{
2624	print_trs(1);
2625}
2626
2627DB_COMMAND(rr, db_rr)
2628{
2629	int i;
2630	u_int64_t t;
2631	struct ia64_rr rr;
2632
2633	printf("RR RID    PgSz VE\n");
2634	for (i = 0; i < 8; i++) {
2635		__asm __volatile ("mov %0=rr[%1]"
2636				  : "=r"(t)
2637				  : "r"(IA64_RR_BASE(i)));
2638		*(u_int64_t *) &rr = t;
2639		printf("%d  %06x %4s %d\n",
2640		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2641	}
2642}
2643
2644DB_COMMAND(thash, db_thash)
2645{
2646	if (!have_addr)
2647		return;
2648
2649	db_printf("%p\n", (void *) ia64_thash(addr));
2650}
2651
2652DB_COMMAND(ttag, db_ttag)
2653{
2654	if (!have_addr)
2655		return;
2656
2657	db_printf("0x%lx\n", ia64_ttag(addr));
2658}
2659
2660#endif
2661