1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 110784 2003-02-13 07:03:44Z alc $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
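/*
 * Illustrative sketch of the encoding described above (not part of the
 * original comment): with IDs handed out in aligned groups of eight, a
 * pmap given base ID 0x120 could use 0x120|0 for region 0, 0x120|1 for
 * region 1, and so on, keeping the region register number in the low
 * three bits.  The kernel's reserved group is IDs 0-7, which
 * pmap_bootstrap() marks busy up front.
 */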
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120#include <vm/uma_int.h>
121
122#include <sys/user.h>
123
124#include <machine/pal.h>
125#include <machine/md_var.h>
126
127MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
128
129#ifndef PMAP_SHPGPERPROC
130#define PMAP_SHPGPERPROC 200
131#endif
132
133#if defined(DIAGNOSTIC)
134#define PMAP_DIAGNOSTIC
135#endif
136
137#define MINPV 2048	/* Preallocate at least this many */
138#define MAXPV 20480	/* But no more than this */
139
140#if 0
141#define PMAP_DIAGNOSTIC
142#define PMAP_DEBUG
143#endif
144
145#if !defined(PMAP_DIAGNOSTIC)
146#define PMAP_INLINE __inline
147#else
148#define PMAP_INLINE
149#endif
150
151/*
152 * Get PDEs and PTEs for user/kernel address space
153 */
154#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
155#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
156#define pmap_pte_v(pte)		((pte)->pte_p)
157#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
158#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
159
160#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
161				:((pte)->pte_ig &= ~PTE_IG_WIRED))
162#define pmap_pte_set_prot(pte, v) do {		\
163    (pte)->pte_ar = v >> 2;			\
164    (pte)->pte_pl = v & 3;			\
165} while (0)
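/*
 * The packed protection value keeps the privilege level in the low two
 * bits and the access rights above them.  A hypothetical use, with
 * constants that appear elsewhere in this file:
 *
 *	pmap_pte_set_prot(pte, (PTE_AR_R << 2) | PTE_PL_USER);
 *
 * stores pte_ar = PTE_AR_R and pte_pl = PTE_PL_USER, the same packing
 * that pte_prot_pl() and pte_prot_ar() below assume.
 */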
166
167/*
168 * Given a map and a machine independent protection code,
169 * convert to an ia64 protection code.
170 */
171#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
172#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
173#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
174int	protection_codes[2][8];
175
176/*
177 * Return non-zero if this pmap is currently active
178 */
179#define pmap_isactive(pmap)	(pmap->pm_active)
180
181/*
182 * Statically allocated kernel pmap
183 */
184struct pmap kernel_pmap_store;
185
186vm_offset_t avail_start;	/* PA of first available physical page */
187vm_offset_t avail_end;		/* PA of last available physical page */
188vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
189vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
190static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
191
192vm_offset_t vhpt_base, vhpt_size;
193
194/*
195 * We use an object to own the kernel's 'page tables'. For simplicity,
196 * we use one page directory to index a set of pages containing
197 * ia64_lptes. This gives us up to 2Gb of kernel virtual space.
198 */
199static int nkpt;
200static struct ia64_lpte **kptdir;
201#define KPTE_DIR_INDEX(va) \
202	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
203#define KPTE_PTE_INDEX(va) \
204	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
205#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
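/*
 * Worked example, assuming the 8Kb ia64 kernel page size (PAGE_SHIFT
 * == 13) and a 32-byte struct ia64_lpte: NKPTEPG is 8192/32 = 256,
 * KPTE_PTE_INDEX() takes va bits 13..20, KPTE_DIR_INDEX() takes va
 * bits 21..30, and the single kptdir page holds 1024 pointers, so the
 * scheme covers 1024 * 256 * 8Kb = 2Gb of region 5 as noted above.
 */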
206
207vm_offset_t kernel_vm_end;
208
209/* Values for ptc.e. XXX values for SKI. */
210static u_int64_t pmap_ptc_e_base = 0x100000000;
211static u_int64_t pmap_ptc_e_count1 = 3;
212static u_int64_t pmap_ptc_e_count2 = 2;
213static u_int64_t pmap_ptc_e_stride1 = 0x2000;
214static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
215
216/*
217 * Data for the RID allocator
218 */
219static u_int64_t *pmap_ridbusy;
220static int pmap_ridmax, pmap_ridcount;
221struct mtx pmap_ridmutex;
222
223/*
224 * Data for the pv entry allocation mechanism
225 */
226static uma_zone_t pvzone;
227static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
228int pmap_pagedaemon_waken;
229static struct pv_entry *pvbootentries;
230static int pvbootnext, pvbootmax;
231
232/*
233 * Data for allocating PTEs for user processes.
234 */
235static uma_zone_t ptezone;
236
237/*
238 * VHPT instrumentation.
239 */
240static int pmap_vhpt_inserts;
241static int pmap_vhpt_collisions;
242static int pmap_vhpt_resident;
243SYSCTL_DECL(_vm_stats);
244SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
245SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
246	   &pmap_vhpt_inserts, 0, "");
247SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
248	   &pmap_vhpt_collisions, 0, "");
249SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
250	   &pmap_vhpt_resident, 0, "");
251
252static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
253static pv_entry_t get_pv_entry(void);
254static void	ia64_protection_init(void);
255
256static void	pmap_invalidate_all(pmap_t pmap);
257static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
258
259vm_offset_t
260pmap_steal_memory(vm_size_t size)
261{
262	vm_size_t bank_size;
263	vm_offset_t pa, va;
264
265	size = round_page(size);
266
267	bank_size = phys_avail[1] - phys_avail[0];
268	while (size > bank_size) {
269		int i;
270		for (i = 0; phys_avail[i+2]; i+= 2) {
271			phys_avail[i] = phys_avail[i+2];
272			phys_avail[i+1] = phys_avail[i+3];
273		}
274		phys_avail[i] = 0;
275		phys_avail[i+1] = 0;
276		if (!phys_avail[0])
277			panic("pmap_steal_memory: out of memory");
278		bank_size = phys_avail[1] - phys_avail[0];
279	}
280
281	pa = phys_avail[0];
282	phys_avail[0] += size;
283
284	va = IA64_PHYS_TO_RR7(pa);
285	bzero((caddr_t) va, size);
286	return va;
287}
288
289/*
290 *	Bootstrap the system enough to run with virtual memory.
291 */
292void
293pmap_bootstrap()
294{
295	int i, j, count, ridbits;
296	struct ia64_pal_result res;
297
298	/*
299	 * Query the PAL Code to find the loop parameters for the
300	 * ptc.e instruction.
301	 */
302	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
303	if (res.pal_status != 0)
304		panic("Can't configure ptc.e parameters");
305	pmap_ptc_e_base = res.pal_result[0];
306	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
307	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
308	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
309	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
310	if (bootverbose)
311		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
312		       "stride1=0x%lx, stride2=0x%lx\n",
313		       pmap_ptc_e_base,
314		       pmap_ptc_e_count1,
315		       pmap_ptc_e_count2,
316		       pmap_ptc_e_stride1,
317		       pmap_ptc_e_stride2);
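	/*
	 * These parameters drive the nested loop in
	 * pmap_invalidate_all_1(): count1 outer iterations stepping by
	 * stride1, count2 inner iterations stepping by stride2, for
	 * count1 * count2 ptc.e operations in total (e.g. 3 * 2 = 6
	 * with the SKI defaults above).
	 */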
318
319	/*
320	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
321	 */
322	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
323	if (res.pal_status != 0) {
324		if (bootverbose)
325			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
326		ridbits = 18; /* guaranteed minimum */
327	} else {
328		ridbits = (res.pal_result[1] >> 8) & 0xff;
329		if (bootverbose)
330			printf("Processor supports %d Region ID bits\n",
331			       ridbits);
332	}
333	pmap_ridmax = (1 << ridbits);
334	pmap_ridcount = 8;
335	pmap_ridbusy = (u_int64_t *)
336		pmap_steal_memory(pmap_ridmax / 8);
337	bzero(pmap_ridbusy, pmap_ridmax / 8);
338	pmap_ridbusy[0] |= 0xff;
339	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
340
341	/*
342	 * Allocate some memory for initial kernel 'page tables'.
343	 */
344	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
345	for (i = 0; i < NKPT; i++) {
346		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
347	}
348	nkpt = NKPT;
349
350	avail_start = phys_avail[0];
351	for (i = 0; phys_avail[i+2]; i+= 2) ;
352	avail_end = phys_avail[i+1];
353	count = i+2;
354
355	/*
356	 * Figure out a useful size for the VHPT, based on the size of
357	 * physical memory and try to locate a region which is large
358	 * enough to contain the VHPT (which must be a power of two in
359	 * size and aligned to a natural boundary).
360	 * Don't use the difference between avail_start and avail_end
361	 * as a measure of memory size; the address space is often sparse
362	 * enough that doing so would cause us to (try to) create a huge VHPT.
363	 */
364	vhpt_size = 15;
365	while ((1<<vhpt_size) < ia64_btop(Maxmem) * 32)
366		vhpt_size++;
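	/*
	 * Illustrative sizing: if ia64_btop(Maxmem) works out to 128K
	 * pages (1Gb of 8Kb pages), then 128K * 32 = 4Mb and the loop
	 * above settles on vhpt_size = 22 before the fitting search
	 * below.
	 */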
367
368	vhpt_base = 0;
369	while (!vhpt_base) {
370		vm_offset_t mask;
371		if (bootverbose)
372			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
373		mask = (1L << vhpt_size) - 1;
374		for (i = 0; i < count; i += 2) {
375			vm_offset_t base, limit;
376			base = (phys_avail[i] + mask) & ~mask;
377			limit = base + (1L << vhpt_size);
378			if (limit <= phys_avail[i+1])
379				/*
380				 * VHPT can fit in this region
381				 */
382				break;
383		}
384		if (!phys_avail[i]) {
385			/*
386			 * Can't fit, try next smaller size.
387			 */
388			vhpt_size--;
389		} else {
390			vhpt_base = (phys_avail[i] + mask) & ~mask;
391		}
392	}
393	if (vhpt_size < 15)
394		panic("Can't find space for VHPT");
395
396	if (bootverbose)
397		printf("Putting VHPT at %p\n", (void *) vhpt_base);
398	if (vhpt_base != phys_avail[i]) {
399		/*
400		 * Split this region.
401		 */
402		if (bootverbose)
403			printf("Splitting [%p-%p]\n",
404			       (void *) phys_avail[i],
405			       (void *) phys_avail[i+1]);
406		for (j = count; j > i; j -= 2) {
407			phys_avail[j] = phys_avail[j-2];
408			phys_avail[j+1] = phys_avail[j-2+1];
409		}
410		phys_avail[count+2] = 0;
411		phys_avail[count+3] = 0;
412		phys_avail[i+1] = vhpt_base;
413		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
414	} else {
415		phys_avail[i] = vhpt_base + (1L << vhpt_size);
416	}
417
418	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
419	bzero((void *) vhpt_base, (1L << vhpt_size));
420	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
421			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
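	/*
	 * The value written to cr.pta encodes, per the Itanium PTA
	 * layout: bit 0 = ve (enable the VHPT walker), bits 7:2 = log2
	 * of the VHPT size, bit 8 = vf (long-format VHPT), and the
	 * VHPT base address in the remaining upper bits.
	 */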
422
423	virtual_avail = IA64_RR_BASE(5);
424	virtual_end = IA64_RR_BASE(6)-1;
425
426	/*
427	 * Initialize protection array.
428	 */
429	ia64_protection_init();
430
431	/*
432	 * Initialize the kernel pmap (which is statically allocated).
433	 */
434	for (i = 0; i < 5; i++)
435		kernel_pmap->pm_rid[i] = 0;
436	kernel_pmap->pm_active = 1;
437	TAILQ_INIT(&kernel_pmap->pm_pvlist);
438	PCPU_SET(current_pmap, kernel_pmap);
439
440	/*
441	 * Region 5 is mapped via the vhpt.
442	 */
443	ia64_set_rr(IA64_RR_BASE(5),
444		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
445
446	/*
447	 * Region 6 is direct mapped UC and region 7 is direct mapped
448	 * WB (cacheable). The details of this are controlled by the Alt {I,D}TLB
449	 * handlers. Here we just make sure that they have the largest
450	 * possible page size to minimise TLB usage.
451	 */
452	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
453	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
454
455	/*
456	 * Reserve some memory for allocating pvs while bootstrapping
457	 * the pv allocator. We need to have enough to cover mapping
458	 * the kmem_alloc region used to allocate the initial_pvs in
459	 * pmap_init. In general, the size of this region is
460	 * approximately (# physical pages) * (size of pv entry).
461	 */
462	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
463	pvbootentries = (struct pv_entry *)
464		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
465	pvbootnext = 0;
466
467	/*
468	 * Clear out any random TLB entries left over from booting.
469	 */
470	pmap_invalidate_all(kernel_pmap);
471}
472
473void *
474uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
475{
476	static vm_pindex_t color;
477	vm_page_t m;
478	int pflags;
479	void *va;
480
481	*flags = UMA_SLAB_PRIV;
482	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
483		pflags = VM_ALLOC_INTERRUPT;
484	else
485		pflags = VM_ALLOC_SYSTEM;
486	if (wait & M_ZERO)
487		pflags |= VM_ALLOC_ZERO;
488
489	for (;;) {
490		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
491		if (m == NULL) {
492			if (wait & M_NOWAIT)
493				return (NULL);
494			else
495				VM_WAIT;
496		} else
497			break;
498	}
499
500	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
501	if ((m->flags & PG_ZERO) == 0)
502		bzero(va, PAGE_SIZE);
503	return (va);
504}
505
506void
507uma_small_free(void *mem, int size, u_int8_t flags)
508{
509	vm_page_t m;
510
511	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
512	vm_page_lock_queues();
513	vm_page_free(m);
514	vm_page_unlock_queues();
515}
516
517/*
518 *	Initialize the pmap module.
519 *	Called by vm_init, to initialize any structures that the pmap
520 *	system needs to map virtual memory.
521 *	pmap_init has been enhanced to support discontiguous physical
522 *	memory in a fairly consistent way.
523 */
524void
525pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
526{
527	int i;
528	int initial_pvs;
529
530	/*
531	 * Allocate memory for random pmap data structures.  Includes the
532	 * pv_head_table.
533	 */
534
535	for(i = 0; i < vm_page_array_size; i++) {
536		vm_page_t m;
537
538		m = &vm_page_array[i];
539		TAILQ_INIT(&m->md.pv_list);
540		m->md.pv_list_count = 0;
541 	}
542
543	/*
544	 * Init the pv free list and the PTE free list.
545	 */
546	initial_pvs = vm_page_array_size;
547	if (initial_pvs < MINPV)
548		initial_pvs = MINPV;
549	if (initial_pvs > MAXPV)
550		initial_pvs = MAXPV;
551	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
552	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
553	uma_prealloc(pvzone, initial_pvs);
554
555	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
556	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
557	uma_prealloc(ptezone, initial_pvs);
558
559	/*
560	 * Now it is safe to enable pv_table recording.
561	 */
562	pmap_initialized = TRUE;
563}
564
565/*
566 * Initialize the address space (zone) for the pv_entries.  Set a
567 * high water mark so that the system can recover from excessive
568 * numbers of pv entries.
569 */
570void
571pmap_init2()
572{
573	int shpgperproc = PMAP_SHPGPERPROC;
574
575	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
576	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
577	pv_entry_high_water = 9 * (pv_entry_max / 10);
578}
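/*
 * For example (numbers purely illustrative): with the default
 * PMAP_SHPGPERPROC of 200 and a maxproc of 1000, pv_entry_max becomes
 * 200 * 1000 + vm_page_array_size, and pv_entry_high_water is 90% of
 * that, the point at which get_pv_entry() starts waking the
 * pagedaemon.
 */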
579
580
581/***************************************************
582 * Manipulate TLBs for a pmap
583 ***************************************************/
584
585static void
586pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
587{
588	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
589		("invalidating TLB for non-current pmap"));
590	ia64_ptc_g(va, PAGE_SHIFT << 2);
591}
592
593static void
594pmap_invalidate_all_1(void *arg)
595{
596	u_int64_t addr;
597	int i, j;
598	register_t psr;
599
600	psr = intr_disable();
601	addr = pmap_ptc_e_base;
602	for (i = 0; i < pmap_ptc_e_count1; i++) {
603		for (j = 0; j < pmap_ptc_e_count2; j++) {
604			ia64_ptc_e(addr);
605			addr += pmap_ptc_e_stride2;
606		}
607		addr += pmap_ptc_e_stride1;
608	}
609	intr_restore(psr);
610}
611
612static void
613pmap_invalidate_all(pmap_t pmap)
614{
615	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
616		("invalidating TLB for non-current pmap"));
617
618
619#ifdef SMP
620	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
621#else
622	pmap_invalidate_all_1(0);
623#endif
624}
625
626static u_int32_t
627pmap_allocate_rid(void)
628{
629	int rid;
630
631	if (pmap_ridcount == pmap_ridmax)
632		panic("pmap_allocate_rid: All Region IDs used");
633
634	do {
635		rid = arc4random() & (pmap_ridmax - 1);
636	} while (pmap_ridbusy[rid / 64] & (1L << (rid & 63)));
637	pmap_ridbusy[rid / 64] |= (1L << (rid & 63));
638	pmap_ridcount++;
639
640	return rid;
641}
642
643static void
644pmap_free_rid(u_int32_t rid)
645{
646	mtx_lock(&pmap_ridmutex);
647	pmap_ridbusy[rid / 64] &= ~(1L << (rid & 63));
648	pmap_ridcount--;
649	mtx_unlock(&pmap_ridmutex);
650}
651
652static void
653pmap_ensure_rid(pmap_t pmap, vm_offset_t va)
654{
655	int rr;
656
657	rr = va >> 61;
658
659	/*
660	 * We get called for virtual addresses that may just as well be
661	 * kernel addresses (ie region 5, 6 or 7). Since the pm_rid field
662	 * only holds region IDs for user regions, we have to make sure
663	 * the region is within bounds.
664	 */
665	if (rr >= 5)
666		return;
667
668	if (pmap->pm_rid[rr])
669		return;
670
671	mtx_lock(&pmap_ridmutex);
672	pmap->pm_rid[rr] = pmap_allocate_rid();
673	if (pmap == PCPU_GET(current_pmap))
674		ia64_set_rr(IA64_RR_BASE(rr),
675			    (pmap->pm_rid[rr] << 8)|(PAGE_SHIFT << 2)|1);
676	mtx_unlock(&pmap_ridmutex);
677}
678
679/***************************************************
680 * Low level helper routines.....
681 ***************************************************/
682
683/*
684 * Install a pte into the VHPT
685 */
686static PMAP_INLINE void
687pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
688{
689	u_int64_t *vhp, *p;
690
691	/* invalidate the pte */
692	atomic_set_64(&vhpte->pte_tag, 1L << 63);
693	ia64_mf();			/* make sure everyone sees */
694
695	vhp = (u_int64_t *) vhpte;
696	p = (u_int64_t *) pte;
697
698	vhp[0] = p[0];
699	vhp[1] = p[1];
700	vhp[2] = p[2];			/* sets ti to one */
701
702	ia64_mf();
703}
704
705/*
706 * Compare essential parts of pte.
707 */
708static PMAP_INLINE int
709pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
710{
711	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
712}
713
714/*
715 * this routine defines the region(s) of memory that should
716 * not be tested for the modified bit.
717 */
718static PMAP_INLINE int
719pmap_track_modified(vm_offset_t va)
720{
721	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
722		return 1;
723	else
724		return 0;
725}
726
727#ifndef KSTACK_MAX_PAGES
728#define KSTACK_MAX_PAGES 32
729#endif
730
731/*
732 * Create the KSTACK for a new thread.
733 * This routine directly affects the fork perf for a process/thread.
734 */
735void
736pmap_new_thread(struct thread *td, int pages)
737{
738	vm_offset_t *ks;
739
740	/* Bounds check */
741	if (pages <= 1)
742		pages = KSTACK_PAGES;
743	else if (pages > KSTACK_MAX_PAGES)
744		pages = KSTACK_MAX_PAGES;
745
746	/*
747	 * Use contigmalloc for user area so that we can use a region
748	 * 7 address for it, which makes it impossible to accidentally
749	 * lose when recording a trapframe.
750	 */
751	ks = contigmalloc(pages * PAGE_SIZE, M_PMAP, 0, 0ul,
752	    256*1024*1024 - 1, PAGE_SIZE, 256*1024*1024);
753	if (ks == NULL)
754		panic("pmap_new_thread: could not contigmalloc %d pages\n",
755		    pages);
756
757	td->td_md.md_kstackvirt = ks;
758	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
759	td->td_kstack_pages = pages;
760}
761
762/*
763 * Dispose the KSTACK for a thread that has exited.
764 * This routine directly impacts the exit perf of a process/thread.
765 */
766void
767pmap_dispose_thread(struct thread *td)
768{
769	int pages;
770
771	pages = td->td_kstack_pages;
772	contigfree(td->td_md.md_kstackvirt, pages * PAGE_SIZE, M_PMAP);
773	td->td_md.md_kstackvirt = NULL;
774	td->td_kstack = 0;
775}
776
777/*
778 * Set up a variable sized alternate kstack.  This appears to be MI.
779 */
780void
781pmap_new_altkstack(struct thread *td, int pages)
782{
783
784	/*
785	 * Shuffle the original stack. Save the virtual kstack address
786	 * instead of the physical address because 1) we can derive the
787	 * physical address from the virtual address and 2) we need the
788	 * virtual address in pmap_dispose_thread.
789	 */
790	td->td_altkstack_obj = td->td_kstack_obj;
791	td->td_altkstack = (vm_offset_t)td->td_md.md_kstackvirt;
792	td->td_altkstack_pages = td->td_kstack_pages;
793
794	pmap_new_thread(td, pages);
795}
796
797void
798pmap_dispose_altkstack(struct thread *td)
799{
800
801	pmap_dispose_thread(td);
802
803	/*
804	 * Restore the original kstack. Note that td_altkstack holds the
805	 * virtual kstack address of the previous kstack.
806	 */
807	td->td_md.md_kstackvirt = (void*)td->td_altkstack;
808	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa(td->td_altkstack));
809	td->td_kstack_obj = td->td_altkstack_obj;
810	td->td_kstack_pages = td->td_altkstack_pages;
811	td->td_altkstack = 0;
812	td->td_altkstack_obj = NULL;
813	td->td_altkstack_pages = 0;
814}
815
816/*
817 * Allow the KSTACK for a thread to be prejudicially paged out.
818 */
819void
820pmap_swapout_thread(struct thread *td)
821{
822}
823
824/*
825 * Bring the KSTACK for a specified thread back in.
826 */
827void
828pmap_swapin_thread(struct thread *td)
829{
830}
831
832/***************************************************
833 * Page table page management routines.....
834 ***************************************************/
835
836void
837pmap_pinit0(struct pmap *pmap)
838{
839	/* kernel_pmap is the same as any other pmap. */
840	pmap_pinit(pmap);
841}
842
843/*
844 * Initialize a preallocated and zeroed pmap structure,
845 * such as one in a vmspace structure.
846 */
847void
848pmap_pinit(struct pmap *pmap)
849{
850	int i;
851
852	pmap->pm_flags = 0;
853	for (i = 0; i < 5; i++)
854		pmap->pm_rid[i] = 0;
855	pmap->pm_ptphint = NULL;
856	pmap->pm_active = 0;
857	TAILQ_INIT(&pmap->pm_pvlist);
858	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
859}
860
861/*
862 * Wire in kernel global address entries.  To avoid a race condition
863 * between pmap initialization and pmap_growkernel, this procedure
864 * should be called after the vmspace is attached to the process
865 * but before this pmap is activated.
866 */
867void
868pmap_pinit2(struct pmap *pmap)
869{
870}
871
872/***************************************************
873 * Pmap allocation/deallocation routines.
874 ***************************************************/
875
876/*
877 * Release any resources held by the given physical map.
878 * Called when a pmap initialized by pmap_pinit is being released.
879 * Should only be called if the map contains no valid mappings.
880 */
881void
882pmap_release(pmap_t pmap)
883{
884	int i;
885
886	for (i = 0; i < 5; i++)
887		if (pmap->pm_rid[i])
888			pmap_free_rid(pmap->pm_rid[i]);
889}
890
891/*
892 * grow the number of kernel page table entries, if needed
893 */
894void
895pmap_growkernel(vm_offset_t addr)
896{
897	struct ia64_lpte *ptepage;
898	vm_page_t nkpg;
899
900	if (kernel_vm_end == 0) {
901		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
902			+ IA64_RR_BASE(5);
903	}
904	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
905	while (kernel_vm_end < addr) {
906		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
907			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
908				& ~(PAGE_SIZE * NKPTEPG - 1);
909			continue;
910		}
911
912		/*
913		 * We could handle more by increasing the size of kptdir.
914		 */
915		if (nkpt == MAXKPT)
916			panic("pmap_growkernel: out of kernel address space");
917
918		/*
919		 * This index is bogus, but out of the way
920		 */
921		nkpg = vm_page_alloc(NULL, nkpt,
922		    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
923		if (!nkpg)
924			panic("pmap_growkernel: no memory to grow kernel");
925
926		nkpt++;
927		ptepage = (struct ia64_lpte *)
928			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
929		bzero(ptepage, PAGE_SIZE);
930		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
931
932		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
933	}
934}
935
936/***************************************************
937 * page management routines.
938 ***************************************************/
939
940/*
941 * free the pv_entry back to the free list
942 */
943static PMAP_INLINE void
944free_pv_entry(pv_entry_t pv)
945{
946	pv_entry_count--;
947	uma_zfree(pvzone, pv);
948}
949
950/*
951 * get a new pv_entry, allocating a block from the system
952 * when needed.
953 * the memory allocation is performed bypassing the malloc code
954 * because of the possibility of allocations at interrupt time.
955 */
956static pv_entry_t
957get_pv_entry(void)
958{
959	pv_entry_count++;
960	if (pv_entry_high_water &&
961		(pv_entry_count > pv_entry_high_water) &&
962		(pmap_pagedaemon_waken == 0)) {
963		pmap_pagedaemon_waken = 1;
964		wakeup (&vm_pages_needed);
965	}
966	return uma_zalloc(pvzone, M_NOWAIT);
967}
968
969/*
970 * Add an ia64_lpte to the VHPT.
971 */
972static void
973pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
974{
975	struct ia64_lpte *vhpte;
976
977	pmap_vhpt_inserts++;
978	pmap_vhpt_resident++;
979
980	vhpte = (struct ia64_lpte *) ia64_thash(va);
981
982	if (vhpte->pte_chain)
983		pmap_vhpt_collisions++;
984
985	pte->pte_chain = vhpte->pte_chain;
986	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
987
988	if (!vhpte->pte_p && pte->pte_p)
989		pmap_install_pte(vhpte, pte);
990	else
991		ia64_mf();
992}
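/*
 * A note on the two helpers used above (descriptive only):
 * ia64_thash(va) yields the address of the VHPT slot the hardware
 * walker would probe for va, and ia64_ttag(va) yields the translation
 * tag it would compare; the pte_chain links turn each slot into a
 * collision chain that the routines below walk by tag.
 */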
993
994/*
995 * Update VHPT after a pte has changed.
996 */
997static void
998pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
999{
1000	struct ia64_lpte *vhpte;
1001
1002	vhpte = (struct ia64_lpte *) ia64_thash(va);
1003
1004	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
1005	    && pte->pte_p)
1006		pmap_install_pte(vhpte, pte);
1007}
1008
1009/*
1010 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1011 * worked or an appropriate error code otherwise.
1012 */
1013static int
1014pmap_remove_vhpt(vm_offset_t va)
1015{
1016	struct ia64_lpte *pte;
1017	struct ia64_lpte *lpte;
1018	struct ia64_lpte *vhpte;
1019	u_int64_t tag;
1020	int error = ENOENT;
1021
1022	vhpte = (struct ia64_lpte *) ia64_thash(va);
1023
1024	/*
1025	 * If the VHPTE is invalid, there can't be a collision chain.
1026	 */
1027	if (!vhpte->pte_p) {
1028		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1029		printf("can't remove vhpt entry for 0x%lx\n", va);
1030		goto done;
1031	}
1032
1033	lpte = vhpte;
1034	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1035	tag = ia64_ttag(va);
1036
1037	while (pte->pte_tag != tag) {
1038		lpte = pte;
1039		if (pte->pte_chain)
1040			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1041		else {
1042			printf("can't remove vhpt entry for 0x%lx\n", va);
1043			goto done;
1044		}
1045	}
1046
1047	/*
1048	 * Snip this pv_entry out of the collision chain.
1049	 */
1050	lpte->pte_chain = pte->pte_chain;
1051
1052	/*
1053	 * If the VHPTE matches as well, change it to map the first
1054	 * element from the chain if there is one.
1055	 */
1056	if (vhpte->pte_tag == tag) {
1057		if (vhpte->pte_chain) {
1058			pte = (struct ia64_lpte *)
1059				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1060			pmap_install_pte(vhpte, pte);
1061		} else {
1062			vhpte->pte_p = 0;
1063			ia64_mf();
1064		}
1065	}
1066
1067	pmap_vhpt_resident--;
1068	error = 0;
1069 done:
1070	return error;
1071}
1072
1073/*
1074 * Find the ia64_lpte for the given va, if any.
1075 */
1076static struct ia64_lpte *
1077pmap_find_vhpt(vm_offset_t va)
1078{
1079	struct ia64_lpte *pte;
1080	u_int64_t tag;
1081
1082	pte = (struct ia64_lpte *) ia64_thash(va);
1083	if (!pte->pte_chain) {
1084		pte = 0;
1085		goto done;
1086	}
1087
1088	tag = ia64_ttag(va);
1089	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1090
1091	while (pte->pte_tag != tag) {
1092		if (pte->pte_chain) {
1093			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1094		} else {
1095			pte = 0;
1096			break;
1097		}
1098	}
1099
1100 done:
1101	return pte;
1102}
1103
1104/*
1105 * Remove an entry from the list of managed mappings.
1106 */
1107static int
1108pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1109{
1110	if (!pv) {
1111		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1112			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1113				if (pmap == pv->pv_pmap && va == pv->pv_va)
1114					break;
1115			}
1116		} else {
1117			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1118				if (va == pv->pv_va)
1119					break;
1120			}
1121		}
1122	}
1123
1124	if (pv) {
1125		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1126		m->md.pv_list_count--;
1127		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1128			vm_page_flag_clear(m, PG_WRITEABLE);
1129
1130		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1131		free_pv_entry(pv);
1132		return 0;
1133	} else {
1134		return ENOENT;
1135	}
1136}
1137
1138/*
1139 * Create a pv entry for page at pa for
1140 * (pmap, va).
1141 */
1142static void
1143pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1144{
1145	pv_entry_t pv;
1146
1147	pv = get_pv_entry();
1148	pv->pv_pmap = pmap;
1149	pv->pv_va = va;
1150
1151	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1152	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1153	m->md.pv_list_count++;
1154}
1155
1156/*
1157 *	Routine:	pmap_extract
1158 *	Function:
1159 *		Extract the physical page address associated
1160 *		with the given map/virtual_address pair.
1161 */
1162vm_offset_t
1163pmap_extract(pmap, va)
1164	register pmap_t pmap;
1165	vm_offset_t va;
1166{
1167	struct ia64_lpte *pte;
1168	pmap_t oldpmap;
1169
1170	if (!pmap)
1171		return 0;
1172
1173	oldpmap = pmap_install(pmap);
1174	pte = pmap_find_vhpt(va);
1175	pmap_install(oldpmap);
1176
1177	if (!pte)
1178		return 0;
1179
1180	return pmap_pte_pa(pte);
1181}
1182
1183/***************************************************
1184 * Low level mapping routines.....
1185 ***************************************************/
1186
1187/*
1188 * Find the kernel lpte for mapping the given virtual address, which
1189 * must be in the part of region 5 which we can cover with our kernel
1190 * 'page tables'.
1191 */
1192static struct ia64_lpte *
1193pmap_find_kpte(vm_offset_t va)
1194{
1195	KASSERT((va >> 61) == 5,
1196		("kernel mapping 0x%lx not in region 5", va));
1197	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1198		("kernel mapping 0x%lx out of range", va));
1199	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1200}
1201
1202/*
1203 * Find a pte suitable for mapping a user-space address. If one exists
1204 * in the VHPT, that one will be returned, otherwise a new pte is
1205 * allocated.
1206 */
1207static struct ia64_lpte *
1208pmap_find_pte(vm_offset_t va)
1209{
1210	struct ia64_lpte *pte;
1211
1212	if (va >= VM_MAXUSER_ADDRESS)
1213		return pmap_find_kpte(va);
1214
1215	pte = pmap_find_vhpt(va);
1216	if (!pte) {
1217		pte = uma_zalloc(ptezone, 0);
1218		pte->pte_p = 0;
1219	}
1220	return pte;
1221}
1222
1223/*
1224 * Free a pte which is now unused. This simply returns it to the zone
1225 * allocator if it is a user mapping. For kernel mappings, clear the
1226 * valid bit to make it clear that the mapping is not currently used.
1227 */
1228static void
1229pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1230{
1231	if (va < VM_MAXUSER_ADDRESS)
1232		uma_zfree(ptezone, pte);
1233	else
1234		pte->pte_p = 0;
1235}
1236
1237/*
1238 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1239 * the pte was originally valid, then it is assumed to already be in the
1240 * VHPT.
1241 */
1242static void
1243pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1244	     int ig, int pl, int ar)
1245{
1246	int wasvalid = pte->pte_p;
1247
1248	pte->pte_p = 1;
1249	pte->pte_ma = PTE_MA_WB;
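	/*
	 * For managed pages the accessed and dirty bits start out
	 * clear, so the first reference and the first write each raise
	 * a fault that lets them be recorded (see pmap_remove_pte()
	 * and pmap_protect()); unmanaged mappings pre-set both bits to
	 * avoid those faults.
	 */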
1250	if (ig & PTE_IG_MANAGED) {
1251		pte->pte_a = 0;
1252		pte->pte_d = 0;
1253	} else {
1254		pte->pte_a = 1;
1255		pte->pte_d = 1;
1256	}
1257	pte->pte_pl = pl;
1258	pte->pte_ar = ar;
1259	pte->pte_ppn = pa >> 12;
1260	pte->pte_ed = 0;
1261	pte->pte_ig = ig;
1262
1263	pte->pte_ps = PAGE_SHIFT;
1264	pte->pte_key = 0;
1265
1266	pte->pte_tag = ia64_ttag(va);
1267
1268	if (wasvalid) {
1269		pmap_update_vhpt(pte, va);
1270	} else {
1271		pmap_enter_vhpt(pte, va);
1272	}
1273}
1274
1275/*
1276 * If a pte contains a valid mapping, clear it and update the VHPT.
1277 */
1278static void
1279pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1280{
1281	if (pte->pte_p) {
1282		pmap_remove_vhpt(va);
1283		ia64_ptc_g(va, PAGE_SHIFT << 2);
1284		pte->pte_p = 0;
1285	}
1286}
1287
1288/*
1289 * Remove the (possibly managed) mapping represented by pte from the
1290 * given pmap.
1291 */
1292static int
1293pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1294		pv_entry_t pv, int freepte)
1295{
1296	int error;
1297	vm_page_t m;
1298
1299	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1300		("removing pte for non-current pmap"));
1301
1302	/*
1303	 * First remove from the VHPT.
1304	 */
1305	error = pmap_remove_vhpt(va);
1306	if (error)
1307		return error;
1308
1309	/*
1310	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1311	 */
1312	pte->pte_p = 0;
1313
1314	if (pte->pte_ig & PTE_IG_WIRED)
1315		pmap->pm_stats.wired_count -= 1;
1316
1317	pmap->pm_stats.resident_count -= 1;
1318	if (pte->pte_ig & PTE_IG_MANAGED) {
1319		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1320		if (pte->pte_d)
1321			if (pmap_track_modified(va))
1322				vm_page_dirty(m);
1323		if (pte->pte_a)
1324			vm_page_flag_set(m, PG_REFERENCED);
1325
1326		if (freepte)
1327			pmap_free_pte(pte, va);
1328		return pmap_remove_entry(pmap, m, va, pv);
1329	} else {
1330		if (freepte)
1331			pmap_free_pte(pte, va);
1332		return 0;
1333	}
1334}
1335
1336/*
1337 * Add a list of wired pages to the kva.  This routine is only
1338 * used for temporary
1339 * kernel mappings that do not need to have
1340 * page modification or references recorded.
1341 * Note that old mappings are simply written
1342 * over.  The page *must* be wired.
1343 */
1344void
1345pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1346{
1347	int i;
1348	struct ia64_lpte *pte;
1349
1350	for (i = 0; i < count; i++) {
1351		vm_offset_t tva = va + i * PAGE_SIZE;
1352		int wasvalid;
1353		pte = pmap_find_kpte(tva);
1354		wasvalid = pte->pte_p;
1355		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1356			     0, PTE_PL_KERN, PTE_AR_RWX);
1357		if (wasvalid)
1358			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1359	}
1360}
1361
1362/*
1363 * this routine jerks page mappings from the
1364 * kernel -- it is meant only for temporary mappings.
1365 */
1366void
1367pmap_qremove(vm_offset_t va, int count)
1368{
1369	int i;
1370	struct ia64_lpte *pte;
1371
1372	for (i = 0; i < count; i++) {
1373		pte = pmap_find_kpte(va);
1374		pmap_clear_pte(pte, va);
1375		va += PAGE_SIZE;
1376	}
1377}
1378
1379/*
1380 * Add a wired page to the kva.
1381 */
1382void
1383pmap_kenter(vm_offset_t va, vm_offset_t pa)
1384{
1385	struct ia64_lpte *pte;
1386	int wasvalid;
1387
1388	pte = pmap_find_kpte(va);
1389	wasvalid = pte->pte_p;
1390	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1391	if (wasvalid)
1392		ia64_ptc_g(va, PAGE_SHIFT << 2);
1393}
1394
1395/*
1396 * Remove a page from the kva
1397 */
1398void
1399pmap_kremove(vm_offset_t va)
1400{
1401	struct ia64_lpte *pte;
1402
1403	pte = pmap_find_kpte(va);
1404	pmap_clear_pte(pte, va);
1405}
1406
1407/*
1408 *	Used to map a range of physical addresses into kernel
1409 *	virtual address space.
1410 *
1411 *	The value passed in '*virt' is a suggested virtual address for
1412 *	the mapping. Architectures which can support a direct-mapped
1413 *	physical to virtual region can return the appropriate address
1414 *	within that region, leaving '*virt' unchanged. Other
1415 *	architectures should map the pages starting at '*virt' and
1416 *	update '*virt' with the first usable address after the mapped
1417 *	region.
1418 */
1419vm_offset_t
1420pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1421{
1422	return IA64_PHYS_TO_RR7(start);
1423}
1424
1425/*
1426 * Remove a single page from a process address space
1427 */
1428static void
1429pmap_remove_page(pmap_t pmap, vm_offset_t va)
1430{
1431	struct ia64_lpte *pte;
1432
1433	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1434		("removing page for non-current pmap"));
1435
1436	pte = pmap_find_vhpt(va);
1437	if (pte) {
1438		pmap_remove_pte(pmap, pte, va, 0, 1);
1439		pmap_invalidate_page(pmap, va);
1440	}
1441	return;
1442}
1443
1444/*
1445 *	Remove the given range of addresses from the specified map.
1446 *
1447 *	It is assumed that the start and end are properly
1448 *	rounded to the page size.
1449 */
1450void
1451pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1452{
1453	pmap_t oldpmap;
1454	vm_offset_t va;
1455	pv_entry_t pv;
1456	struct ia64_lpte *pte;
1457
1458	if (pmap == NULL)
1459		return;
1460
1461	if (pmap->pm_stats.resident_count == 0)
1462		return;
1463
1464	oldpmap = pmap_install(pmap);
1465
1466	/*
1467	 * Special-case the removal of a single page: it is a very
1468	 * common operation and it is easy to short-circuit some
1469	 * code for it.
1470	 */
1471	if (sva + PAGE_SIZE == eva) {
1472		pmap_remove_page(pmap, sva);
1473		pmap_install(oldpmap);
1474		return;
1475	}
1476
1477	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1478		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1479			va = pv->pv_va;
1480			if (va >= sva && va < eva) {
1481				pte = pmap_find_vhpt(va);
1482				pmap_remove_pte(pmap, pte, va, pv, 1);
1483				pmap_invalidate_page(pmap, va);
1484			}
1485		}
1486
1487	} else {
1488		for (va = sva; va < eva; va += PAGE_SIZE) {
1489			pte = pmap_find_vhpt(va);
1490			if (pte) {
1491				pmap_remove_pte(pmap, pte, va, 0, 1);
1492				pmap_invalidate_page(pmap, va);
1493			}
1494		}
1495	}
1496
1497	pmap_install(oldpmap);
1498}
1499
1500/*
1501 *	Routine:	pmap_remove_all
1502 *	Function:
1503 *		Removes this physical page from
1504 *		all physical maps in which it resides.
1505 *		Reflects back modify bits to the pager.
1506 *
1507 *	Notes:
1508 *		Original versions of this routine were very
1509 *		inefficient because they iteratively called
1510 *		pmap_remove (slow...)
1511 */
1512
1513void
1514pmap_remove_all(vm_page_t m)
1515{
1516	pmap_t oldpmap;
1517	pv_entry_t pv;
1518	int s;
1519
1520#if defined(PMAP_DIAGNOSTIC)
1521	/*
1522	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1523	 * pages!
1524	 */
1525	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1526		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1527	}
1528#endif
1529
1530	s = splvm();
1531
1532	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1533		struct ia64_lpte *pte;
1534		pmap_t pmap = pv->pv_pmap;
1535		vm_offset_t va = pv->pv_va;
1536
1537		oldpmap = pmap_install(pmap);
1538		pte = pmap_find_vhpt(va);
1539		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1540			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1541		pmap_remove_pte(pmap, pte, va, pv, 1);
1542		pmap_invalidate_page(pmap, va);
1543		pmap_install(oldpmap);
1544	}
1545
1546	vm_page_flag_clear(m, PG_WRITEABLE);
1547
1548	splx(s);
1549	return;
1550}
1551
1552/*
1553 *	Set the physical protection on the
1554 *	specified range of this map as requested.
1555 */
1556void
1557pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1558{
1559	pmap_t oldpmap;
1560	struct ia64_lpte *pte;
1561	int newprot;
1562
1563	if (pmap == NULL)
1564		return;
1565
1566	oldpmap = pmap_install(pmap);
1567
1568	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1569		pmap_remove(pmap, sva, eva);
1570		pmap_install(oldpmap);
1571		return;
1572	}
1573
1574	if (prot & VM_PROT_WRITE) {
1575		pmap_install(oldpmap);
1576		return;
1577	}
1578
1579	newprot = pte_prot(pmap, prot);
1580
1581	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1582		panic("pmap_protect: unaligned addresses");
1583
1584	while (sva < eva) {
1585		/*
1586		 * If page is invalid, skip this page
1587		 */
1588		pte = pmap_find_vhpt(sva);
1589		if (!pte) {
1590			sva += PAGE_SIZE;
1591			continue;
1592		}
1593
1594		if (pmap_pte_prot(pte) != newprot) {
1595			if (pte->pte_ig & PTE_IG_MANAGED) {
1596				vm_offset_t pa = pmap_pte_pa(pte);
1597				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1598				if (pte->pte_d) {
1599					if (pmap_track_modified(sva))
1600						vm_page_dirty(m);
1601					pte->pte_d = 0;
1602				}
1603				if (pte->pte_a) {
1604					vm_page_flag_set(m, PG_REFERENCED);
1605					pte->pte_a = 0;
1606				}
1607			}
1608			pmap_pte_set_prot(pte, newprot);
1609			pmap_update_vhpt(pte, sva);
1610			pmap_invalidate_page(pmap, sva);
1611		}
1612
1613		sva += PAGE_SIZE;
1614	}
1615	pmap_install(oldpmap);
1616}
1617
1618/*
1619 *	Insert the given physical page (p) at
1620 *	the specified virtual address (v) in the
1621 *	target physical map with the protection requested.
1622 *
1623 *	If specified, the page will be wired down, meaning
1624 *	that the related pte can not be reclaimed.
1625 *
1626 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1627 *	or lose information.  That is, this routine must actually
1628 *	insert this page into the given map NOW.
1629 */
1630void
1631pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1632	   boolean_t wired)
1633{
1634	pmap_t oldpmap;
1635	vm_offset_t pa;
1636	vm_offset_t opa;
1637	struct ia64_lpte origpte;
1638	struct ia64_lpte *pte;
1639	int managed;
1640
1641	if (pmap == NULL)
1642		return;
1643
1644	pmap_ensure_rid(pmap, va);
1645
1646	oldpmap = pmap_install(pmap);
1647
1648	va &= ~PAGE_MASK;
1649#ifdef PMAP_DIAGNOSTIC
1650	if (va > VM_MAX_KERNEL_ADDRESS)
1651		panic("pmap_enter: toobig");
1652#endif
1653
1654	/*
1655	 * Find (or create) a pte for the given mapping.
1656	 */
1657	pte = pmap_find_pte(va);
1658	origpte = *pte;
1659
1660	if (origpte.pte_p)
1661		opa = pmap_pte_pa(&origpte);
1662	else
1663		opa = 0;
1664	managed = 0;
1665
1666	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1667
1668	/*
1669	 * Mapping has not changed, must be protection or wiring change.
1670	 */
1671	if (origpte.pte_p && (opa == pa)) {
1672		/*
1673		 * Wiring change, just update stats. We don't worry about
1674		 * wiring PT pages as they remain resident as long as there
1675		 * are valid mappings in them. Hence, if a user page is wired,
1676		 * the PT page will be also.
1677		 */
1678		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1679			pmap->pm_stats.wired_count++;
1680		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1681			pmap->pm_stats.wired_count--;
1682
1683		/*
1684		 * We might be turning off write access to the page,
1685		 * so we go ahead and sense modify status.
1686		 */
1687		if (origpte.pte_ig & PTE_IG_MANAGED) {
1688			if (origpte.pte_d && pmap_track_modified(va)) {
1689				vm_page_t om;
1690				om = PHYS_TO_VM_PAGE(opa);
1691				vm_page_dirty(om);
1692			}
1693		}
1694
1695		managed = origpte.pte_ig & PTE_IG_MANAGED;
1696		goto validate;
1697	}
1698	/*
1699	 * Mapping has changed, invalidate old range and fall
1700	 * through to handle validating new mapping.
1701	 */
1702	if (opa) {
1703		int error;
1704		vm_page_lock_queues();
1705		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1706		vm_page_unlock_queues();
1707		if (error)
1708			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1709	}
1710
1711	/*
1712	 * Enter on the PV list if part of our managed memory.
1713	 */
1714	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1715		pmap_insert_entry(pmap, va, m);
1716		managed |= PTE_IG_MANAGED;
1717	}
1718
1719	/*
1720	 * Increment counters
1721	 */
1722	pmap->pm_stats.resident_count++;
1723	if (wired)
1724		pmap->pm_stats.wired_count++;
1725
1726validate:
1727
1728	/*
1729	 * Now validate mapping with desired protection/wiring. This
1730	 * adds the pte to the VHPT if necessary.
1731	 */
1732	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1733		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1734
1735	/*
1736	 * if the mapping or permission bits are different, we need
1737	 * to invalidate the page.
1738	 */
1739	if (!pmap_equal_pte(&origpte, pte))
1740		pmap_invalidate_page(pmap, va);
1741
1742	pmap_install(oldpmap);
1743}
1744
1745/*
1746 * this code makes some *MAJOR* assumptions:
1747 * 1. Current pmap & pmap exists.
1748 * 2. Not wired.
1749 * 3. Read access.
1750 * 4. No page table pages.
1751 * 5. Tlbflush is deferred to calling procedure.
1752 * 6. Page IS managed.
1753 * but is *MUCH* faster than pmap_enter...
1754 */
1755
1756static void
1757pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1758{
1759	struct ia64_lpte *pte;
1760	pmap_t oldpmap;
1761
1762	pmap_ensure_rid(pmap, va);
1763
1764	oldpmap = pmap_install(pmap);
1765
1766	pte = pmap_find_pte(va);
1767	if (pte->pte_p)
1768		return;
1769
1770	/*
1771	 * Enter on the PV list since it is part of our managed memory.
1772	 */
1773	pmap_insert_entry(pmap, va, m);
1774
1775	/*
1776	 * Increment counters
1777	 */
1778	pmap->pm_stats.resident_count++;
1779
1780	/*
1781	 * Initialise PTE with read-only protection and enter into VHPT.
1782	 */
1783	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1784		     PTE_IG_MANAGED,
1785		     PTE_PL_USER, PTE_AR_R);
1786
1787	pmap_install(oldpmap);
1788}
1789
1790/*
1791 * Make temporary mapping for a physical address. This is called
1792 * during dump.
1793 */
1794void *
1795pmap_kenter_temporary(vm_offset_t pa, int i)
1796{
1797	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1798}
1799
1800#define MAX_INIT_PT (96)
1801/*
1802 * pmap_object_init_pt preloads the ptes for a given object
1803 * into the specified pmap.  This eliminates the blast of soft
1804 * faults on process startup and immediately after an mmap.
1805 */
1806void
1807pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1808		    vm_object_t object, vm_pindex_t pindex,
1809		    vm_size_t size, int limit)
1810{
1811	pmap_t oldpmap;
1812	vm_offset_t tmpidx;
1813	int psize;
1814	vm_page_t p;
1815	int objpgs;
1816
1817	if (pmap == NULL || object == NULL)
1818		return;
1819
1820	oldpmap = pmap_install(pmap);
1821
1822	psize = ia64_btop(size);
1823
1824	if ((object->type != OBJT_VNODE) ||
1825		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1826			(object->resident_page_count > MAX_INIT_PT))) {
1827		pmap_install(oldpmap);
1828		return;
1829	}
1830
1831	if (psize + pindex > object->size) {
1832		if (object->size < pindex)
1833			return;
1834		psize = object->size - pindex;
1835	}
1836
1837	/*
1838	 * if we are processing a major portion of the object, then scan the
1839	 * entire thing.
1840	 */
1841	if (psize > (object->resident_page_count >> 2)) {
1842		objpgs = psize;
1843
1844		for (p = TAILQ_FIRST(&object->memq);
1845		    ((objpgs > 0) && (p != NULL));
1846		    p = TAILQ_NEXT(p, listq)) {
1847
1848			tmpidx = p->pindex;
1849			if (tmpidx < pindex) {
1850				continue;
1851			}
1852			tmpidx -= pindex;
1853			if (tmpidx >= psize) {
1854				continue;
1855			}
1856			/*
1857			 * don't allow an madvise to blow away our really
1858			 * free pages by allocating pv entries.
1859			 */
1860			if ((limit & MAP_PREFAULT_MADVISE) &&
1861			    cnt.v_free_count < cnt.v_free_reserved) {
1862				break;
1863			}
1864			vm_page_lock_queues();
1865			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1866				(p->busy == 0) &&
1867			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1868				if ((p->queue - p->pc) == PQ_CACHE)
1869					vm_page_deactivate(p);
1870				vm_page_busy(p);
1871				vm_page_unlock_queues();
1872				pmap_enter_quick(pmap,
1873						 addr + ia64_ptob(tmpidx), p);
1874				vm_page_lock_queues();
1875				vm_page_wakeup(p);
1876			}
1877			vm_page_unlock_queues();
1878			objpgs -= 1;
1879		}
1880	} else {
1881		/*
1882		 * else lookup the pages one-by-one.
1883		 */
1884		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1885			/*
1886			 * don't allow an madvise to blow away our really
1887			 * free pages by allocating pv entries.
1888			 */
1889			if ((limit & MAP_PREFAULT_MADVISE) &&
1890			    cnt.v_free_count < cnt.v_free_reserved) {
1891				break;
1892			}
1893			p = vm_page_lookup(object, tmpidx + pindex);
1894			if (p == NULL)
1895				continue;
1896			vm_page_lock_queues();
1897			if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
1898				(p->busy == 0) &&
1899			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1900				if ((p->queue - p->pc) == PQ_CACHE)
1901					vm_page_deactivate(p);
1902				vm_page_busy(p);
1903				vm_page_unlock_queues();
1904				pmap_enter_quick(pmap,
1905						 addr + ia64_ptob(tmpidx), p);
1906				vm_page_lock_queues();
1907				vm_page_wakeup(p);
1908			}
1909			vm_page_unlock_queues();
1910		}
1911	}
1912	pmap_install(oldpmap);
1913	return;
1914}
1915
1916/*
1917 * pmap_prefault provides a quick way of clustering
1918 * pagefaults into a process's address space.  It is a "cousin"
1919 * of pmap_object_init_pt, except it runs at page fault time instead
1920 * of mmap time.
1921 */
1922#define PFBAK 4
1923#define PFFOR 4
1924#define PAGEORDER_SIZE (PFBAK+PFFOR)
1925
1926static int pmap_prefault_pageorder[] = {
1927	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
1928	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1929	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1930	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1931};
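/*
 * I.e. the candidate pages around the faulting address are tried
 * nearest-first, alternating before and after it, never more than
 * PFBAK pages back or PFFOR pages forward.
 */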
1932
1933void
1934pmap_prefault(pmap, addra, entry)
1935	pmap_t pmap;
1936	vm_offset_t addra;
1937	vm_map_entry_t entry;
1938{
1939	int i;
1940	vm_offset_t starta;
1941	vm_offset_t addr;
1942	vm_pindex_t pindex;
1943	vm_page_t m, mpte;
1944	vm_object_t object;
1945
1946	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1947		return;
1948
1949	object = entry->object.vm_object;
1950
1951	starta = addra - PFBAK * PAGE_SIZE;
1952	if (starta < entry->start) {
1953		starta = entry->start;
1954	} else if (starta > addra) {
1955		starta = 0;
1956	}
1957
1958	mpte = NULL;
1959	for (i = 0; i < PAGEORDER_SIZE; i++) {
1960		vm_object_t lobject;
1961		struct ia64_lpte *pte;
1962
1963		addr = addra + pmap_prefault_pageorder[i];
1964		if (addr > addra + (PFFOR * PAGE_SIZE))
1965			addr = 0;
1966
1967		if (addr < starta || addr >= entry->end)
1968			continue;
1969
1970		pte = pmap_find_vhpt(addr);
1971		if (pte && pte->pte_p)
1972			continue;
1973
1974		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
1975		lobject = object;
1976		for (m = vm_page_lookup(lobject, pindex);
1977		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
1978		    lobject = lobject->backing_object) {
1979			if (lobject->backing_object_offset & PAGE_MASK)
1980				break;
1981			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
1982			m = vm_page_lookup(lobject->backing_object, pindex);
1983		}
1984
1985		/*
1986		 * give-up when a page is not in memory
1987		 */
1988		if (m == NULL)
1989			break;
1990		vm_page_lock_queues();
1991		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1992			(m->busy == 0) &&
1993		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1994
1995			if ((m->queue - m->pc) == PQ_CACHE) {
1996				vm_page_deactivate(m);
1997			}
1998			vm_page_busy(m);
1999			vm_page_unlock_queues();
2000			pmap_enter_quick(pmap, addr, m);
2001			vm_page_lock_queues();
2002			vm_page_wakeup(m);
2003		}
2004		vm_page_unlock_queues();
2005	}
2006}
2007
2008/*
2009 *	Routine:	pmap_change_wiring
2010 *	Function:	Change the wiring attribute for a map/virtual-address
2011 *			pair.
2012 *	In/out conditions:
2013 *			The mapping must already exist in the pmap.
2014 */
2015void
2016pmap_change_wiring(pmap, va, wired)
2017	register pmap_t pmap;
2018	vm_offset_t va;
2019	boolean_t wired;
2020{
2021	pmap_t oldpmap;
2022	struct ia64_lpte *pte;
2023
2024	if (pmap == NULL)
2025		return;
2026
2027	oldpmap = pmap_install(pmap);
2028
2029	pte = pmap_find_vhpt(va);
2030
2031	if (wired && !pmap_pte_w(pte))
2032		pmap->pm_stats.wired_count++;
2033	else if (!wired && pmap_pte_w(pte))
2034		pmap->pm_stats.wired_count--;
2035
2036	/*
2037	 * Wiring is not a hardware characteristic so there is no need to
2038	 * invalidate TLB.
2039	 */
2040	pmap_pte_set_w(pte, wired);
2041
2042	pmap_install(oldpmap);
2043}
2044
2045
2046
2047/*
2048 *	Copy the range specified by src_addr/len
2049 *	from the source map to the range dst_addr/len
2050 *	in the destination map.
2051 *
2052 *	This routine is only advisory and need not do anything.
2053 */
2054
2055void
2056pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2057	  vm_offset_t src_addr)
2058{
2059}
2060
2061
2062/*
2063 *	pmap_zero_page zeros the specified hardware page by
2064 *	mapping it into virtual memory and using bzero to clear
2065 *	its contents.
2066 */
2067
2068void
2069pmap_zero_page(vm_page_t m)
2070{
2071	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2072	bzero((caddr_t) va, PAGE_SIZE);
2073}
2074
2075
2076/*
2077 *	pmap_zero_page_area zeros the specified hardware page by
2078 *	mapping it into virtual memory and using bzero to clear
2079 *	its contents.
2080 *
2081 *	off and size must reside within a single page.
2082 */
2083
2084void
2085pmap_zero_page_area(vm_page_t m, int off, int size)
2086{
2087	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2088	bzero((char *)(caddr_t)va + off, size);
2089}
2090
2091
2092/*
2093 *	pmap_zero_page_idle zeros the specified hardware page through
2094 *	its region 7 direct-mapped virtual address, using bzero to
2095 *	clear its contents.  This is for the vm_idlezero process.
2096 */
2097
2098void
2099pmap_zero_page_idle(vm_page_t m)
2100{
2101	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2102	bzero((caddr_t) va, PAGE_SIZE);
2103}
2104
2105
2106/*
2107 *	pmap_copy_page copies the specified (machine independent)
2108 *	page using the region 7 direct-mapped virtual addresses of
2109 *	the source and destination and bcopy, one machine dependent
2110 *	page at a time.
2111 */
2112void
2113pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2114{
2115	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2116	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2117	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2118}
2119
2120/*
2121 * Returns true if the pmap's pv is one of the first
2122 * 16 pvs linked to from this page.  This count may
2123 * be changed upwards or downwards in the future; it
2124 * is only necessary that true be returned for a small
2125 * subset of pmaps for proper page aging.
2126 */
2127boolean_t
2128pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2129{
2130	pv_entry_t pv;
2131	int loops = 0;
2132	int s;
2133
2134	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2135		return FALSE;
2136
2137	s = splvm();
2138
2139	/*
2140	 * Check current mappings, returning immediately if found.
2141	 */
2142	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2143		if (pv->pv_pmap == pmap) {
2144			splx(s);
2145			return TRUE;
2146		}
2147		loops++;
2148		if (loops >= 16)
2149			break;
2150	}
2151	splx(s);
2152	return (FALSE);
2153}
2154
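/*
 * When defined, pmap_remove_pages() refuses to operate on any pmap
 * other than the current process's; see the check at the top of that
 * function.
 */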
2155#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2156/*
2157 * Remove all pages from the specified address space; this
2158 * aids process exit speeds.  This code is special-cased for
2159 * the current process only, but the more generic (and slightly
2160 * slower) mode can be enabled.  This is much faster than
2161 * pmap_remove in the case of running down an entire address
2162 * space.
2163 */
2164void
2165pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2166{
2167	pv_entry_t pv, npv;
2168	int s;
2169
2170#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2171	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2172		printf("warning: pmap_remove_pages called with non-current pmap\n");
2173		return;
2174	}
2175#endif
2176
2177	s = splvm();
2178	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2179		pv;
2180		pv = npv) {
2181		struct ia64_lpte *pte;
2182
2183		npv = TAILQ_NEXT(pv, pv_plist);
2184
2185		if (pv->pv_va >= eva || pv->pv_va < sva) {
2186			continue;
2187		}
2188
2189		pte = pmap_find_vhpt(pv->pv_va);
2190		if (!pte)
2191			panic("pmap_remove_pages: page on pm_pvlist has no pte");
2192
2193		/*
2194		 * We cannot remove wired pages from a process' mapping
2195		 * at this time.
2196		 */
2197		if (pte->pte_ig & PTE_IG_WIRED) {
2198			continue;
2199		}
2200
2201		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2202	}
2203	splx(s);
2204
2205	pmap_invalidate_all(pmap);
2206}
2207
2208/*
2209 *      pmap_page_protect:
2210 *
2211 *      Lower the permission for all mappings to a given page.
2212 */
2213void
2214pmap_page_protect(vm_page_t m, vm_prot_t prot)
2215{
2216	pv_entry_t pv;
2217
2218	if ((prot & VM_PROT_WRITE) != 0)
2219		return;
2220	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2221		if ((m->flags & PG_WRITEABLE) == 0)
2222			return;
2223		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2224			int newprot = pte_prot(pv->pv_pmap, prot);
2225			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2226			struct ia64_lpte *pte;
2227			pte = pmap_find_vhpt(pv->pv_va);
2228			pmap_pte_set_prot(pte, newprot);
2229			pmap_update_vhpt(pte, pv->pv_va);
2230			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2231			pmap_install(oldpmap);
2232		}
2233		vm_page_flag_clear(m, PG_WRITEABLE);
2234	} else {
2235		pmap_remove_all(m);
2236	}
2237}
2238
2239vm_offset_t
2240pmap_phys_address(int ppn)
2241{
2242	return (ia64_ptob(ppn));
2243}
2244
2245/*
2246 *	pmap_ts_referenced:
2247 *
2248 *	Return a count of reference bits for a page, clearing those bits.
2249 *	It is not necessary for every reference bit to be cleared, but it
2250 *	is necessary that 0 only be returned when there are truly no
2251 *	reference bits set.
2252 *
2253 *	XXX: The exact number of bits to check and clear is a matter that
2254 *	should be tested and standardized at some point in the future for
2255 *	optimal aging of shared pages.
2256 */
2257int
2258pmap_ts_referenced(vm_page_t m)
2259{
2260	pv_entry_t pv;
2261	int count = 0;
2262
2263	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2264		return 0;
2265
2266	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2267		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2268		struct ia64_lpte *pte;
2269		pte = pmap_find_vhpt(pv->pv_va);
2270		if (pte->pte_a) {
2271			count++;
2272			pte->pte_a = 0;
2273			pmap_update_vhpt(pte, pv->pv_va);
2274			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2275		}
2276		pmap_install(oldpmap);
2277	}
2278
2279	return count;
2280}
2281
2282#if 0
2283/*
2284 *	pmap_is_referenced:
2285 *
2286 *	Return whether or not the specified physical page was referenced
2287 *	in any physical maps.
2288 */
2289static boolean_t
2290pmap_is_referenced(vm_page_t m)
2291{
2292	pv_entry_t pv;
2293
2294	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2295		return FALSE;
2296
2297	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2298		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2299		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2300		pmap_install(oldpmap);
2301		if (pte->pte_a)
2302			return 1;
2303	}
2304
2305	return 0;
2306}
2307#endif
2308
2309/*
2310 *	pmap_is_modified:
2311 *
2312 *	Return whether or not the specified physical page was modified
2313 *	in any physical maps.
2314 */
2315boolean_t
2316pmap_is_modified(vm_page_t m)
2317{
2318	pv_entry_t pv;
2319
2320	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2321		return FALSE;
2322
2323	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2324		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2325		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2326		pmap_install(oldpmap);
2327		if (pte->pte_d)
2328			return 1;
2329	}
2330
2331	return 0;
2332}
2333
2334/*
2335 *	Clear the modify bits on the specified physical page.
2336 */
2337void
2338pmap_clear_modify(vm_page_t m)
2339{
2340	pv_entry_t pv;
2341
2342	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2343		return;
2344
2345	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2346		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2347		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2348		if (pte->pte_d) {
2349			pte->pte_d = 0;
2350			pmap_update_vhpt(pte, pv->pv_va);
2351			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2352		}
2353		pmap_install(oldpmap);
2354	}
2355}
2356
2357/*
2358 *	pmap_clear_reference:
2359 *
2360 *	Clear the reference bit on the specified physical page.
2361 */
2362void
2363pmap_clear_reference(vm_page_t m)
2364{
2365	pv_entry_t pv;
2366
2367	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2368		return;
2369
2370	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2371		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2372		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2373		if (pte->pte_a) {
2374			pte->pte_a = 0;
2375			pmap_update_vhpt(pte, pv->pv_va);
2376			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2377		}
2378		pmap_install(oldpmap);
2379	}
2380}
2381
2382/*
2383 * Miscellaneous support routines follow
2384 */
2385
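/*
 * Build the protection_codes tables.  protection_codes[0] is indexed
 * by a VM_PROT_{READ,WRITE,EXECUTE} combination and holds the kernel's
 * pre-shifted PTE access-rights and privilege-level bits;
 * protection_codes[1] holds the user equivalents, e.g.
 * (PTE_AR_RX << 2) | PTE_PL_USER for read+execute.  These values are
 * presumably merged into new ptes by pte_prot() elsewhere in this file.
 */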
2386static void
2387ia64_protection_init()
2388{
2389	int prot, *kp, *up;
2390
2391	kp = protection_codes[0];
2392	up = protection_codes[1];
2393
2394	for (prot = 0; prot < 8; prot++) {
2395		switch (prot) {
2396		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2397			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2398			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2399			break;
2400
2401		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2402			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2403			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2404			break;
2405
2406		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2407			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2408			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2409			break;
2410
2411		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2412			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2413			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2414			break;
2415
2416		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2417			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2418			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2419			break;
2420
2421		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2422			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2423			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2424			break;
2425
2426		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2427			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2428			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2429			break;
2430
2431		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2432			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2433			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2434			break;
2435		}
2436	}
2437}
2438
2439/*
2440 * Map a set of physical memory pages into the kernel virtual
2441 * address space; on ia64 this simply returns the region 6
2442 * (uncacheable) direct-mapped address.  This routine is intended
2443 * for mapping device memory, NOT real memory.
2444 */
2445void *
2446pmap_mapdev(vm_offset_t pa, vm_size_t size)
2447{
2448	return (void*) IA64_PHYS_TO_RR6(pa);
2449}
2450
2451/*
2452 * 'Unmap' a range mapped by pmap_mapdev().
2453 */
2454void
2455pmap_unmapdev(vm_offset_t va, vm_size_t size)
2456{
2457	return;
2458}
2459
2460/*
2461 * Perform the pmap work for mincore(2): report MINCORE_* status bits.
2462 */
2463int
2464pmap_mincore(pmap_t pmap, vm_offset_t addr)
2465{
2466	pmap_t oldpmap;
2467	struct ia64_lpte *pte;
2468	int val = 0;
2469
2470	oldpmap = pmap_install(pmap);
2471	pte = pmap_find_vhpt(addr);
2472	pmap_install(oldpmap);
2473
2474	if (!pte)
2475		return 0;
2476
2477	if (pmap_pte_v(pte)) {
2478		vm_page_t m;
2479		vm_offset_t pa;
2480
2481		val = MINCORE_INCORE;
2482		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2483			return val;
2484
2485		pa = pmap_pte_pa(pte);
2486
2487		m = PHYS_TO_VM_PAGE(pa);
2488
2489		/*
2490		 * Modified by us
2491		 */
2492		if (pte->pte_d)
2493			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2494		/*
2495		 * Modified by someone
2496		 */
2497		else if (pmap_is_modified(m))
2498			val |= MINCORE_MODIFIED_OTHER;
2499		/*
2500		 * Referenced by us
2501		 */
2502		if (pte->pte_a)
2503			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2504
2505		/*
2506		 * Referenced by someone
2507		 */
2508		else if (pmap_ts_referenced(m)) {
2509			val |= MINCORE_REFERENCED_OTHER;
2510			vm_page_flag_set(m, PG_REFERENCED);
2511		}
2512	}
2513	return val;
2514}
2515
2516void
2517pmap_activate(struct thread *td)
2518{
2519	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2520}
2521
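/*
 * Make the given pmap the one that is active on this CPU: record it
 * in the per-CPU data, mark this CPU in pm_active and load the pmap's
 * region IDs into region registers 0..4.  The previously installed
 * pmap is returned so that callers can switch temporarily and then
 * restore:
 *
 *	oldpmap = pmap_install(pmap);
 *	...
 *	pmap_install(oldpmap);
 */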
2522pmap_t
2523pmap_install(pmap_t pmap)
2524{
2525	pmap_t oldpmap;
2526	int i;
2527
2528	critical_enter();
2529
2530	oldpmap = PCPU_GET(current_pmap);
2531
2532	if (pmap == oldpmap || pmap == kernel_pmap) {
2533		critical_exit();
2534		return pmap;
2535	}
2536
2537	if (oldpmap) {
2538		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2539	}
2540
2541	PCPU_SET(current_pmap, pmap);
2542	if (!pmap) {
2543		/*
2544		 * Use the reserved RIDs 0..4, which have no mappings, to
2545		 * make sure we generate page faults on any access.
2546		 */
2547		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2548		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2549		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2550		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2551		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2552		critical_exit();
2553		return oldpmap;
2554	}
2555
2556	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2557
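	/*
	 * Install the pmap's RIDs in region registers 0..4.  Each value
	 * written is (rid << 8) | (log2 page size << 2) | 1, the low bit
	 * enabling the VHPT walker for that region.
	 */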
2558	for (i = 0; i < 5; i++)
2559		ia64_set_rr(IA64_RR_BASE(i),
2560			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2561
2562	critical_exit();
2563	return oldpmap;
2564}
2565
2566vm_offset_t
2567pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2568{
2569
2570	return addr;
2571}
2572
2573#include "opt_ddb.h"
2574
2575#ifdef DDB
2576
2577#include <ddb/ddb.h>
2578
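/*
 * Page size names indexed by the 'ps' (log2 of the page size) field
 * of the itir and region registers.
 */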
2579static const char*	psnames[] = {
2580	"1B",	"2B",	"4B",	"8B",
2581	"16B",	"32B",	"64B",	"128B",
2582	"256B",	"512B",	"1K",	"2K",
2583	"4K",	"8K",	"16K",	"32K",
2584	"64K",	"128K",	"256K",	"512K",
2585	"1M",	"2M",	"4M",	"8M",
2586	"16M",	"32M",	"64M",	"128M",
2587	"256M",	"512M",	"1G",	"2G"
2588};
2589
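/*
 * Dump the instruction (type 0) or data (type 1) translation registers
 * using the PAL_VM_SUMMARY and PAL_VM_TR_READ firmware calls.
 */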
2590static void
2591print_trs(int type)
2592{
2593	struct ia64_pal_result	res;
2594	int			i, maxtr;
2595	struct {
2596		struct ia64_pte	pte;
2597		struct ia64_itir itir;
2598		struct ia64_ifa ifa;
2599		struct ia64_rr	rr;
2600	}			buf;
2601	static const char*	manames[] = {
2602		"WB",	"bad",	"bad",	"bad",
2603		"UC",	"UCE",	"WC",	"NaT",
2605	};
2606
2607	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2608	if (res.pal_status != 0) {
2609		db_printf("Can't get VM summary\n");
2610		return;
2611	}
2612
2613	if (type == 0)
2614		maxtr = (res.pal_result[0] >> 40) & 0xff;
2615	else
2616		maxtr = (res.pal_result[0] >> 32) & 0xff;
2617
2618	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2619	for (i = 0; i <= maxtr; i++) {
2620		bzero(&buf, sizeof(buf));
2621		res = ia64_call_pal_stacked_physical
2622			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2623		if (!(res.pal_result[0] & 1))
2624			buf.pte.pte_ar = 0;
2625		if (!(res.pal_result[0] & 2))
2626			buf.pte.pte_pl = 0;
2627		if (!(res.pal_result[0] & 4))
2628			buf.pte.pte_d = 0;
2629		if (!(res.pal_result[0] & 8))
2630			buf.pte.pte_ma = 0;
2631		db_printf(
2632			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2633			buf.ifa.ifa_ig & 1,
2634			buf.rr.rr_rid,
2635			buf.ifa.ifa_vpn,
2636			buf.pte.pte_ppn,
2637			psnames[buf.itir.itir_ps],
2638			buf.pte.pte_ed,
2639			buf.pte.pte_ar,
2640			buf.pte.pte_pl,
2641			buf.pte.pte_d,
2642			buf.pte.pte_a,
2643			manames[buf.pte.pte_ma],
2644			buf.pte.pte_p,
2645			buf.itir.itir_key);
2646	}
2647}
2648
2649DB_COMMAND(itr, db_itr)
2650{
2651	print_trs(0);
2652}
2653
2654DB_COMMAND(dtr, db_dtr)
2655{
2656	print_trs(1);
2657}
2658
2659DB_COMMAND(rr, db_rr)
2660{
2661	int i;
2662	u_int64_t t;
2663	struct ia64_rr rr;
2664
2665	printf("RR RID    PgSz VE\n");
2666	for (i = 0; i < 8; i++) {
2667		__asm __volatile ("mov %0=rr[%1]"
2668				  : "=r"(t)
2669				  : "r"(IA64_RR_BASE(i)));
2670		*(u_int64_t *) &rr = t;
2671		printf("%d  %06x %4s %d\n",
2672		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2673	}
2674}
2675
2676DB_COMMAND(thash, db_thash)
2677{
2678	if (!have_addr)
2679		return;
2680
2681	db_printf("%p\n", (void *) ia64_thash(addr));
2682}
2683
2684DB_COMMAND(ttag, db_ttag)
2685{
2686	if (!have_addr)
2687		return;
2688
2689	db_printf("0x%lx\n", ia64_ttag(addr));
2690}
2691
2692#endif
2693