1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 107394 2002-11-29 20:10:21Z marcel $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
 * this module may delay invalidation or reduced-protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120#include <vm/uma_int.h>
121
122#include <sys/user.h>
123
124#include <machine/pal.h>
125#include <machine/md_var.h>
126
127MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
128
129#ifndef PMAP_SHPGPERPROC
130#define PMAP_SHPGPERPROC 200
131#endif
132
133#if defined(DIAGNOSTIC)
134#define PMAP_DIAGNOSTIC
135#endif
136
137#define MINPV 2048	/* Preallocate at least this many */
138#define MAXPV 20480	/* But no more than this */
139
140#if 0
141#define PMAP_DIAGNOSTIC
142#define PMAP_DEBUG
143#endif
144
145#if !defined(PMAP_DIAGNOSTIC)
146#define PMAP_INLINE __inline
147#else
148#define PMAP_INLINE
149#endif
150
151/*
152 * Get PDEs and PTEs for user/kernel address space
153 */
154#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
155#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
156#define pmap_pte_v(pte)		((pte)->pte_p)
157#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
158#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
159
160#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
161				:((pte)->pte_ig &= ~PTE_IG_WIRED))
162#define pmap_pte_set_prot(pte, v) do {		\
    (pte)->pte_ar = (v) >> 2;			\
    (pte)->pte_pl = (v) & 3;			\
165} while (0)
166
167/*
168 * Given a map and a machine independent protection code,
169 * convert to an ia64 protection code.
170 */
171#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
172#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
173#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
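
/*
 * Each protection_codes[][] entry packs an ia64 access-rights value in
 * its upper bits and a privilege level in its low two bits, which is
 * exactly how pte_prot_pl() and pte_prot_ar() above take it apart.
 * Index 0 is used for the kernel pmap, index 1 for user pmaps.
 */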
174int	protection_codes[2][8];
175
176/*
177 * Return non-zero if this pmap is currently active
178 */
179#define pmap_isactive(pmap)	(pmap->pm_active)
180
181/*
182 * Statically allocated kernel pmap
183 */
184struct pmap kernel_pmap_store;
185
186vm_offset_t avail_start;	/* PA of first available physical page */
187vm_offset_t avail_end;		/* PA of last available physical page */
188vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
189vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
190static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
191
192vm_offset_t vhpt_base, vhpt_size;
193
194/*
195 * We use an object to own the kernel's 'page tables'. For simplicity,
196 * we use one page directory to index a set of pages containing
 * ia64_lptes. This gives us up to 2GB of kernel virtual space.
198 */
199static vm_object_t kptobj;
200static int nkpt;
201static struct ia64_lpte **kptdir;
202#define KPTE_DIR_INDEX(va) \
203	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
204#define KPTE_PTE_INDEX(va) \
205	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
206#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
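
/*
 * Assuming the usual 8KB kernel page size (PAGE_SHIFT == 13) and 32-byte
 * ia64_lptes, each leaf page holds 256 PTEs and the directory page holds
 * 1024 pointers, so this two-level scheme covers 1024 * 256 * 8KB = 2GB,
 * which is where the limit quoted above comes from.
 */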
207
208vm_offset_t kernel_vm_end;
209
210/*
 * Loop parameters for ptc.e.  XXX the static initializers are the values
 * for SKI; pmap_bootstrap() replaces them with what PAL_PTCE_INFO reports.
212 */
213static u_int64_t pmap_ptc_e_base = 0x100000000;
214static u_int64_t pmap_ptc_e_count1 = 3;
215static u_int64_t pmap_ptc_e_count2 = 2;
216static u_int64_t pmap_ptc_e_stride1 = 0x2000;
217static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
218
219/*
220 * Data for the RID allocator
221 */
222static u_int64_t *pmap_ridbusy;
223static int pmap_ridmax, pmap_ridcount;
224struct mtx pmap_ridmutex;
225
226/*
227 * Data for the pv entry allocation mechanism
228 */
229static uma_zone_t pvzone;
230static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
231int pmap_pagedaemon_waken;
232static struct pv_entry *pvbootentries;
233static int pvbootnext, pvbootmax;
234
235/*
236 * Data for allocating PTEs for user processes.
237 */
238static uma_zone_t ptezone;
239
240/*
241 * VHPT instrumentation.
242 */
243static int pmap_vhpt_inserts;
244static int pmap_vhpt_collisions;
245static int pmap_vhpt_resident;
246SYSCTL_DECL(_vm_stats);
247SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
248SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
249	   &pmap_vhpt_inserts, 0, "");
250SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
251	   &pmap_vhpt_collisions, 0, "");
252SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
253	   &pmap_vhpt_resident, 0, "");
254
255static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
256static pv_entry_t get_pv_entry(void);
257static void	ia64_protection_init(void);
258
259static void	pmap_invalidate_all(pmap_t pmap);
260static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
261
262vm_offset_t
263pmap_steal_memory(vm_size_t size)
264{
265	vm_size_t bank_size;
266	vm_offset_t pa, va;
267
268	size = round_page(size);
269
270	bank_size = phys_avail[1] - phys_avail[0];
271	while (size > bank_size) {
272		int i;
273		for (i = 0; phys_avail[i+2]; i+= 2) {
274			phys_avail[i] = phys_avail[i+2];
275			phys_avail[i+1] = phys_avail[i+3];
276		}
277		phys_avail[i] = 0;
278		phys_avail[i+1] = 0;
279		if (!phys_avail[0])
280			panic("pmap_steal_memory: out of memory");
281		bank_size = phys_avail[1] - phys_avail[0];
282	}
283
284	pa = phys_avail[0];
285	phys_avail[0] += size;
286
287	va = IA64_PHYS_TO_RR7(pa);
288	bzero((caddr_t) va, size);
289	return va;
290}
291
292/*
293 *	Bootstrap the system enough to run with virtual memory.
294 */
295void
296pmap_bootstrap()
297{
298	int i, j, count, ridbits;
299	struct ia64_pal_result res;
300
301	/*
302	 * Query the PAL Code to find the loop parameters for the
303	 * ptc.e instruction.
304	 */
305	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
306	if (res.pal_status != 0)
307		panic("Can't configure ptc.e parameters");
308	pmap_ptc_e_base = res.pal_result[0];
309	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
310	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
311	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
312	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
313	if (bootverbose)
314		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
315		       "stride1=0x%lx, stride2=0x%lx\n",
316		       pmap_ptc_e_base,
317		       pmap_ptc_e_count1,
318		       pmap_ptc_e_count2,
319		       pmap_ptc_e_stride1,
320		       pmap_ptc_e_stride2);
321
322	/*
323	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
324	 */
325	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
326	if (res.pal_status != 0) {
327		if (bootverbose)
328			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
329		ridbits = 18; /* guaranteed minimum */
330	} else {
331		ridbits = (res.pal_result[1] >> 8) & 0xff;
332		if (bootverbose)
333			printf("Processor supports %d Region ID bits\n",
334			       ridbits);
335	}
336	pmap_ridmax = (1 << ridbits);
337	pmap_ridcount = 8;
338	pmap_ridbusy = (u_int64_t *)
339		pmap_steal_memory(pmap_ridmax / 8);
340	bzero(pmap_ridbusy, pmap_ridmax / 8);
341	pmap_ridbusy[0] |= 0xff;
342	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
343
344	/*
345	 * Allocate some memory for initial kernel 'page tables'.
346	 */
347	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
348	for (i = 0; i < NKPT; i++) {
349		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
350	}
351	nkpt = NKPT;
352
353	avail_start = phys_avail[0];
354	for (i = 0; phys_avail[i+2]; i+= 2) ;
355	avail_end = phys_avail[i+1];
356	count = i+2;
357
358	/*
359	 * Figure out a useful size for the VHPT, based on the size of
360	 * physical memory and try to locate a region which is large
361	 * enough to contain the VHPT (which must be a power of two in
362	 * size and aligned to a natural boundary).
363	 * Don't use the difference between avail_start and avail_end
	 * as a measure of memory size; the physical address space is often
	 * sparse enough that doing so would make us (try to) create a huge VHPT.
366	 */
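	/*
	 * The loop below sizes the table at roughly one 32-byte long-format
	 * entry per physical page, starting from a 32KB (1 << 15) minimum.
	 */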
367	vhpt_size = 15;
	while ((1L<<vhpt_size) < ia64_btop(Maxmem) * 32)
369		vhpt_size++;
370
371	vhpt_base = 0;
372	while (!vhpt_base) {
373		vm_offset_t mask;
374		if (bootverbose)
375			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
376		mask = (1L << vhpt_size) - 1;
377		for (i = 0; i < count; i += 2) {
378			vm_offset_t base, limit;
379			base = (phys_avail[i] + mask) & ~mask;
380			limit = base + (1L << vhpt_size);
381			if (limit <= phys_avail[i+1])
382				/*
383				 * VHPT can fit in this region
384				 */
385				break;
386		}
387		if (!phys_avail[i]) {
388			/*
389			 * Can't fit, try next smaller size.
390			 */
391			vhpt_size--;
392		} else {
393			vhpt_base = (phys_avail[i] + mask) & ~mask;
394		}
395	}
396	if (vhpt_size < 15)
397		panic("Can't find space for VHPT");
398
399	if (bootverbose)
400		printf("Putting VHPT at %p\n", (void *) vhpt_base);
401	if (vhpt_base != phys_avail[i]) {
402		/*
403		 * Split this region.
404		 */
405		if (bootverbose)
406			printf("Splitting [%p-%p]\n",
407			       (void *) phys_avail[i],
408			       (void *) phys_avail[i+1]);
409		for (j = count; j > i; j -= 2) {
410			phys_avail[j] = phys_avail[j-2];
411			phys_avail[j+1] = phys_avail[j-2+1];
412		}
413		phys_avail[count+2] = 0;
414		phys_avail[count+3] = 0;
415		phys_avail[i+1] = vhpt_base;
416		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
417	} else {
418		phys_avail[i] = vhpt_base + (1L << vhpt_size);
419	}
420
421	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
422	bzero((void *) vhpt_base, (1L << vhpt_size));
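	/*
	 * Point the VHPT walker at the table.  The value written to cr.pta
	 * encodes the table base, the table size (vhpt_size in bits 7:2),
	 * what appears to be the long-format bit (bit 8) and the enable
	 * bit (bit 0).
	 */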
423	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
424			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
425
426	virtual_avail = IA64_RR_BASE(5);
427	virtual_end = IA64_RR_BASE(6)-1;
428
429	/*
430	 * Initialize protection array.
431	 */
432	ia64_protection_init();
433
434	/*
435	 * Initialize the kernel pmap (which is statically allocated).
436	 */
437	for (i = 0; i < 5; i++)
438		kernel_pmap->pm_rid[i] = 0;
439	kernel_pmap->pm_active = 1;
440	TAILQ_INIT(&kernel_pmap->pm_pvlist);
441	PCPU_SET(current_pmap, kernel_pmap);
442
443	/*
444	 * Region 5 is mapped via the vhpt.
445	 */
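	/*
	 * A region register holds the RID in bits 31:8, the preferred page
	 * size in bits 7:2 and the VHPT-enable bit in bit 0; region 5 gets
	 * RID 5, PAGE_SIZE pages and the VHPT walker turned on.
	 */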
446	ia64_set_rr(IA64_RR_BASE(5),
447		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
448
449	/*
450	 * Region 6 is direct mapped UC and region 7 is direct mapped
	 * WB (cacheable). The details of this are controlled by the Alt {I,D}TLB
452	 * handlers. Here we just make sure that they have the largest
453	 * possible page size to minimise TLB usage.
454	 */
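	/*
	 * (28 << 2) selects a 2^28 byte (256MB) page size for these regions
	 * and leaves the VHPT-enable bit clear, so misses here are handled
	 * entirely by the Alt TLB fault handlers.
	 */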
455	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
456	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
457
458	/*
459	 * Reserve some memory for allocating pvs while bootstrapping
460	 * the pv allocator. We need to have enough to cover mapping
461	 * the kmem_alloc region used to allocate the initial_pvs in
462	 * pmap_init. In general, the size of this region is
463	 * approximately (# physical pages) * (size of pv entry).
464	 */
465	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
466	pvbootentries = (struct pv_entry *)
467		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
468	pvbootnext = 0;
469
470	/*
471	 * Clear out any random TLB entries left over from booting.
472	 */
473	pmap_invalidate_all(kernel_pmap);
474}
475
476void *
477uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
478{
479	static vm_pindex_t color;
480	vm_page_t m;
481	int pflags;
482	void *va;
483
484	*flags = UMA_SLAB_PRIV;
485	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
486		pflags = VM_ALLOC_INTERRUPT;
487	else
488		pflags = VM_ALLOC_SYSTEM;
489	if (wait & M_ZERO)
490		pflags |= VM_ALLOC_ZERO;
491	m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
492	if (m) {
493		va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
494		if ((m->flags & PG_ZERO) == 0)
495			bzero(va, PAGE_SIZE);
496		return (va);
497	}
498	return (NULL);
499}
500
501void
502uma_small_free(void *mem, int size, u_int8_t flags)
503{
504	vm_page_t m;
505
506	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
507	vm_page_lock_queues();
508	vm_page_free(m);
509	vm_page_unlock_queues();
510}
511
512/*
513 *	Initialize the pmap module.
514 *	Called by vm_init, to initialize any structures that the pmap
515 *	system needs to map virtual memory.
 *	pmap_init has been enhanced to support, in a fairly consistent
 *	way, discontiguous physical memory.
518 */
519void
520pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
521{
522	int i;
523	int initial_pvs;
524
525	/*
526	 * Allocate memory for random pmap data structures.  Includes the
527	 * pv_head_table.
528	 */
529
530	for(i = 0; i < vm_page_array_size; i++) {
531		vm_page_t m;
532
533		m = &vm_page_array[i];
534		TAILQ_INIT(&m->md.pv_list);
535		m->md.pv_list_count = 0;
536 	}
537
538	/*
539	 * Init the pv free list and the PTE free list.
540	 */
541	initial_pvs = vm_page_array_size;
542	if (initial_pvs < MINPV)
543		initial_pvs = MINPV;
544	if (initial_pvs > MAXPV)
545		initial_pvs = MAXPV;
546	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
547	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
548	uma_prealloc(pvzone, initial_pvs);
549
550	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
551	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
552	uma_prealloc(ptezone, initial_pvs);
553
554	/*
555	 * Create the object for the kernel's page tables.
556	 */
557	kptobj = vm_object_allocate(OBJT_DEFAULT, MAXKPT);
558
559	/*
560	 * Now it is safe to enable pv_table recording.
561	 */
562	pmap_initialized = TRUE;
563}
564
565/*
566 * Initialize the address space (zone) for the pv_entries.  Set a
567 * high water mark so that the system can recover from excessive
568 * numbers of pv entries.
569 */
570void
571pmap_init2()
572{
573	int shpgperproc = PMAP_SHPGPERPROC;
574
575	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
576	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
577	pv_entry_high_water = 9 * (pv_entry_max / 10);
578}
579
580
581/***************************************************
582 * Manipulate TLBs for a pmap
583 ***************************************************/
584
585static void
586pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
587{
588	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
589		("invalidating TLB for non-current pmap"));
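	/*
	 * ptc.g purges the translation globally: the purge is broadcast to
	 * the other processors in the coherence domain, which is why no IPI
	 * is needed here.  The second operand encodes the page size as
	 * ps << 2.
	 */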
590	ia64_ptc_g(va, PAGE_SHIFT << 2);
591}
592
593static void
594pmap_invalidate_all_1(void *arg)
595{
596	u_int64_t addr;
597	int i, j;
598	register_t psr;
599
600	psr = intr_disable();
601	addr = pmap_ptc_e_base;
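	/*
	 * This is the loop prescribed by PAL_PTCE_INFO: count1 x count2
	 * ptc.e purges, advancing by stride2 within the inner loop and by
	 * stride1 between outer iterations, flush the entire local TLB.
	 */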
602	for (i = 0; i < pmap_ptc_e_count1; i++) {
603		for (j = 0; j < pmap_ptc_e_count2; j++) {
604			ia64_ptc_e(addr);
605			addr += pmap_ptc_e_stride2;
606		}
607		addr += pmap_ptc_e_stride1;
608	}
609	intr_restore(psr);
610}
611
612static void
613pmap_invalidate_all(pmap_t pmap)
614{
615	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
616		("invalidating TLB for non-current pmap"));
619#ifdef SMP
620	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
621#else
622	pmap_invalidate_all_1(0);
623#endif
624}
625
626static u_int32_t
627pmap_allocate_rid(void)
628{
629	int rid;
630
631	if (pmap_ridcount == pmap_ridmax)
632		panic("pmap_allocate_rid: All Region IDs used");
633
634	do {
635		rid = arc4random() & (pmap_ridmax - 1);
636	} while (pmap_ridbusy[rid / 64] & (1L << (rid & 63)));
637	pmap_ridbusy[rid / 64] |= (1L << (rid & 63));
638	pmap_ridcount++;
639
640	return rid;
641}
642
643static void
644pmap_free_rid(u_int32_t rid)
645{
646	mtx_lock(&pmap_ridmutex);
647	pmap_ridbusy[rid / 64] &= ~(1L << (rid & 63));
648	pmap_ridcount--;
649	mtx_unlock(&pmap_ridmutex);
650}
651
652static void
653pmap_ensure_rid(pmap_t pmap, vm_offset_t va)
654{
655	int rr;
656
657	rr = va >> 61;
658
659	/*
660	 * We get called for virtual addresses that may just as well be
661	 * kernel addresses (ie region 5, 6 or 7). Since the pm_rid field
662	 * only holds region IDs for user regions, we have to make sure
663	 * the region is within bounds.
664	 */
665	if (rr >= 5)
666		return;
667
668	if (pmap->pm_rid[rr])
669		return;
670
671	mtx_lock(&pmap_ridmutex);
672	pmap->pm_rid[rr] = pmap_allocate_rid();
673	if (pmap == PCPU_GET(current_pmap))
674		ia64_set_rr(IA64_RR_BASE(rr),
675			    (pmap->pm_rid[rr] << 8)|(PAGE_SHIFT << 2)|1);
676	mtx_unlock(&pmap_ridmutex);
677}
678
679/***************************************************
680 * Low level helper routines.....
681 ***************************************************/
682
683/*
684 * Install a pte into the VHPT
685 */
686static PMAP_INLINE void
687pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
688{
689	u_int64_t *vhp, *p;
690
691	/* invalidate the pte */
692	atomic_set_64(&vhpte->pte_tag, 1L << 63);
693	ia64_mf();			/* make sure everyone sees */
694
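	/*
	 * With the ti bit set the VHPT walker ignores this slot, so the
	 * three words below can be rewritten without a concurrent lookup
	 * ever seeing a half-updated entry.
	 */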
695	vhp = (u_int64_t *) vhpte;
696	p = (u_int64_t *) pte;
697
698	vhp[0] = p[0];
699	vhp[1] = p[1];
	vhp[2] = p[2];			/* new tag clears ti again */
701
702	ia64_mf();
703}
704
705/*
706 * Compare essential parts of pte.
707 */
708static PMAP_INLINE int
709pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
710{
711	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
712}
713
714/*
 * Return non-zero if modified-bit tracking should be done for the given
 * address, i.e. if it lies outside the region(s) of memory that should
 * not be tested for the modified bit (kmi.clean_sva..kmi.clean_eva).
717 */
718static PMAP_INLINE int
719pmap_track_modified(vm_offset_t va)
720{
721	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
722		return 1;
723	else
724		return 0;
725}
726
727#ifndef KSTACK_MAX_PAGES
728#define KSTACK_MAX_PAGES 32
729#endif
730
731/*
732 * Create the KSTACK for a new thread.
733 * This routine directly affects the fork perf for a process/thread.
734 */
735void
736pmap_new_thread(struct thread *td, int pages)
737{
738	vm_offset_t *ks;
739
740	/* Bounds check */
741	if (pages <= 1)
742		pages = KSTACK_PAGES;
743	else if (pages > KSTACK_MAX_PAGES)
744		pages = KSTACK_MAX_PAGES;
745
746	/*
	 * Use contigmalloc for the user area so that we can use a region
	 * 7 address for it, which makes it impossible to accidentally
	 * lose it when recording a trapframe.
750	 */
751	ks = contigmalloc(pages * PAGE_SIZE, M_PMAP, M_WAITOK, 0ul,
752	    256*1024*1024 - 1, PAGE_SIZE, 256*1024*1024);
753	if (ks == NULL)
754		panic("pmap_new_thread: could not contigmalloc %d pages\n",
755		    pages);
756
757	td->td_md.md_kstackvirt = ks;
758	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
759	td->td_kstack_pages = pages;
760}
761
762/*
763 * Dispose the KSTACK for a thread that has exited.
764 * This routine directly impacts the exit perf of a process/thread.
765 */
766void
767pmap_dispose_thread(struct thread *td)
768{
769	int pages;
770
771	pages = td->td_kstack_pages;
772	contigfree(td->td_md.md_kstackvirt, pages * PAGE_SIZE, M_PMAP);
773	td->td_md.md_kstackvirt = NULL;
774	td->td_kstack = 0;
775}
776
777/*
778 * Set up a variable sized alternate kstack.  This appears to be MI.
779 */
780void
781pmap_new_altkstack(struct thread *td, int pages)
782{
783
784	/*
785	 * Shuffle the original stack. Save the virtual kstack address
786	 * instead of the physical address because 1) we can derive the
787	 * physical address from the virtual address and 2) we need the
788	 * virtual address in pmap_dispose_thread.
789	 */
790	td->td_altkstack_obj = td->td_kstack_obj;
791	td->td_altkstack = (vm_offset_t)td->td_md.md_kstackvirt;
792	td->td_altkstack_pages = td->td_kstack_pages;
793
794	pmap_new_thread(td, pages);
795}
796
797void
798pmap_dispose_altkstack(struct thread *td)
799{
800
801	pmap_dispose_thread(td);
802
803	/*
804	 * Restore the original kstack. Note that td_altkstack holds the
805	 * virtual kstack address of the previous kstack.
806	 */
807	td->td_md.md_kstackvirt = (void*)td->td_altkstack;
808	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa(td->td_altkstack));
809	td->td_kstack_obj = td->td_altkstack_obj;
810	td->td_kstack_pages = td->td_altkstack_pages;
811	td->td_altkstack = 0;
812	td->td_altkstack_obj = NULL;
813	td->td_altkstack_pages = 0;
814}
815
816/*
817 * Allow the KSTACK for a thread to be prejudicially paged out.
818 */
819void
820pmap_swapout_thread(struct thread *td)
821{
822}
823
824/*
825 * Bring the KSTACK for a specified thread back in.
826 */
827void
828pmap_swapin_thread(struct thread *td)
829{
830}
831
832/***************************************************
833 * Page table page management routines.....
834 ***************************************************/
835
836void
837pmap_pinit0(struct pmap *pmap)
838{
839	/* kernel_pmap is the same as any other pmap. */
840	pmap_pinit(pmap);
841}
842
843/*
844 * Initialize a preallocated and zeroed pmap structure,
845 * such as one in a vmspace structure.
846 */
847void
848pmap_pinit(struct pmap *pmap)
849{
850	int i;
851
852	pmap->pm_flags = 0;
853	for (i = 0; i < 5; i++)
854		pmap->pm_rid[i] = 0;
855	pmap->pm_ptphint = NULL;
856	pmap->pm_active = 0;
857	TAILQ_INIT(&pmap->pm_pvlist);
858	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
859}
860
861/*
862 * Wire in kernel global address entries.  To avoid a race condition
863 * between pmap initialization and pmap_growkernel, this procedure
864 * should be called after the vmspace is attached to the process
865 * but before this pmap is activated.
866 */
867void
868pmap_pinit2(struct pmap *pmap)
869{
870}
871
872/***************************************************
 * Pmap allocation/deallocation routines.
874 ***************************************************/
875
876/*
877 * Release any resources held by the given physical map.
878 * Called when a pmap initialized by pmap_pinit is being released.
879 * Should only be called if the map contains no valid mappings.
880 */
881void
882pmap_release(pmap_t pmap)
883{
884	int i;
885
886	for (i = 0; i < 5; i++)
887		if (pmap->pm_rid[i])
888			pmap_free_rid(pmap->pm_rid[i]);
889}
890
891/*
892 * grow the number of kernel page table entries, if needed
893 */
894void
895pmap_growkernel(vm_offset_t addr)
896{
897	struct ia64_lpte *ptepage;
898	vm_page_t nkpg;
899
900	if (kernel_vm_end == 0) {
901		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
902			+ IA64_RR_BASE(5);
903	}
904	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
905	while (kernel_vm_end < addr) {
906		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
907			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
908				& ~(PAGE_SIZE * NKPTEPG - 1);
909			continue;
910		}
911
912		/*
913		 * We could handle more by increasing the size of kptdir.
914		 */
915		if (nkpt == MAXKPT)
916			panic("pmap_growkernel: out of kernel address space");
917
918		/*
919		 * This index is bogus, but out of the way
920		 */
921		nkpg = vm_page_alloc(kptobj, nkpt,
922		    VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
923		if (!nkpg)
924			panic("pmap_growkernel: no memory to grow kernel");
925
926		nkpt++;
927		ptepage = (struct ia64_lpte *)
928			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
929		bzero(ptepage, PAGE_SIZE);
930		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
931
932		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
933	}
934}
935
936/***************************************************
937 * page management routines.
938 ***************************************************/
939
940/*
941 * free the pv_entry back to the free list
942 */
943static PMAP_INLINE void
944free_pv_entry(pv_entry_t pv)
945{
946	pv_entry_count--;
947	uma_zfree(pvzone, pv);
948}
949
950/*
951 * get a new pv_entry, allocating a block from the system
952 * when needed.
953 * the memory allocation is performed bypassing the malloc code
954 * because of the possibility of allocations at interrupt time.
955 */
956static pv_entry_t
957get_pv_entry(void)
958{
959	pv_entry_count++;
960	if (pv_entry_high_water &&
961		(pv_entry_count > pv_entry_high_water) &&
962		(pmap_pagedaemon_waken == 0)) {
963		pmap_pagedaemon_waken = 1;
964		wakeup (&vm_pages_needed);
965	}
966	return uma_zalloc(pvzone, M_NOWAIT);
967}
968
969/*
970 * Add an ia64_lpte to the VHPT.
971 */
972static void
973pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
974{
975	struct ia64_lpte *vhpte;
976
977	pmap_vhpt_inserts++;
978	pmap_vhpt_resident++;
979
980	vhpte = (struct ia64_lpte *) ia64_thash(va);
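	/*
	 * ia64_thash() hashes va to its home VHPT slot and ia64_ttag()
	 * computes the matching tag.  Entries that collide on a slot are
	 * kept on a singly-linked list threaded through pte_chain, which
	 * stores the physical address of the next ia64_lpte.
	 */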
981
982	if (vhpte->pte_chain)
983		pmap_vhpt_collisions++;
984
985	pte->pte_chain = vhpte->pte_chain;
986	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
987
988	if (!vhpte->pte_p && pte->pte_p)
989		pmap_install_pte(vhpte, pte);
990	else
991		ia64_mf();
992}
993
994/*
995 * Update VHPT after a pte has changed.
996 */
997static void
998pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
999{
1000	struct ia64_lpte *vhpte;
1001
1002	vhpte = (struct ia64_lpte *) ia64_thash(va);
1003
1004	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
1005	    && pte->pte_p)
1006		pmap_install_pte(vhpte, pte);
1007}
1008
1009/*
1010 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1011 * worked or an appropriate error code otherwise.
1012 */
1013static int
1014pmap_remove_vhpt(vm_offset_t va)
1015{
1016	struct ia64_lpte *pte;
1017	struct ia64_lpte *lpte;
1018	struct ia64_lpte *vhpte;
1019	u_int64_t tag;
1020	int error = ENOENT;
1021
1022	vhpte = (struct ia64_lpte *) ia64_thash(va);
1023
1024	/*
1025	 * If the VHPTE is invalid, there can't be a collision chain.
1026	 */
1027	if (!vhpte->pte_p) {
1028		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1029		printf("can't remove vhpt entry for 0x%lx\n", va);
1030		goto done;
1031	}
1032
1033	lpte = vhpte;
1034	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1035	tag = ia64_ttag(va);
1036
1037	while (pte->pte_tag != tag) {
1038		lpte = pte;
1039		if (pte->pte_chain)
1040			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1041		else {
1042			printf("can't remove vhpt entry for 0x%lx\n", va);
1043			goto done;
1044		}
1045	}
1046
1047	/*
	 * Snip this pte out of the collision chain.
1049	 */
1050	lpte->pte_chain = pte->pte_chain;
1051
1052	/*
1053	 * If the VHPTE matches as well, change it to map the first
1054	 * element from the chain if there is one.
1055	 */
1056	if (vhpte->pte_tag == tag) {
1057		if (vhpte->pte_chain) {
1058			pte = (struct ia64_lpte *)
1059				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1060			pmap_install_pte(vhpte, pte);
1061		} else {
1062			vhpte->pte_p = 0;
1063			ia64_mf();
1064		}
1065	}
1066
1067	pmap_vhpt_resident--;
1068	error = 0;
1069 done:
1070	return error;
1071}
1072
1073/*
1074 * Find the ia64_lpte for the given va, if any.
1075 */
1076static struct ia64_lpte *
1077pmap_find_vhpt(vm_offset_t va)
1078{
1079	struct ia64_lpte *pte;
1080	u_int64_t tag;
1081
1082	pte = (struct ia64_lpte *) ia64_thash(va);
1083	if (!pte->pte_chain) {
1084		pte = 0;
1085		goto done;
1086	}
1087
1088	tag = ia64_ttag(va);
1089	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1090
1091	while (pte->pte_tag != tag) {
1092		if (pte->pte_chain) {
1093			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1094		} else {
1095			pte = 0;
1096			break;
1097		}
1098	}
1099
1100 done:
1101	return pte;
1102}
1103
1104/*
1105 * Remove an entry from the list of managed mappings.
1106 */
1107static int
1108pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1109{
1110	if (!pv) {
1111		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1112			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1113				if (pmap == pv->pv_pmap && va == pv->pv_va)
1114					break;
1115			}
1116		} else {
1117			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1118				if (va == pv->pv_va)
1119					break;
1120			}
1121		}
1122	}
1123
1124	if (pv) {
1125		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1126		m->md.pv_list_count--;
1127		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1128			vm_page_flag_clear(m, PG_WRITEABLE);
1129
1130		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1131		free_pv_entry(pv);
1132		return 0;
1133	} else {
1134		return ENOENT;
1135	}
1136}
1137
1138/*
1139 * Create a pv entry for page at pa for
1140 * (pmap, va).
1141 */
1142static void
1143pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1144{
1145	pv_entry_t pv;
1146
1147	pv = get_pv_entry();
1148	pv->pv_pmap = pmap;
1149	pv->pv_va = va;
1150
1151	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1152	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1153	m->md.pv_list_count++;
1154}
1155
1156/*
1157 *	Routine:	pmap_extract
1158 *	Function:
1159 *		Extract the physical page address associated
1160 *		with the given map/virtual_address pair.
1161 */
1162vm_offset_t
1163pmap_extract(pmap, va)
1164	register pmap_t pmap;
1165	vm_offset_t va;
1166{
1167	pmap_t oldpmap;
1168	vm_offset_t pa;
1169
1170	oldpmap = pmap_install(pmap);
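	/*
	 * ia64_tpa() asks the hardware (the tpa instruction) for the
	 * translation, so the pmap being probed has to be installed --
	 * i.e. its RIDs loaded into the region registers -- for user
	 * addresses to resolve; hence the install/restore pair.
	 */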
1171	pa = ia64_tpa(va);
1172	pmap_install(oldpmap);
1173	return pa;
1174}
1175
1176/***************************************************
1177 * Low level mapping routines.....
1178 ***************************************************/
1179
1180/*
1181 * Find the kernel lpte for mapping the given virtual address, which
1182 * must be in the part of region 5 which we can cover with our kernel
1183 * 'page tables'.
1184 */
1185static struct ia64_lpte *
1186pmap_find_kpte(vm_offset_t va)
1187{
1188	KASSERT((va >> 61) == 5,
1189		("kernel mapping 0x%lx not in region 5", va));
1190	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1191		("kernel mapping 0x%lx out of range", va));
1192	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1193}
1194
1195/*
1196 * Find a pte suitable for mapping a user-space address. If one exists
1197 * in the VHPT, that one will be returned, otherwise a new pte is
1198 * allocated.
1199 */
1200static struct ia64_lpte *
1201pmap_find_pte(vm_offset_t va)
1202{
1203	struct ia64_lpte *pte;
1204
1205	if (va >= VM_MAXUSER_ADDRESS)
1206		return pmap_find_kpte(va);
1207
1208	pte = pmap_find_vhpt(va);
1209	if (!pte) {
1210		pte = uma_zalloc(ptezone, M_WAITOK);
1211		pte->pte_p = 0;
1212	}
1213	return pte;
1214}
1215
1216/*
1217 * Free a pte which is now unused. This simply returns it to the zone
1218 * allocator if it is a user mapping. For kernel mappings, clear the
1219 * valid bit to make it clear that the mapping is not currently used.
1220 */
1221static void
1222pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1223{
1224	if (va < VM_MAXUSER_ADDRESS)
1225		uma_zfree(ptezone, pte);
1226	else
1227		pte->pte_p = 0;
1228}
1229
1230/*
1231 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it is assumed to already be in the
1233 * VHPT.
1234 */
1235static void
1236pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1237	     int ig, int pl, int ar)
1238{
1239	int wasvalid = pte->pte_p;
1240
1241	pte->pte_p = 1;
1242	pte->pte_ma = PTE_MA_WB;
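	/*
	 * Managed pages start with the access and dirty bits clear so that
	 * the first reference or write faults and can be recorded (see how
	 * pte_a/pte_d are folded back into the vm_page in pmap_remove_pte);
	 * unmanaged mappings pre-set both bits to avoid those faults.
	 */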
1243	if (ig & PTE_IG_MANAGED) {
1244		pte->pte_a = 0;
1245		pte->pte_d = 0;
1246	} else {
1247		pte->pte_a = 1;
1248		pte->pte_d = 1;
1249	}
1250	pte->pte_pl = pl;
1251	pte->pte_ar = ar;
1252	pte->pte_ppn = pa >> 12;
1253	pte->pte_ed = 0;
1254	pte->pte_ig = ig;
1255
1256	pte->pte_ps = PAGE_SHIFT;
1257	pte->pte_key = 0;
1258
1259	pte->pte_tag = ia64_ttag(va);
1260
1261	if (wasvalid) {
1262		pmap_update_vhpt(pte, va);
1263	} else {
1264		pmap_enter_vhpt(pte, va);
1265	}
1266}
1267
1268/*
1269 * If a pte contains a valid mapping, clear it and update the VHPT.
1270 */
1271static void
1272pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1273{
1274	if (pte->pte_p) {
1275		pmap_remove_vhpt(va);
1276		ia64_ptc_g(va, PAGE_SHIFT << 2);
1277		pte->pte_p = 0;
1278	}
1279}
1280
1281/*
1282 * Remove the (possibly managed) mapping represented by pte from the
1283 * given pmap.
1284 */
1285static int
1286pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1287		pv_entry_t pv, int freepte)
1288{
1289	int error;
1290	vm_page_t m;
1291
1292	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1293		("removing pte for non-current pmap"));
1294
1295	/*
1296	 * First remove from the VHPT.
1297	 */
1298	error = pmap_remove_vhpt(va);
1299	if (error)
1300		return error;
1301
1302	/*
1303	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1304	 */
1305	pte->pte_p = 0;
1306
1307	if (pte->pte_ig & PTE_IG_WIRED)
1308		pmap->pm_stats.wired_count -= 1;
1309
1310	pmap->pm_stats.resident_count -= 1;
1311	if (pte->pte_ig & PTE_IG_MANAGED) {
1312		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1313		if (pte->pte_d)
1314			if (pmap_track_modified(va))
1315				vm_page_dirty(m);
1316		if (pte->pte_a)
1317			vm_page_flag_set(m, PG_REFERENCED);
1318
1319		if (freepte)
1320			pmap_free_pte(pte, va);
1321		return pmap_remove_entry(pmap, m, va, pv);
1322	} else {
1323		if (freepte)
1324			pmap_free_pte(pte, va);
1325		return 0;
1326	}
1327}
1328
1329/*
 * Add a list of wired pages to the kva.
 * This routine is only used for temporary
1332 * kernel mappings that do not need to have
1333 * page modification or references recorded.
1334 * Note that old mappings are simply written
1335 * over.  The page *must* be wired.
1336 */
1337void
1338pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1339{
1340	int i;
1341	struct ia64_lpte *pte;
1342
1343	for (i = 0; i < count; i++) {
1344		vm_offset_t tva = va + i * PAGE_SIZE;
1345		int wasvalid;
1346		pte = pmap_find_kpte(tva);
1347		wasvalid = pte->pte_p;
1348		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1349			     0, PTE_PL_KERN, PTE_AR_RWX);
1350		if (wasvalid)
1351			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1352	}
1353}
1354
1355/*
1356 * this routine jerks page mappings from the
1357 * kernel -- it is meant only for temporary mappings.
1358 */
1359void
1360pmap_qremove(vm_offset_t va, int count)
1361{
1362	int i;
1363	struct ia64_lpte *pte;
1364
1365	for (i = 0; i < count; i++) {
1366		pte = pmap_find_kpte(va);
1367		pmap_clear_pte(pte, va);
1368		va += PAGE_SIZE;
1369	}
1370}
1371
1372/*
1373 * Add a wired page to the kva.
1374 */
1375void
1376pmap_kenter(vm_offset_t va, vm_offset_t pa)
1377{
1378	struct ia64_lpte *pte;
1379	int wasvalid;
1380
1381	pte = pmap_find_kpte(va);
1382	wasvalid = pte->pte_p;
1383	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1384	if (wasvalid)
1385		ia64_ptc_g(va, PAGE_SHIFT << 2);
1386}
1387
1388/*
1389 * Remove a page from the kva
1390 */
1391void
1392pmap_kremove(vm_offset_t va)
1393{
1394	struct ia64_lpte *pte;
1395
1396	pte = pmap_find_kpte(va);
1397	pmap_clear_pte(pte, va);
1398}
1399
1400/*
1401 *	Used to map a range of physical addresses into kernel
1402 *	virtual address space.
1403 *
1404 *	The value passed in '*virt' is a suggested virtual address for
1405 *	the mapping. Architectures which can support a direct-mapped
1406 *	physical to virtual region can return the appropriate address
1407 *	within that region, leaving '*virt' unchanged. Other
1408 *	architectures should map the pages starting at '*virt' and
1409 *	update '*virt' with the first usable address after the mapped
1410 *	region.
1411 */
1412vm_offset_t
1413pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1414{
1415	return IA64_PHYS_TO_RR7(start);
1416}
1417
1418/*
1419 * Remove a single page from a process address space
1420 */
1421static void
1422pmap_remove_page(pmap_t pmap, vm_offset_t va)
1423{
1424	struct ia64_lpte *pte;
1425
1426	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1427		("removing page for non-current pmap"));
1428
1429	pte = pmap_find_vhpt(va);
1430	if (pte) {
1431		pmap_remove_pte(pmap, pte, va, 0, 1);
1432		pmap_invalidate_page(pmap, va);
1433	}
1434	return;
1435}
1436
1437/*
1438 *	Remove the given range of addresses from the specified map.
1439 *
1440 *	It is assumed that the start and end are properly
1441 *	rounded to the page size.
1442 */
1443void
1444pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1445{
1446	pmap_t oldpmap;
1447	vm_offset_t va;
1448	pv_entry_t pv;
1449	struct ia64_lpte *pte;
1450
1451	if (pmap == NULL)
1452		return;
1453
1454	if (pmap->pm_stats.resident_count == 0)
1455		return;
1456
1457	oldpmap = pmap_install(pmap);
1458
1459	/*
	 * Special handling for removing a single page: a very
	 * common operation for which we can short circuit some
	 * code.
1463	 */
1464	if (sva + PAGE_SIZE == eva) {
1465		pmap_remove_page(pmap, sva);
1466		pmap_install(oldpmap);
1467		return;
1468	}
1469
1470	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1471		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1472			va = pv->pv_va;
1473			if (va >= sva && va < eva) {
1474				pte = pmap_find_vhpt(va);
1475				pmap_remove_pte(pmap, pte, va, pv, 1);
1476				pmap_invalidate_page(pmap, va);
1477			}
1478		}
1479
1480	} else {
		for (va = sva; va < eva; va += PAGE_SIZE) {
1482			pte = pmap_find_vhpt(va);
1483			if (pte) {
1484				pmap_remove_pte(pmap, pte, va, 0, 1);
1485				pmap_invalidate_page(pmap, va);
1486			}
1487		}
1488	}
1489
1490	pmap_install(oldpmap);
1491}
1492
1493/*
1494 *	Routine:	pmap_remove_all
1495 *	Function:
1496 *		Removes this physical page from
1497 *		all physical maps in which it resides.
1498 *		Reflects back modify bits to the pager.
1499 *
1500 *	Notes:
1501 *		Original versions of this routine were very
1502 *		inefficient because they iteratively called
1503 *		pmap_remove (slow...)
1504 */
1505
1506void
1507pmap_remove_all(vm_page_t m)
1508{
1509	pmap_t oldpmap;
1510	pv_entry_t pv;
1511	int s;
1512
1513#if defined(PMAP_DIAGNOSTIC)
1514	/*
1515	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1516	 * pages!
1517	 */
1518	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1519		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1520	}
1521#endif
1522
1523	s = splvm();
1524
1525	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1526		struct ia64_lpte *pte;
1527		pmap_t pmap = pv->pv_pmap;
1528		vm_offset_t va = pv->pv_va;
1529
1530		oldpmap = pmap_install(pmap);
1531		pte = pmap_find_vhpt(va);
1532		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1533			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1534		pmap_remove_pte(pmap, pte, va, pv, 1);
1535		pmap_invalidate_page(pmap, va);
1536		pmap_install(oldpmap);
1537	}
1538
1539	vm_page_flag_clear(m, PG_WRITEABLE);
1540
1541	splx(s);
1542	return;
1543}
1544
1545/*
1546 *	Set the physical protection on the
1547 *	specified range of this map as requested.
1548 */
1549void
1550pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1551{
1552	pmap_t oldpmap;
1553	struct ia64_lpte *pte;
1554	int newprot;
1555
1556	if (pmap == NULL)
1557		return;
1558
1559	oldpmap = pmap_install(pmap);
1560
1561	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1562		pmap_remove(pmap, sva, eva);
1563		pmap_install(oldpmap);
1564		return;
1565	}
1566
1567	if (prot & VM_PROT_WRITE) {
1568		pmap_install(oldpmap);
1569		return;
1570	}
1571
1572	newprot = pte_prot(pmap, prot);
1573
1574	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1575		panic("pmap_protect: unaligned addresses");
1576
1577	while (sva < eva) {
1578		/*
1579		 * If page is invalid, skip this page
1580		 */
1581		pte = pmap_find_vhpt(sva);
1582		if (!pte) {
1583			sva += PAGE_SIZE;
1584			continue;
1585		}
1586
1587		if (pmap_pte_prot(pte) != newprot) {
1588			if (pte->pte_ig & PTE_IG_MANAGED) {
1589				vm_offset_t pa = pmap_pte_pa(pte);
1590				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1591				if (pte->pte_d) {
1592					if (pmap_track_modified(sva))
1593						vm_page_dirty(m);
1594					pte->pte_d = 0;
1595				}
1596				if (pte->pte_a) {
1597					vm_page_flag_set(m, PG_REFERENCED);
1598					pte->pte_a = 0;
1599				}
1600			}
1601			pmap_pte_set_prot(pte, newprot);
1602			pmap_update_vhpt(pte, sva);
1603			pmap_invalidate_page(pmap, sva);
1604		}
1605
1606		sva += PAGE_SIZE;
1607	}
1608	pmap_install(oldpmap);
1609}
1610
1611/*
1612 *	Insert the given physical page (p) at
1613 *	the specified virtual address (v) in the
1614 *	target physical map with the protection requested.
1615 *
1616 *	If specified, the page will be wired down, meaning
1617 *	that the related pte can not be reclaimed.
1618 *
1619 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1620 *	or lose information.  That is, this routine must actually
1621 *	insert this page into the given map NOW.
1622 */
1623void
1624pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1625	   boolean_t wired)
1626{
1627	pmap_t oldpmap;
1628	vm_offset_t pa;
1629	vm_offset_t opa;
1630	struct ia64_lpte origpte;
1631	struct ia64_lpte *pte;
1632	int managed;
1633
1634	if (pmap == NULL)
1635		return;
1636
1637	pmap_ensure_rid(pmap, va);
1638
1639	oldpmap = pmap_install(pmap);
1640
1641	va &= ~PAGE_MASK;
1642#ifdef PMAP_DIAGNOSTIC
1643	if (va > VM_MAX_KERNEL_ADDRESS)
1644		panic("pmap_enter: toobig");
1645#endif
1646
1647	/*
1648	 * Find (or create) a pte for the given mapping.
1649	 */
1650	pte = pmap_find_pte(va);
1651	origpte = *pte;
1652
1653	if (origpte.pte_p)
1654		opa = pmap_pte_pa(&origpte);
1655	else
1656		opa = 0;
1657	managed = 0;
1658
1659	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1660
1661	/*
1662	 * Mapping has not changed, must be protection or wiring change.
1663	 */
1664	if (origpte.pte_p && (opa == pa)) {
1665		/*
1666		 * Wiring change, just update stats. We don't worry about
1667		 * wiring PT pages as they remain resident as long as there
1668		 * are valid mappings in them. Hence, if a user page is wired,
1669		 * the PT page will be also.
1670		 */
1671		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1672			pmap->pm_stats.wired_count++;
1673		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1674			pmap->pm_stats.wired_count--;
1675
1676		/*
1677		 * We might be turning off write access to the page,
1678		 * so we go ahead and sense modify status.
1679		 */
1680		if (origpte.pte_ig & PTE_IG_MANAGED) {
1681			if (origpte.pte_d && pmap_track_modified(va)) {
1682				vm_page_t om;
1683				om = PHYS_TO_VM_PAGE(opa);
1684				vm_page_dirty(om);
1685			}
1686		}
1687
1688		managed = origpte.pte_ig & PTE_IG_MANAGED;
1689		goto validate;
1690	}
1691	/*
1692	 * Mapping has changed, invalidate old range and fall
1693	 * through to handle validating new mapping.
1694	 */
1695	if (opa) {
1696		int error;
1697		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1698		if (error)
1699			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1700	}
1701
1702	/*
1703	 * Enter on the PV list if part of our managed memory.
1704	 */
1705	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1706		pmap_insert_entry(pmap, va, m);
1707		managed |= PTE_IG_MANAGED;
1708	}
1709
1710	/*
1711	 * Increment counters
1712	 */
1713	pmap->pm_stats.resident_count++;
1714	if (wired)
1715		pmap->pm_stats.wired_count++;
1716
1717validate:
1718
1719	/*
1720	 * Now validate mapping with desired protection/wiring. This
1721	 * adds the pte to the VHPT if necessary.
1722	 */
1723	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1724		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1725
1726	/*
1727	 * if the mapping or permission bits are different, we need
1728	 * to invalidate the page.
1729	 */
1730	if (!pmap_equal_pte(&origpte, pte))
1731		pmap_invalidate_page(pmap, va);
1732
1733	pmap_install(oldpmap);
1734}
1735
1736/*
1737 * this code makes some *MAJOR* assumptions:
1738 * 1. Current pmap & pmap exists.
1739 * 2. Not wired.
1740 * 3. Read access.
1741 * 4. No page table pages.
1742 * 5. Tlbflush is deferred to calling procedure.
1743 * 6. Page IS managed.
1744 * but is *MUCH* faster than pmap_enter...
1745 */
1746
1747static void
1748pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1749{
1750	struct ia64_lpte *pte;
1751	pmap_t oldpmap;
1752
1753	pmap_ensure_rid(pmap, va);
1754
1755	oldpmap = pmap_install(pmap);
1756
1757	pte = pmap_find_pte(va);
1758	if (pte->pte_p)
1759		return;
1760
1761	/*
	 * Enter on the PV list since it is part of our managed memory.
1763	 */
1764	pmap_insert_entry(pmap, va, m);
1765
1766	/*
1767	 * Increment counters
1768	 */
1769	pmap->pm_stats.resident_count++;
1770
1771	/*
1772	 * Initialise PTE with read-only protection and enter into VHPT.
1773	 */
1774	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1775		     PTE_IG_MANAGED,
1776		     PTE_PL_USER, PTE_AR_R);
1777
1778	pmap_install(oldpmap);
1779}
1780
1781/*
1782 * Make temporary mapping for a physical address. This is called
1783 * during dump.
1784 */
1785void *
1786pmap_kenter_temporary(vm_offset_t pa, int i)
1787{
1788	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1789}
1790
1791#define MAX_INIT_PT (96)
1792/*
1793 * pmap_object_init_pt preloads the ptes for a given object
1794 * into the specified pmap.  This eliminates the blast of soft
1795 * faults on process startup and immediately after an mmap.
1796 */
1797void
1798pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1799		    vm_object_t object, vm_pindex_t pindex,
1800		    vm_size_t size, int limit)
1801{
1802	pmap_t oldpmap;
1803	vm_offset_t tmpidx;
1804	int psize;
1805	vm_page_t p;
1806	int objpgs;
1807
1808	if (pmap == NULL || object == NULL)
1809		return;
1810
1811	oldpmap = pmap_install(pmap);
1812
1813	psize = ia64_btop(size);
1814
1815	if ((object->type != OBJT_VNODE) ||
1816		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1817			(object->resident_page_count > MAX_INIT_PT))) {
1818		pmap_install(oldpmap);
1819		return;
1820	}
1821
1822	if (psize + pindex > object->size) {
1823		if (object->size < pindex)
1824			return;
1825		psize = object->size - pindex;
1826	}
1827
1828	/*
1829	 * if we are processing a major portion of the object, then scan the
1830	 * entire thing.
1831	 */
1832	if (psize > (object->resident_page_count >> 2)) {
1833		objpgs = psize;
1834
1835		for (p = TAILQ_FIRST(&object->memq);
1836		    ((objpgs > 0) && (p != NULL));
1837		    p = TAILQ_NEXT(p, listq)) {
1838
1839			tmpidx = p->pindex;
1840			if (tmpidx < pindex) {
1841				continue;
1842			}
1843			tmpidx -= pindex;
1844			if (tmpidx >= psize) {
1845				continue;
1846			}
1847			/*
			 * Don't allow a madvise to blow away our really
			 * free pages by allocating pv entries.
1850			 */
1851			if ((limit & MAP_PREFAULT_MADVISE) &&
1852			    cnt.v_free_count < cnt.v_free_reserved) {
1853				break;
1854			}
1855			vm_page_lock_queues();
1856			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1857				(p->busy == 0) &&
1858			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1859				if ((p->queue - p->pc) == PQ_CACHE)
1860					vm_page_deactivate(p);
1861				vm_page_busy(p);
1862				vm_page_unlock_queues();
1863				pmap_enter_quick(pmap,
1864						 addr + ia64_ptob(tmpidx), p);
1865				vm_page_lock_queues();
1866				vm_page_wakeup(p);
1867			}
1868			vm_page_unlock_queues();
1869			objpgs -= 1;
1870		}
1871	} else {
1872		/*
1873		 * else lookup the pages one-by-one.
1874		 */
1875		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1876			/*
			 * Don't allow a madvise to blow away our really
			 * free pages by allocating pv entries.
1879			 */
1880			if ((limit & MAP_PREFAULT_MADVISE) &&
1881			    cnt.v_free_count < cnt.v_free_reserved) {
1882				break;
1883			}
1884			p = vm_page_lookup(object, tmpidx + pindex);
1885			if (p == NULL)
1886				continue;
1887			vm_page_lock_queues();
1888			if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
1889				(p->busy == 0) &&
1890			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1891				if ((p->queue - p->pc) == PQ_CACHE)
1892					vm_page_deactivate(p);
1893				vm_page_busy(p);
1894				vm_page_unlock_queues();
1895				pmap_enter_quick(pmap,
1896						 addr + ia64_ptob(tmpidx), p);
1897				vm_page_lock_queues();
1898				vm_page_wakeup(p);
1899			}
1900			vm_page_unlock_queues();
1901		}
1902	}
1903	pmap_install(oldpmap);
1904	return;
1905}
1906
1907/*
1908 * pmap_prefault provides a quick way of clustering
 * page faults into a process's address space.  It is a "cousin"
1910 * of pmap_object_init_pt, except it runs at page fault time instead
1911 * of mmap time.
1912 */
1913#define PFBAK 4
1914#define PFFOR 4
1915#define PAGEORDER_SIZE (PFBAK+PFFOR)
1916
1917static int pmap_prefault_pageorder[] = {
1918	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
1919	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1920	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1921	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1922};
1923
1924void
1925pmap_prefault(pmap, addra, entry)
1926	pmap_t pmap;
1927	vm_offset_t addra;
1928	vm_map_entry_t entry;
1929{
1930	int i;
1931	vm_offset_t starta;
1932	vm_offset_t addr;
1933	vm_pindex_t pindex;
1934	vm_page_t m, mpte;
1935	vm_object_t object;
1936
1937	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1938		return;
1939
1940	object = entry->object.vm_object;
1941
1942	starta = addra - PFBAK * PAGE_SIZE;
1943	if (starta < entry->start) {
1944		starta = entry->start;
1945	} else if (starta > addra) {
1946		starta = 0;
1947	}
1948
1949	mpte = NULL;
1950	for (i = 0; i < PAGEORDER_SIZE; i++) {
1951		vm_object_t lobject;
1952		struct ia64_lpte *pte;
1953
1954		addr = addra + pmap_prefault_pageorder[i];
1955		if (addr > addra + (PFFOR * PAGE_SIZE))
1956			addr = 0;
1957
1958		if (addr < starta || addr >= entry->end)
1959			continue;
1960
1961		pte = pmap_find_vhpt(addr);
1962		if (pte && pte->pte_p)
1963			continue;
1964
1965		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
1966		lobject = object;
1967		for (m = vm_page_lookup(lobject, pindex);
1968		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
1969		    lobject = lobject->backing_object) {
1970			if (lobject->backing_object_offset & PAGE_MASK)
1971				break;
1972			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
1973			m = vm_page_lookup(lobject->backing_object, pindex);
1974		}
1975
1976		/*
		 * Give up when the page is not in memory.
1978		 */
1979		if (m == NULL)
1980			break;
1981		vm_page_lock_queues();
1982		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1983			(m->busy == 0) &&
1984		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1985
1986			if ((m->queue - m->pc) == PQ_CACHE) {
1987				vm_page_deactivate(m);
1988			}
1989			vm_page_busy(m);
1990			vm_page_unlock_queues();
1991			pmap_enter_quick(pmap, addr, m);
1992			vm_page_lock_queues();
1993			vm_page_wakeup(m);
1994		}
1995		vm_page_unlock_queues();
1996	}
1997}
1998
1999/*
2000 *	Routine:	pmap_change_wiring
2001 *	Function:	Change the wiring attribute for a map/virtual-address
2002 *			pair.
2003 *	In/out conditions:
2004 *			The mapping must already exist in the pmap.
2005 */
2006void
2007pmap_change_wiring(pmap, va, wired)
2008	register pmap_t pmap;
2009	vm_offset_t va;
2010	boolean_t wired;
2011{
2012	pmap_t oldpmap;
2013	struct ia64_lpte *pte;
2014
2015	if (pmap == NULL)
2016		return;
2017
2018	oldpmap = pmap_install(pmap);
2019
2020	pte = pmap_find_vhpt(va);
2021
2022	if (wired && !pmap_pte_w(pte))
2023		pmap->pm_stats.wired_count++;
2024	else if (!wired && pmap_pte_w(pte))
2025		pmap->pm_stats.wired_count--;
2026
2027	/*
2028	 * Wiring is not a hardware characteristic so there is no need to
2029	 * invalidate TLB.
2030	 */
2031	pmap_pte_set_w(pte, wired);
2032
2033	pmap_install(oldpmap);
2034}
2035
2036
2037
2038/*
2039 *	Copy the range specified by src_addr/len
2040 *	from the source map to the range dst_addr/len
2041 *	in the destination map.
2042 *
2043 *	This routine is only advisory and need not do anything.
2044 */
2045
2046void
2047pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2048	  vm_offset_t src_addr)
2049{
2050}
2051
2052
2053/*
2054 *	pmap_zero_page zeros the specified hardware page by
2055 *	mapping it into virtual memory and using bzero to clear
2056 *	its contents.
2057 */
2058
2059void
2060pmap_zero_page(vm_page_t m)
2061{
2062	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2063	bzero((caddr_t) va, PAGE_SIZE);
2064}
2065
2066
2067/*
2068 *	pmap_zero_page_area zeros the specified hardware page by
2069 *	mapping it into virtual memory and using bzero to clear
2070 *	its contents.
2071 *
2072 *	off and size must reside within a single page.
2073 */
2074
2075void
2076pmap_zero_page_area(vm_page_t m, int off, int size)
2077{
2078	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2079	bzero((char *)(caddr_t)va + off, size);
2080}
2081
2082
2083/*
2084 *	pmap_zero_page_idle zeros the specified hardware page by
2085 *	mapping it into virtual memory and using bzero to clear
2086 *	its contents.  This is for the vm_idlezero process.
2087 */
2088
2089void
2090pmap_zero_page_idle(vm_page_t m)
2091{
2092	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2093	bzero((caddr_t) va, PAGE_SIZE);
2094}
2095
2096
2097/*
2098 *	pmap_copy_page copies the specified (machine independent)
2099 *	page by mapping the page into virtual memory and using
2100 *	bcopy to copy the page, one machine dependent page at a
2101 *	time.
2102 */
2103void
2104pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2105{
2106	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2107	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2108	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2109}
2110
2111/*
2112 * Returns true if the pmap's pv is one of the first
2113 * 16 pvs linked to from this page.  This count may
2114 * be changed upwards or downwards in the future; it
2115 * is only necessary that true be returned for a small
2116 * subset of pmaps for proper page aging.
2117 */
2118boolean_t
2119pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2120{
2121	pv_entry_t pv;
2122	int loops = 0;
2123	int s;
2124
2125	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2126		return FALSE;
2127
2128	s = splvm();
2129
2130	/*
2131	 * Check the page's mappings, returning immediately if this pmap is found.
2132	 */
2133	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2134		if (pv->pv_pmap == pmap) {
2135			splx(s);
2136			return TRUE;
2137		}
2138		loops++;
2139		if (loops >= 16)
2140			break;
2141	}
2142	splx(s);
2143	return (FALSE);
2144}
2145
2146#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2147/*
2148 * Remove all pages from the specified address space; this aids
2149 * process exit speeds.  This code is special-cased for the
2150 * current process only, but can have the more generic (and
2151 * slightly slower) mode enabled.  This is much faster than
2152 * pmap_remove in the case of running down an entire address
2153 * space.
2154 */
2155void
2156pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2157{
2158	pv_entry_t pv, npv;
2159	int s;
2160
2161#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2162	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2163		printf("warning: pmap_remove_pages called with non-current pmap\n");
2164		return;
2165	}
2166#endif
2167
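	/*
	 * Walk this pmap's own pv list rather than scanning the address
	 * range; every unwired mapping that falls within [sva, eva) is
	 * removed below.
	 */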
2168	s = splvm();
2169	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2170		pv;
2171		pv = npv) {
2172		struct ia64_lpte *pte;
2173
2174		npv = TAILQ_NEXT(pv, pv_plist);
2175
2176		if (pv->pv_va >= eva || pv->pv_va < sva) {
2177			continue;
2178		}
2179
2180		pte = pmap_find_vhpt(pv->pv_va);
2181		if (!pte)
2182			panic("pmap_remove_pages: page on pm_pvlist has no pte");
2183
2184		/*
2185		 * We cannot remove wired pages from a process' mapping
2186		 * at this time.
2187		 */
2188		if (pte->pte_ig & PTE_IG_WIRED) {
2189			continue;
2190		}
2191
2192		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2193	}
2194	splx(s);
2195
2196	pmap_invalidate_all(pmap);
2197}
2198
2199/*
2200 *      pmap_page_protect:
2201 *
2202 *      Lower the permission for all mappings to a given page.
2203 */
2204void
2205pmap_page_protect(vm_page_t m, vm_prot_t prot)
2206{
2207	pv_entry_t pv;
2208
2209	if ((prot & VM_PROT_WRITE) != 0)
2210		return;
2211	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2212		if ((m->flags & PG_WRITEABLE) == 0)
2213			return;
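		/*
		 * Write access is being revoked: downgrade each existing
		 * mapping of the page and invalidate its TLB entry.
		 */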
2214		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2215			int newprot = pte_prot(pv->pv_pmap, prot);
2216			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2217			struct ia64_lpte *pte;
2218			pte = pmap_find_vhpt(pv->pv_va);
2219			pmap_pte_set_prot(pte, newprot);
2220			pmap_update_vhpt(pte, pv->pv_va);
2221			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2222			pmap_install(oldpmap);
2223		}
2224		vm_page_flag_clear(m, PG_WRITEABLE);
2225	} else {
2226		pmap_remove_all(m);
2227	}
2228}
2229
2230vm_offset_t
2231pmap_phys_address(int ppn)
2232{
2233	return (ia64_ptob(ppn));
2234}
2235
2236/*
2237 *	pmap_ts_referenced:
2238 *
2239 *	Return a count of reference bits for a page, clearing those bits.
2240 *	It is not necessary for every reference bit to be cleared, but it
2241 *	is necessary that 0 only be returned when there are truly no
2242 *	reference bits set.
2243 *
2244 *	XXX: The exact number of bits to check and clear is a matter that
2245 *	should be tested and standardized at some point in the future for
2246 *	optimal aging of shared pages.
2247 */
2248int
2249pmap_ts_referenced(vm_page_t m)
2250{
2251	pv_entry_t pv;
2252	int count = 0;
2253
2254	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2255		return 0;
2256
2257	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2258		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2259		struct ia64_lpte *pte;
2260		pte = pmap_find_vhpt(pv->pv_va);
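		/*
		 * pte_a is the hardware accessed bit.  Count and clear it,
		 * then flush the VHPT entry and the TLB so that the next
		 * reference sets it again.
		 */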
2261		if (pte->pte_a) {
2262			count++;
2263			pte->pte_a = 0;
2264			pmap_update_vhpt(pte, pv->pv_va);
2265			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2266		}
2267		pmap_install(oldpmap);
2268	}
2269
2270	return count;
2271}
2272
2273#if 0
2274/*
2275 *	pmap_is_referenced:
2276 *
2277 *	Return whether or not the specified physical page was referenced
2278 *	in any physical maps.
2279 */
2280static boolean_t
2281pmap_is_referenced(vm_page_t m)
2282{
2283	pv_entry_t pv;
2284
2285	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2286		return FALSE;
2287
2288	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2289		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2290		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2291		pmap_install(oldpmap);
2292		if (pte->pte_a)
2293			return 1;
2294	}
2295
2296	return 0;
2297}
2298#endif
2299
2300/*
2301 *	pmap_is_modified:
2302 *
2303 *	Return whether or not the specified physical page was modified
2304 *	in any physical maps.
2305 */
2306boolean_t
2307pmap_is_modified(vm_page_t m)
2308{
2309	pv_entry_t pv;
2310
2311	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2312		return FALSE;
2313
2314	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2315		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2316		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2317		pmap_install(oldpmap);
2318		if (pte->pte_d)
2319			return 1;
2320	}
2321
2322	return 0;
2323}
2324
2325/*
2326 *	Clear the modify bits on the specified physical page.
2327 */
2328void
2329pmap_clear_modify(vm_page_t m)
2330{
2331	pv_entry_t pv;
2332
2333	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2334		return;
2335
2336	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2337		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2338		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
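		/*
		 * pte_d is the hardware dirty bit; clear it and flush the
		 * mapping so that a later write sets it again.
		 */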
2339		if (pte->pte_d) {
2340			pte->pte_d = 0;
2341			pmap_update_vhpt(pte, pv->pv_va);
2342			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2343		}
2344		pmap_install(oldpmap);
2345	}
2346}
2347
2348/*
2349 *	pmap_clear_reference:
2350 *
2351 *	Clear the reference bit on the specified physical page.
2352 */
2353void
2354pmap_clear_reference(vm_page_t m)
2355{
2356	pv_entry_t pv;
2357
2358	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2359		return;
2360
2361	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2362		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2363		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2364		if (pte->pte_a) {
2365			pte->pte_a = 0;
2366			pmap_update_vhpt(pte, pv->pv_va);
2367			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2368		}
2369		pmap_install(oldpmap);
2370	}
2371}
2372
2373/*
2374 * Miscellaneous support routines follow
2375 */
2376
2377static void
2378ia64_protection_init()
2379{
2380	int prot, *kp, *up;
2381
2382	kp = protection_codes[0];
2383	up = protection_codes[1];
2384
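	/*
	 * For each protection combination record the PTE access rights
	 * and privilege level, packed as (ar << 2) | pl, for kernel and
	 * user mappings respectively.
	 */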
2385	for (prot = 0; prot < 8; prot++) {
2386		switch (prot) {
2387		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2388			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2389			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2390			break;
2391
2392		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2393			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2394			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2395			break;
2396
2397		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2398			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2399			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2400			break;
2401
2402		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2403			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2404			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2405			break;
2406
2407		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2408			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2409			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2410			break;
2411
2412		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2413			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2414			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2415			break;
2416
2417		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2418			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2419			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2420			break;
2421
2422		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2423			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2424			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2425			break;
2426		}
2427	}
2428}
2429
2430/*
2431 * Map a set of physical memory pages into the kernel virtual
2432 * address space. Return a pointer to where it is mapped. This
2433 * routine is intended to be used for mapping device memory,
2434 * NOT real memory.
2435 */
2436void *
2437pmap_mapdev(vm_offset_t pa, vm_size_t size)
2438{
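	/*
	 * Device memory is accessed through region 6, the uncacheable
	 * identity mapping of physical memory, so no page table entries
	 * are needed.
	 */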
2439	return (void*) IA64_PHYS_TO_RR6(pa);
2440}
2441
2442/*
2443 * 'Unmap' a range mapped by pmap_mapdev().
2444 */
2445void
2446pmap_unmapdev(vm_offset_t va, vm_size_t size)
2447{
2448	return;
2449}
2450
2451/*
2452 * perform the pmap work for mincore
2453 */
2454int
2455pmap_mincore(pmap_t pmap, vm_offset_t addr)
2456{
2457	pmap_t oldpmap;
2458	struct ia64_lpte *pte;
2459	int val = 0;
2460
2461	oldpmap = pmap_install(pmap);
2462	pte = pmap_find_vhpt(addr);
2463	pmap_install(oldpmap);
2464
2465	if (!pte)
2466		return 0;
2467
2468	if (pmap_pte_v(pte)) {
2469		vm_page_t m;
2470		vm_offset_t pa;
2471
2472		val = MINCORE_INCORE;
2473		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2474			return val;
2475
2476		pa = pmap_pte_pa(pte);
2477
2478		m = PHYS_TO_VM_PAGE(pa);
2479
2480		/*
2481		 * Modified by us
2482		 */
2483		if (pte->pte_d)
2484			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2485		/*
2486		 * Modified by someone
2487		 */
2488		else if (pmap_is_modified(m))
2489			val |= MINCORE_MODIFIED_OTHER;
2490		/*
2491		 * Referenced by us
2492		 */
2493		if (pte->pte_a)
2494			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2495
2496		/*
2497		 * Referenced by someone
2498		 */
2499		else if (pmap_ts_referenced(m)) {
2500			val |= MINCORE_REFERENCED_OTHER;
2501			vm_page_flag_set(m, PG_REFERENCED);
2502		}
2503	}
2504	return val;
2505}
2506
2507void
2508pmap_activate(struct thread *td)
2509{
2510	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2511}
2512
2513pmap_t
2514pmap_install(pmap_t pmap)
2515{
2516	pmap_t oldpmap;
2517	int i;
2518
2519	critical_enter();
2520
2521	oldpmap = PCPU_GET(current_pmap);
2522
2523	if (pmap == oldpmap || pmap == kernel_pmap) {
2524		critical_exit();
2525		return pmap;
2526	}
2527
2528	if (oldpmap) {
2529		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2530	}
2531
2532	PCPU_SET(current_pmap, pmap);
2533	if (!pmap) {
2534		/*
2535		 * RIDs 0..4 have no mappings, which ensures that any
2536		 * access generates a page fault.
2537		 */
2538		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2539		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2540		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2541		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2542		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2543		critical_exit();
2544		return oldpmap;
2545	}
2546
2547	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2548
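	/*
	 * Program region registers 0..4 with this pmap's RIDs.  Each
	 * value packs the region ID (bits 8..31), the preferred page
	 * size (bits 2..7) and the VHPT walker enable bit (bit 0).
	 */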
2549	for (i = 0; i < 5; i++)
2550		ia64_set_rr(IA64_RR_BASE(i),
2551			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2552
2553	critical_exit();
2554	return oldpmap;
2555}
2556
2557vm_offset_t
2558pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2559{
2560
2561	return addr;
2562}
2563
2564#include "opt_ddb.h"
2565
2566#ifdef DDB
2567
2568#include <ddb/ddb.h>
2569
2570static const char*	psnames[] = {
2571	"1B",	"2B",	"4B",	"8B",
2572	"16B",	"32B",	"64B",	"128B",
2573	"256B",	"512B",	"1K",	"2K",
2574	"4K",	"8K",	"16K",	"32K",
2575	"64K",	"128K",	"256K",	"512K",
2576	"1M",	"2M",	"4M",	"8M",
2577	"16M",	"32M",	"64M",	"128M",
2578	"256M",	"512M",	"1G",	"2G"
2579};
2580
2581static void
2582print_trs(int type)
2583{
2584	struct ia64_pal_result	res;
2585	int			i, maxtr;
2586	struct {
2587		struct ia64_pte	pte;
2588		struct ia64_itir itir;
2589		struct ia64_ifa ifa;
2590		struct ia64_rr	rr;
2591	}			buf;
2592	static const char*	manames[] = {
2593		"WB",	"bad",	"bad",	"bad",
2594		"UC",	"UCE",	"WC",	"NaT",
2596	};
2597
2598	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2599	if (res.pal_status != 0) {
2600		db_printf("Can't get VM summary\n");
2601		return;
2602	}
2603
2604	if (type == 0)
2605		maxtr = (res.pal_result[0] >> 40) & 0xff;
2606	else
2607		maxtr = (res.pal_result[0] >> 32) & 0xff;
2608
2609	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2610	for (i = 0; i <= maxtr; i++) {
2611		bzero(&buf, sizeof(buf));
2612		res = ia64_call_pal_stacked_physical
2613			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
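		/*
		 * The low bits of pal_result[0] indicate which of the
		 * returned fields are valid; clear those that are not.
		 */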
2614		if (!(res.pal_result[0] & 1))
2615			buf.pte.pte_ar = 0;
2616		if (!(res.pal_result[0] & 2))
2617			buf.pte.pte_pl = 0;
2618		if (!(res.pal_result[0] & 4))
2619			buf.pte.pte_d = 0;
2620		if (!(res.pal_result[0] & 8))
2621			buf.pte.pte_ma = 0;
2622		db_printf(
2623			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2624			buf.ifa.ifa_ig & 1,
2625			buf.rr.rr_rid,
2626			buf.ifa.ifa_vpn,
2627			buf.pte.pte_ppn,
2628			psnames[buf.itir.itir_ps],
2629			buf.pte.pte_ed,
2630			buf.pte.pte_ar,
2631			buf.pte.pte_pl,
2632			buf.pte.pte_d,
2633			buf.pte.pte_a,
2634			manames[buf.pte.pte_ma],
2635			buf.pte.pte_p,
2636			buf.itir.itir_key);
2637	}
2638}
2639
2640DB_COMMAND(itr, db_itr)
2641{
2642	print_trs(0);
2643}
2644
2645DB_COMMAND(dtr, db_dtr)
2646{
2647	print_trs(1);
2648}
2649
2650DB_COMMAND(rr, db_rr)
2651{
2652	int i;
2653	u_int64_t t;
2654	struct ia64_rr rr;
2655
2656	printf("RR RID    PgSz VE\n");
2657	for (i = 0; i < 8; i++) {
2658		__asm __volatile ("mov %0=rr[%1]"
2659				  : "=r"(t)
2660				  : "r"(IA64_RR_BASE(i)));
2661		*(u_int64_t *) &rr = t;
2662		printf("%d  %06x %4s %d\n",
2663		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2664	}
2665}
2666
2667DB_COMMAND(thash, db_thash)
2668{
2669	if (!have_addr)
2670		return;
2671
2672	db_printf("%p\n", (void *) ia64_thash(addr));
2673}
2674
2675DB_COMMAND(ttag, db_ttag)
2676{
2677	if (!have_addr)
2678		return;
2679
2680	db_printf("0x%lx\n", ia64_ttag(addr));
2681}
2682
2683#endif
2684