1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 115937 2003-06-07 04:17:39Z marcel $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 * this module may delay invalidation or protection-reduction
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
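/*
 * A note on addressing (a sketch of the IA-64 region model): the top
 * three bits of a 64-bit virtual address select the region, so
 * IA64_RR_BASE(n) is effectively (n << 61).  The direct maps in
 * regions 6 and 7 work by simply OR-ing those bits onto a physical
 * address; IA64_PHYS_TO_RR7(pa), used throughout this file, yields
 * pa | (7UL << 61) and is how the kernel gets a cacheable virtual
 * alias for an arbitrary physical page.
 */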
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120#include <vm/uma_int.h>
121
122#include <sys/user.h>
123
124#include <machine/cpu.h>
125#include <machine/pal.h>
126#include <machine/md_var.h>
127
128/* XXX move to a header. */
129extern u_int64_t ia64_gateway_page[];
130
131MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
132
133#ifndef KSTACK_MAX_PAGES
134#define	KSTACK_MAX_PAGES 32
135#endif
136
137#ifndef PMAP_SHPGPERPROC
138#define PMAP_SHPGPERPROC 200
139#endif
140
141#if defined(DIAGNOSTIC)
142#define PMAP_DIAGNOSTIC
143#endif
144
145#define MINPV 2048	/* Preallocate at least this many */
146#define MAXPV 20480	/* But no more than this */
147
148#if 0
149#define PMAP_DIAGNOSTIC
150#define PMAP_DEBUG
151#endif
152
153#if !defined(PMAP_DIAGNOSTIC)
154#define PMAP_INLINE __inline
155#else
156#define PMAP_INLINE
157#endif
158
159/*
160 * Get PDEs and PTEs for user/kernel address space
161 */
162#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
163#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
164#define pmap_pte_v(pte)		((pte)->pte_p)
165#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
166#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
167
168#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
169				:((pte)->pte_ig &= ~PTE_IG_WIRED))
170#define pmap_pte_set_prot(pte, v) do {		\
171    (pte)->pte_ar = v >> 2;			\
172    (pte)->pte_pl = v & 3;			\
173} while (0)
174
175/*
176 * Given a map and a machine independent protection code,
177 * convert to an ia64 protection code.
178 */
179#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
180#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
181#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
182int	protection_codes[2][8];
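/*
 * The codes stored in protection_codes[][] above are packed the same
 * way pmap_pte_prot() reads them back: the access-rights field in the
 * upper bits and the 2-bit privilege level in the low bits, i.e.
 * code == (ar << 2) | pl.  A kernel read/write entry, for instance,
 * would look like (PTE_AR_RW << 2) | PTE_PL_KERN, which pte_prot_ar()
 * and pte_prot_pl() split apart again.
 */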
183
184/*
185 * Return non-zero if this pmap is currently active
186 */
187#define pmap_isactive(pmap)	(pmap->pm_active)
188
189/*
190 * Statically allocated kernel pmap
191 */
192struct pmap kernel_pmap_store;
193
194vm_offset_t avail_start;	/* PA of first available physical page */
195vm_offset_t avail_end;		/* PA of last available physical page */
196vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
197vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
198static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
199
200vm_offset_t vhpt_base, vhpt_size;
201struct mtx pmap_vhptmutex;
202
203/*
204 * We use an object to own the kernel's 'page tables'. For simplicity,
205 * we use one page directory to index a set of pages containing
206 * ia64_lptes. This gives us up to 2Gb of kernel virtual space.
207 */
208static int nkpt;
209struct ia64_lpte **ia64_kptdir;
210#define KPTE_DIR_INDEX(va) \
211	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
212#define KPTE_PTE_INDEX(va) \
213	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
214#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
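/*
 * A worked example of the layout above, assuming 8KB pages (PAGE_SHIFT
 * == 13) and a 32-byte struct ia64_lpte: each pte page holds NKPTEPG ==
 * 256 entries and so maps 256 * 8KB == 2MB; KPTE_PTE_INDEX() picks one
 * of those 256 entries from va bits 20..13, and KPTE_DIR_INDEX() picks
 * one of the PAGE_SIZE/8 == 1024 directory slots from va bits 30..21,
 * for a total of 1024 * 2MB == 2GB of kernel virtual space.
 */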
215
216vm_offset_t kernel_vm_end;
217
218/* Values for ptc.e. XXX values for SKI. */
219static u_int64_t pmap_ptc_e_base = 0x100000000;
220static u_int64_t pmap_ptc_e_count1 = 3;
221static u_int64_t pmap_ptc_e_count2 = 2;
222static u_int64_t pmap_ptc_e_stride1 = 0x2000;
223static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
224
225/*
226 * Data for the RID allocator
227 */
228static int pmap_ridcount;
229static int pmap_rididx;
230static int pmap_ridmapsz;
231static int pmap_ridmax;
232static u_int64_t *pmap_ridmap;
233struct mtx pmap_ridmutex;
234
235/*
236 * Data for the pv entry allocation mechanism
237 */
238static uma_zone_t pvzone;
239static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
240int pmap_pagedaemon_waken;
241static struct pv_entry *pvbootentries;
242static int pvbootnext, pvbootmax;
243
244/*
245 * Data for allocating PTEs for user processes.
246 */
247static uma_zone_t ptezone;
248
249/*
250 * VHPT instrumentation.
251 */
252static int pmap_vhpt_inserts;
253static int pmap_vhpt_collisions;
254static int pmap_vhpt_resident;
255SYSCTL_DECL(_vm_stats);
256SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
257SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
258	   &pmap_vhpt_inserts, 0, "");
259SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
260	   &pmap_vhpt_collisions, 0, "");
261SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
262	   &pmap_vhpt_resident, 0, "");
263
264static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
265static pv_entry_t get_pv_entry(void);
266static void	ia64_protection_init(void);
267
268static pmap_t	pmap_install(pmap_t);
269static void	pmap_invalidate_all(pmap_t pmap);
270static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
271
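/*
 * Steal physical memory from the first phys_avail[] bank that is large
 * enough.  phys_avail[] is the usual pair-wise array of physical
 * memory boundaries, e.g. (hypothetically)
 * { 0x4000, 0x800000, 0x1000000, 0x40000000, 0, 0 }, where each
 * (start, end) pair describes a free bank and a pair of zeroes ends
 * the list; banks at the front that are too small are simply dropped.
 */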
272vm_offset_t
273pmap_steal_memory(vm_size_t size)
274{
275	vm_size_t bank_size;
276	vm_offset_t pa, va;
277
278	size = round_page(size);
279
280	bank_size = phys_avail[1] - phys_avail[0];
281	while (size > bank_size) {
282		int i;
283		for (i = 0; phys_avail[i+2]; i+= 2) {
284			phys_avail[i] = phys_avail[i+2];
285			phys_avail[i+1] = phys_avail[i+3];
286		}
287		phys_avail[i] = 0;
288		phys_avail[i+1] = 0;
289		if (!phys_avail[0])
290			panic("pmap_steal_memory: out of memory");
291		bank_size = phys_avail[1] - phys_avail[0];
292	}
293
294	pa = phys_avail[0];
295	phys_avail[0] += size;
296
297	va = IA64_PHYS_TO_RR7(pa);
298	bzero((caddr_t) va, size);
299	return va;
300}
301
302/*
303 *	Bootstrap the system enough to run with virtual memory.
304 */
305void
306pmap_bootstrap()
307{
308	int i, j, count, ridbits;
309	struct ia64_pal_result res;
310
311	/*
312	 * Query the PAL Code to find the loop parameters for the
313	 * ptc.e instruction.
314	 */
315	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
316	if (res.pal_status != 0)
317		panic("Can't configure ptc.e parameters");
318	pmap_ptc_e_base = res.pal_result[0];
319	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
320	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
321	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
322	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
323	if (bootverbose)
324		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
325		       "stride1=0x%lx, stride2=0x%lx\n",
326		       pmap_ptc_e_base,
327		       pmap_ptc_e_count1,
328		       pmap_ptc_e_count2,
329		       pmap_ptc_e_stride1,
330		       pmap_ptc_e_stride2);
331
332	/*
333	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
334	 *
335	 * We currently need at least 19 bits in the RID because PID_MAX
336	 * can only be encoded in 17 bits and we need RIDs for 5 regions
337	 * per process. With PID_MAX equalling 99999 this means that we
338	 * need to be able to encode 499995 (=5*PID_MAX).
339	 * The Itanium processor only has 18 bits and the architected
340	 * minimum is exactly that. So, we cannot use a PID based scheme
341	 * in those cases. Enter pmap_ridmap...
342	 * We should avoid the map when running on a processor that has
343	 * implemented enough bits. This means that we should pass the
344	 * process/thread ID to pmap. This we currently don't do, so we
345	 * use the map anyway. However, we don't want to allocate a map
346	 * that is large enough to cover the range dictated by the number
347	 * of bits in the RID, because that may result in a RID map of
348	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
349 * The bottom line: we create a 32KB map when the processor only
350	 * implements 18 bits (or when we can't figure it out). Otherwise
351	 * we create a 64KB map.
352	 */
353	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
354	if (res.pal_status != 0) {
355		if (bootverbose)
356			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
357		ridbits = 18; /* guaranteed minimum */
358	} else {
359		ridbits = (res.pal_result[1] >> 8) & 0xff;
360		if (bootverbose)
361			printf("Processor supports %d Region ID bits\n",
362			    ridbits);
363	}
364	if (ridbits > 19)
365		ridbits = 19;
366
367	pmap_ridmax = (1 << ridbits);
368	pmap_ridmapsz = pmap_ridmax / 64;
369	pmap_ridmap = (u_int64_t *)pmap_steal_memory(pmap_ridmax / 8);
370	pmap_ridmap[0] |= 0xff;
371	pmap_rididx = 0;
372	pmap_ridcount = 8;
373	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
374
375	/*
376	 * Allocate some memory for initial kernel 'page tables'.
377	 */
378	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
379	for (i = 0; i < NKPT; i++) {
380		ia64_kptdir[i] = (void*)pmap_steal_memory(PAGE_SIZE);
381	}
382	nkpt = NKPT;
383	kernel_vm_end = NKPT * PAGE_SIZE * NKPTEPG + VM_MIN_KERNEL_ADDRESS -
384	    VM_GATEWAY_SIZE;
385
386	avail_start = phys_avail[0];
387	for (i = 0; phys_avail[i+2]; i+= 2) ;
388	avail_end = phys_avail[i+1];
389	count = i+2;
390
391	/*
392	 * Figure out a useful size for the VHPT, based on the size of
393	 * physical memory and try to locate a region which is large
394	 * enough to contain the VHPT (which must be a power of two in
395	 * size and aligned to a natural boundary).
396	 * Don't use the difference between avail_start and avail_end
397	 * as a measure for memory size. The address space is often
398	 * sparse enough that we would (try to) create a huge VHPT.
399	 */
400	vhpt_size = 15;
401	while ((1<<vhpt_size) < Maxmem * 32)
402		vhpt_size++;
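	/*
	 * Maxmem is in pages, so the loop above sizes the VHPT at roughly
	 * 32 bytes -- one long-format entry -- per physical page.  For
	 * example, with 8KB pages a hypothetical 1GB machine has 128K
	 * pages and ends up with a 4MB VHPT (vhpt_size == 22).
	 */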
403
404	vhpt_base = 0;
405	while (!vhpt_base) {
406		vm_offset_t mask;
407		if (bootverbose)
408			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
409		mask = (1L << vhpt_size) - 1;
410		for (i = 0; i < count; i += 2) {
411			vm_offset_t base, limit;
412			base = (phys_avail[i] + mask) & ~mask;
413			limit = base + (1L << vhpt_size);
414			if (limit <= phys_avail[i+1])
415				/*
416				 * VHPT can fit in this region
417				 */
418				break;
419		}
420		if (!phys_avail[i]) {
421			/*
422			 * Can't fit, try next smaller size.
423			 */
424			vhpt_size--;
425		} else {
426			vhpt_base = (phys_avail[i] + mask) & ~mask;
427		}
428	}
429	if (vhpt_size < 15)
430		panic("Can't find space for VHPT");
431
432	if (bootverbose)
433		printf("Putting VHPT at %p\n", (void *) vhpt_base);
434	if (vhpt_base != phys_avail[i]) {
435		/*
436		 * Split this region.
437		 */
438		if (bootverbose)
439			printf("Splitting [%p-%p]\n",
440			       (void *) phys_avail[i],
441			       (void *) phys_avail[i+1]);
442		for (j = count; j > i; j -= 2) {
443			phys_avail[j] = phys_avail[j-2];
444			phys_avail[j+1] = phys_avail[j-2+1];
445		}
446		phys_avail[count+2] = 0;
447		phys_avail[count+3] = 0;
448		phys_avail[i+1] = vhpt_base;
449		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
450	} else {
451		phys_avail[i] = vhpt_base + (1L << vhpt_size);
452	}
453
454	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
455	bzero((void *) vhpt_base, (1L << vhpt_size));
456
457	mtx_init(&pmap_vhptmutex, "VHPT collision chain lock", NULL, MTX_DEF);
458
459	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
460			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
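	/*
	 * The value written to cr.pta above encodes (per the architected
	 * layout, sketched here): bit 0 = ve (enable the VHPT walker),
	 * bits 7..2 = the log2 size of the table (vhpt_size), bit 8 = vf
	 * (long-format, 32-byte entries) and the high bits = the region 7
	 * virtual base of the table.
	 */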
461
462	virtual_avail = VM_MIN_KERNEL_ADDRESS;
463	virtual_end = VM_MAX_KERNEL_ADDRESS;
464
465	/*
466	 * Initialize protection array.
467	 */
468	ia64_protection_init();
469
470	/*
471	 * Initialize the kernel pmap (which is statically allocated).
472	 */
473	for (i = 0; i < 5; i++)
474		kernel_pmap->pm_rid[i] = 0;
475	kernel_pmap->pm_active = 1;
476	TAILQ_INIT(&kernel_pmap->pm_pvlist);
477	PCPU_SET(current_pmap, kernel_pmap);
478
479	/*
480	 * Region 5 is mapped via the vhpt.
481	 */
482	ia64_set_rr(IA64_RR_BASE(5),
483		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
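	/*
	 * Roughly, a region register holds: bits 31..8 = region ID,
	 * bits 7..2 = preferred page size (log2) and bit 0 = ve, whether
	 * the VHPT walker is enabled for the region.  So region 5 gets
	 * RID 5, PAGE_SHIFT-sized pages and the walker turned on, while
	 * regions 6 and 7 below get 256MB (2^28) pages with the walker
	 * off, since the Alt TLB handlers translate them directly.
	 */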
484
485	/*
486	 * Region 6 is direct mapped UC and region 7 is direct mapped
487	 * WB (cacheable). The details of this are controlled by the
488	 * Alt {I,D}TLB handlers. Here we just make sure that they have
489	 * the largest possible page size to minimize TLB usage.
490	 */
491	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
492	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
493
494	/*
495	 * Reserve some memory for allocating pvs while bootstrapping
496	 * the pv allocator. We need to have enough to cover mapping
497	 * the kmem_alloc region used to allocate the initial_pvs in
498	 * pmap_init. In general, the size of this region is
499	 * approximately (# physical pages) * (size of pv entry).
500	 */
501	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
502	pvbootentries = (struct pv_entry *)
503		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
504	pvbootnext = 0;
505
506	/*
507	 * Clear out any random TLB entries left over from booting.
508	 */
509	pmap_invalidate_all(kernel_pmap);
510
511	map_gateway_page();
512}
513
514void *
515uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
516{
517	static vm_pindex_t color;
518	vm_page_t m;
519	int pflags;
520	void *va;
521
522	*flags = UMA_SLAB_PRIV;
523	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
524		pflags = VM_ALLOC_INTERRUPT;
525	else
526		pflags = VM_ALLOC_SYSTEM;
527	if (wait & M_ZERO)
528		pflags |= VM_ALLOC_ZERO;
529
530	for (;;) {
531		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
532		if (m == NULL) {
533			if (wait & M_NOWAIT)
534				return (NULL);
535			else
536				VM_WAIT;
537		} else
538			break;
539	}
540
541	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
542	if ((m->flags & PG_ZERO) == 0)
543		bzero(va, PAGE_SIZE);
544	return (va);
545}
546
547void
548uma_small_free(void *mem, int size, u_int8_t flags)
549{
550	vm_page_t m;
551
552	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
553	vm_page_lock_queues();
554	vm_page_free(m);
555	vm_page_unlock_queues();
556}
557
558/*
559 *	Initialize the pmap module.
560 *	Called by vm_init, to initialize any structures that the pmap
561 *	system needs to map virtual memory.
562 *	pmap_init has been enhanced to support, in a fairly consistent
563 *	way, discontiguous physical memory.
564 */
565void
566pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
567{
568	int i;
569	int initial_pvs;
570
571	/*
572	 * Allocate memory for random pmap data structures.  Includes the
573	 * pv_head_table.
574	 */
575
576	for(i = 0; i < vm_page_array_size; i++) {
577		vm_page_t m;
578
579		m = &vm_page_array[i];
580		TAILQ_INIT(&m->md.pv_list);
581		m->md.pv_list_count = 0;
582 	}
583
584	/*
585	 * Init the pv free list and the PTE free list.
586	 */
587	initial_pvs = vm_page_array_size;
588	if (initial_pvs < MINPV)
589		initial_pvs = MINPV;
590	if (initial_pvs > MAXPV)
591		initial_pvs = MAXPV;
592	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
593	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
594	uma_prealloc(pvzone, initial_pvs);
595
596	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
597	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
598	uma_prealloc(ptezone, initial_pvs);
599
600	/*
601	 * Now it is safe to enable pv_table recording.
602	 */
603	pmap_initialized = TRUE;
604}
605
606/*
607 * Initialize the address space (zone) for the pv_entries.  Set a
608 * high water mark so that the system can recover from excessive
609 * numbers of pv entries.
610 */
611void
612pmap_init2()
613{
614	int shpgperproc = PMAP_SHPGPERPROC;
615
616	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
617	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
618	pv_entry_high_water = 9 * (pv_entry_max / 10);
619}
620
621
622/***************************************************
623 * Manipulate TLBs for a pmap
624 ***************************************************/
625
626static void
627pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
628{
629	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
630		("invalidating TLB for non-current pmap"));
631	ia64_ptc_g(va, PAGE_SHIFT << 2);
632}
633
634static void
635pmap_invalidate_all_1(void *arg)
636{
637	u_int64_t addr;
638	int i, j;
639	register_t psr;
640
641	psr = intr_disable();
642	addr = pmap_ptc_e_base;
643	for (i = 0; i < pmap_ptc_e_count1; i++) {
644		for (j = 0; j < pmap_ptc_e_count2; j++) {
645			ia64_ptc_e(addr);
646			addr += pmap_ptc_e_stride2;
647		}
648		addr += pmap_ptc_e_stride1;
649	}
650	intr_restore(psr);
651}
652
653static void
654pmap_invalidate_all(pmap_t pmap)
655{
656	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
657		("invalidating TLB for non-current pmap"));
658
659
660#ifdef SMP
661	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
662#else
663	pmap_invalidate_all_1(0);
664#endif
665}
666
667static u_int32_t
668pmap_allocate_rid(void)
669{
670	uint64_t bit, bits;
671	int rid;
672
673	mtx_lock(&pmap_ridmutex);
674	if (pmap_ridcount == pmap_ridmax)
675		panic("pmap_allocate_rid: All Region IDs used");
676
677	/* Find an index with a free bit. */
678	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
679		pmap_rididx++;
680		if (pmap_rididx == pmap_ridmapsz)
681			pmap_rididx = 0;
682	}
683	rid = pmap_rididx * 64;
684
685	/* Find a free bit. */
686	bit = 1UL;
687	while (bits & bit) {
688		rid++;
689		bit <<= 1;
690	}
691
692	pmap_ridmap[pmap_rididx] |= bit;
693	pmap_ridcount++;
694	mtx_unlock(&pmap_ridmutex);
695
696	return rid;
697}
698
699static void
700pmap_free_rid(u_int32_t rid)
701{
702	uint64_t bit;
703	int idx;
704
705	idx = rid / 64;
706	bit = ~(1UL << (rid & 63));
707
708	mtx_lock(&pmap_ridmutex);
709	pmap_ridmap[idx] &= bit;
710	pmap_ridcount--;
711	mtx_unlock(&pmap_ridmutex);
712}
713
714/***************************************************
715 * Low level helper routines.....
716 ***************************************************/
717
718/*
719 * Install a pte into the VHPT
720 */
721static PMAP_INLINE void
722pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
723{
724	u_int64_t *vhp, *p;
725
726	vhp = (u_int64_t *)vhpte;
727	p = (u_int64_t *)pte;
728
729	critical_enter();
730
731	/* Invalidate the tag so the VHPT walker will not match this entry. */
732	vhp[2] = 1UL << 63;
733	ia64_mf();
734
735	vhp[0] = p[0];
736	vhp[1] = p[1];
737	ia64_mf();
738
739	/* Install a proper tag now that we're done. */
740	vhp[2] = p[2];
741	ia64_mf();
742
743	critical_exit();
744}
745
746/*
747 * Compare essential parts of pte.
748 */
749static PMAP_INLINE int
750pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
751{
752	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
753}
754
755/*
756 * this routine defines the region(s) of memory that should
757 * not be tested for the modified bit.
758 */
759static PMAP_INLINE int
760pmap_track_modified(vm_offset_t va)
761{
762	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
763		return 1;
764	else
765		return 0;
766}
767
768/*
769 * Create the KSTACK for a new thread.
770 * This routine directly affects the fork perf for a process/thread.
771 */
772void
773pmap_new_thread(struct thread *td, int pages)
774{
775
776	/* Bounds check */
777	if (pages <= 1)
778		pages = KSTACK_PAGES;
779	else if (pages > KSTACK_MAX_PAGES)
780		pages = KSTACK_MAX_PAGES;
781	td->td_kstack = (vm_offset_t)malloc(pages * PAGE_SIZE, M_PMAP,
782	    M_WAITOK);
783	td->td_kstack_pages = pages;
784}
785
786/*
787 * Dispose the KSTACK for a thread that has exited.
788 * This routine directly impacts the exit perf of a process/thread.
789 */
790void
791pmap_dispose_thread(struct thread *td)
792{
793
794	free((void*)td->td_kstack, M_PMAP);
795	td->td_kstack = 0;
796	td->td_kstack_pages = 0;
797}
798
799/*
800 * Set up a variable sized alternate kstack.  This appears to be MI.
801 */
802void
803pmap_new_altkstack(struct thread *td, int pages)
804{
805
806	td->td_altkstack = td->td_kstack;
807	td->td_altkstack_obj = td->td_kstack_obj;
808	td->td_altkstack_pages = td->td_kstack_pages;
809	pmap_new_thread(td, pages);
810}
811
812void
813pmap_dispose_altkstack(struct thread *td)
814{
815
816	pmap_dispose_thread(td);
817	td->td_kstack = td->td_altkstack;
818	td->td_kstack_obj = td->td_altkstack_obj;
819	td->td_kstack_pages = td->td_altkstack_pages;
820	td->td_altkstack = 0;
821	td->td_altkstack_obj = NULL;
822	td->td_altkstack_pages = 0;
823}
824
825/*
826 * Allow the KSTACK for a thread to be prejudicially paged out.
827 */
828void
829pmap_swapout_thread(struct thread *td)
830{
831}
832
833/*
834 * Bring the KSTACK for a specified thread back in.
835 */
836void
837pmap_swapin_thread(struct thread *td)
838{
839}
840
841/***************************************************
842 * Page table page management routines.....
843 ***************************************************/
844
845void
846pmap_pinit0(struct pmap *pmap)
847{
848	/* kernel_pmap is the same as any other pmap. */
849	pmap_pinit(pmap);
850}
851
852/*
853 * Initialize a preallocated and zeroed pmap structure,
854 * such as one in a vmspace structure.
855 */
856void
857pmap_pinit(struct pmap *pmap)
858{
859	int i;
860
861	pmap->pm_flags = 0;
862	for (i = 0; i < 5; i++)
863		pmap->pm_rid[i] = 0;
864	pmap->pm_ptphint = NULL;
865	pmap->pm_active = 0;
866	TAILQ_INIT(&pmap->pm_pvlist);
867	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
868}
869
870/*
871 * Wire in kernel global address entries.  To avoid a race condition
872 * between pmap initialization and pmap_growkernel, this procedure
873 * should be called after the vmspace is attached to the process
874 * but before this pmap is activated.
875 */
876void
877pmap_pinit2(struct pmap *pmap)
878{
879	int i;
880
881	for (i = 0; i < 5; i++)
882		pmap->pm_rid[i] = pmap_allocate_rid();
883}
884
885/***************************************************
886 * Pmap allocation/deallocation routines.
887 ***************************************************/
888
889/*
890 * Release any resources held by the given physical map.
891 * Called when a pmap initialized by pmap_pinit is being released.
892 * Should only be called if the map contains no valid mappings.
893 */
894void
895pmap_release(pmap_t pmap)
896{
897	int i;
898
899	for (i = 0; i < 5; i++)
900		if (pmap->pm_rid[i])
901			pmap_free_rid(pmap->pm_rid[i]);
902}
903
904/*
905 * grow the number of kernel page table entries, if needed
906 */
907void
908pmap_growkernel(vm_offset_t addr)
909{
910	struct ia64_lpte *ptepage;
911	vm_page_t nkpg;
912
913	if (kernel_vm_end >= addr)
914		return;
915
916	critical_enter();
917
918	while (kernel_vm_end < addr) {
919		/* We could handle more by increasing the size of kptdir. */
920		if (nkpt == MAXKPT)
921			panic("pmap_growkernel: out of kernel address space");
922
923		nkpg = vm_page_alloc(NULL, nkpt,
924		    VM_ALLOC_NOOBJ | VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
925		if (!nkpg)
926			panic("pmap_growkernel: no memory to grow kernel");
927
928		ptepage = (struct ia64_lpte *)
929		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
930		bzero(ptepage, PAGE_SIZE);
931		ia64_kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
932
933		nkpt++;
934		kernel_vm_end += PAGE_SIZE * NKPTEPG;
935	}
936
937	critical_exit();
938}
939
940/***************************************************
941 * page management routines.
942 ***************************************************/
943
944/*
945 * free the pv_entry back to the free list
946 */
947static PMAP_INLINE void
948free_pv_entry(pv_entry_t pv)
949{
950	pv_entry_count--;
951	uma_zfree(pvzone, pv);
952}
953
954/*
955 * get a new pv_entry, allocating a block from the system
956 * when needed.
957 * the memory allocation is performed bypassing the malloc code
958 * because of the possibility of allocations at interrupt time.
959 */
960static pv_entry_t
961get_pv_entry(void)
962{
963	pv_entry_count++;
964	if (pv_entry_high_water &&
965		(pv_entry_count > pv_entry_high_water) &&
966		(pmap_pagedaemon_waken == 0)) {
967		pmap_pagedaemon_waken = 1;
968		wakeup (&vm_pages_needed);
969	}
970	return uma_zalloc(pvzone, M_NOWAIT);
971}
972
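/*
 * The VHPT helpers below rely on two hardware hints: ia64_thash(va)
 * returns the address of the VHPT slot the walker would probe for va
 * (given the current region registers and cr.pta), and ia64_ttag(va)
 * returns the tag the walker would compare against.  Collisions are
 * resolved in software by chaining ia64_lptes off the slot through
 * pte_chain and matching on pte_tag.
 */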
973/*
974 * Add an ia64_lpte to the VHPT.
975 */
976static void
977pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
978{
979	struct ia64_lpte *vhpte;
980
981	pmap_vhpt_inserts++;
982	pmap_vhpt_resident++;
983
984	vhpte = (struct ia64_lpte *) ia64_thash(va);
985
986	if (vhpte->pte_chain)
987		pmap_vhpt_collisions++;
988
989	mtx_lock(&pmap_vhptmutex);
990
991	pte->pte_chain = vhpte->pte_chain;
992	ia64_mf();
993	vhpte->pte_chain = ia64_tpa((vm_offset_t)pte);
994	ia64_mf();
995
996	if (!vhpte->pte_p && pte->pte_p)
997		pmap_install_pte(vhpte, pte);
998
999	mtx_unlock(&pmap_vhptmutex);
1000}
1001
1002/*
1003 * Update VHPT after a pte has changed.
1004 */
1005static void
1006pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1007{
1008	struct ia64_lpte *vhpte;
1009
1010	vhpte = (struct ia64_lpte *)ia64_thash(va);
1011
1012	mtx_lock(&pmap_vhptmutex);
1013
1014	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag) && pte->pte_p)
1015		pmap_install_pte(vhpte, pte);
1016
1017	mtx_unlock(&pmap_vhptmutex);
1018}
1019
1020/*
1021 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1022 * worked or an appropriate error code otherwise.
1023 */
1024static int
1025pmap_remove_vhpt(vm_offset_t va)
1026{
1027	struct ia64_lpte *pte;
1028	struct ia64_lpte *lpte;
1029	struct ia64_lpte *vhpte;
1030	u_int64_t tag;
1031
1032	vhpte = (struct ia64_lpte *)ia64_thash(va);
1033
1034	/*
1035	 * If the VHPTE is invalid, there can't be a collision chain.
1036	 */
1037	if (!vhpte->pte_p) {
1038		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1039		return (ENOENT);
1040	}
1041
1042	lpte = vhpte;
1043	tag = ia64_ttag(va);
1044
1045	mtx_lock(&pmap_vhptmutex);
1046
1047	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(vhpte->pte_chain);
1048	KASSERT(pte != NULL, ("foo"));
1049
1050	while (pte->pte_tag != tag) {
1051		lpte = pte;
1052		if (pte->pte_chain == 0) {
1053			mtx_unlock(&pmap_vhptmutex);
1054			return (ENOENT);
1055		}
1056		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
1057	}
1058
1059	/* Snip this pte out of the collision chain. */
1060	lpte->pte_chain = pte->pte_chain;
1061	ia64_mf();
1062
1063	/*
1064	 * If the VHPTE matches as well, change it to map the first
1065	 * element from the chain if there is one.
1066	 */
1067	if (vhpte->pte_tag == tag) {
1068		if (vhpte->pte_chain) {
1069			pte = (void*)IA64_PHYS_TO_RR7(vhpte->pte_chain);
1070			pmap_install_pte(vhpte, pte);
1071		} else
1072			vhpte->pte_p = 0;
1073	}
1074
1075	mtx_unlock(&pmap_vhptmutex);
1076	pmap_vhpt_resident--;
1077	return (0);
1078}
1079
1080/*
1081 * Find the ia64_lpte for the given va, if any.
1082 */
1083static struct ia64_lpte *
1084pmap_find_vhpt(vm_offset_t va)
1085{
1086	struct ia64_lpte *pte;
1087	u_int64_t tag;
1088
1089	tag = ia64_ttag(va);
1090	pte = (struct ia64_lpte *)ia64_thash(va);
1091	if (pte->pte_chain == 0)
1092		return (NULL);
1093	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
1094	while (pte->pte_tag != tag) {
1095		if (pte->pte_chain == 0)
1096			return (NULL);
1097		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(pte->pte_chain);
1098	}
1099	return (pte);
1100}
1101
1102/*
1103 * Remove an entry from the list of managed mappings.
1104 */
1105static int
1106pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1107{
1108	if (!pv) {
1109		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1110			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1111				if (pmap == pv->pv_pmap && va == pv->pv_va)
1112					break;
1113			}
1114		} else {
1115			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1116				if (va == pv->pv_va)
1117					break;
1118			}
1119		}
1120	}
1121
1122	if (pv) {
1123		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1124		m->md.pv_list_count--;
1125		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1126			vm_page_flag_clear(m, PG_WRITEABLE);
1127
1128		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1129		free_pv_entry(pv);
1130		return 0;
1131	} else {
1132		return ENOENT;
1133	}
1134}
1135
1136/*
1137 * Create a pv entry for page at pa for
1138 * (pmap, va).
1139 */
1140static void
1141pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1142{
1143	pv_entry_t pv;
1144
1145	pv = get_pv_entry();
1146	pv->pv_pmap = pmap;
1147	pv->pv_va = va;
1148
1149	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1150	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1151	m->md.pv_list_count++;
1152}
1153
1154/*
1155 *	Routine:	pmap_extract
1156 *	Function:
1157 *		Extract the physical page address associated
1158 *		with the given map/virtual_address pair.
1159 */
1160vm_offset_t
1161pmap_extract(pmap, va)
1162	register pmap_t pmap;
1163	vm_offset_t va;
1164{
1165	struct ia64_lpte *pte;
1166	pmap_t oldpmap;
1167
1168	if (!pmap)
1169		return 0;
1170
1171	oldpmap = pmap_install(pmap);
1172	pte = pmap_find_vhpt(va);
1173	pmap_install(oldpmap);
1174
1175	if (!pte)
1176		return 0;
1177
1178	return pmap_pte_pa(pte);
1179}
1180
1181/***************************************************
1182 * Low level mapping routines.....
1183 ***************************************************/
1184
1185/*
1186 * Find the kernel lpte for mapping the given virtual address, which
1187 * must be in the part of region 5 which we can cover with our kernel
1188 * 'page tables'.
1189 */
1190static struct ia64_lpte *
1191pmap_find_kpte(vm_offset_t va)
1192{
1193	KASSERT((va >> 61) == 5,
1194		("kernel mapping 0x%lx not in region 5", va));
1195	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1196		("kernel mapping 0x%lx out of range", va));
1197	return (&ia64_kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)]);
1198}
1199
1200/*
1201 * Find a pte suitable for mapping a user-space address. If one exists
1202 * in the VHPT, that one will be returned, otherwise a new pte is
1203 * allocated.
1204 */
1205static struct ia64_lpte *
1206pmap_find_pte(vm_offset_t va)
1207{
1208	struct ia64_lpte *pte;
1209
1210	if (va >= VM_MAXUSER_ADDRESS)
1211		return pmap_find_kpte(va);
1212
1213	pte = pmap_find_vhpt(va);
1214	if (!pte) {
1215		pte = uma_zalloc(ptezone, M_WAITOK);
1216		pte->pte_p = 0;
1217	}
1218	return pte;
1219}
1220
1221/*
1222 * Free a pte which is now unused. This simply returns it to the zone
1223 * allocator if it is a user mapping. For kernel mappings, clear the
1224 * valid bit to make it clear that the mapping is not currently used.
1225 */
1226static void
1227pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1228{
1229	if (va < VM_MAXUSER_ADDRESS)
1230		uma_zfree(ptezone, pte);
1231	else
1232		pte->pte_p = 0;
1233}
1234
1235/*
1236 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1237 * the pte was originally valid, then it's assumed to already be in the
1238 * VHPT.
1239 */
1240static void
1241pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1242	     int ig, int pl, int ar)
1243{
1244	int wasvalid = pte->pte_p;
1245
1246	pte->pte_p = 1;
1247	pte->pte_ma = PTE_MA_WB;
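	/*
	 * For managed mappings, leave the accessed and dirty bits clear so
	 * that the first reference or write faults and lets us record the
	 * event on the vm_page; unmanaged mappings preset both bits to
	 * avoid ever taking those faults.
	 */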
1248	if (ig & PTE_IG_MANAGED) {
1249		pte->pte_a = 0;
1250		pte->pte_d = 0;
1251	} else {
1252		pte->pte_a = 1;
1253		pte->pte_d = 1;
1254	}
1255	pte->pte_pl = pl;
1256	pte->pte_ar = ar;
1257	pte->pte_ppn = pa >> 12;
1258	pte->pte_ed = 0;
1259	pte->pte_ig = ig;
1260
1261	pte->pte_ps = PAGE_SHIFT;
1262	pte->pte_key = 0;
1263
1264	pte->pte_tag = ia64_ttag(va);
1265
1266	if (wasvalid) {
1267		pmap_update_vhpt(pte, va);
1268	} else {
1269		pmap_enter_vhpt(pte, va);
1270	}
1271}
1272
1273/*
1274 * If a pte contains a valid mapping, clear it and update the VHPT.
1275 */
1276static void
1277pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1278{
1279	if (pte->pte_p) {
1280		pmap_remove_vhpt(va);
1281		ia64_ptc_g(va, PAGE_SHIFT << 2);
1282		pte->pte_p = 0;
1283	}
1284}
1285
1286/*
1287 * Remove the (possibly managed) mapping represented by pte from the
1288 * given pmap.
1289 */
1290static int
1291pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1292		pv_entry_t pv, int freepte)
1293{
1294	int error;
1295	vm_page_t m;
1296
1297	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1298		("removing pte for non-current pmap"));
1299
1300	/*
1301	 * First remove from the VHPT.
1302	 */
1303	error = pmap_remove_vhpt(va);
1304	if (error)
1305		return error;
1306
1307	/*
1308	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1309	 */
1310	pte->pte_p = 0;
1311
1312	if (pte->pte_ig & PTE_IG_WIRED)
1313		pmap->pm_stats.wired_count -= 1;
1314
1315	pmap->pm_stats.resident_count -= 1;
1316	if (pte->pte_ig & PTE_IG_MANAGED) {
1317		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1318		if (pte->pte_d)
1319			if (pmap_track_modified(va))
1320				vm_page_dirty(m);
1321		if (pte->pte_a)
1322			vm_page_flag_set(m, PG_REFERENCED);
1323
1324		if (freepte)
1325			pmap_free_pte(pte, va);
1326		return pmap_remove_entry(pmap, m, va, pv);
1327	} else {
1328		if (freepte)
1329			pmap_free_pte(pte, va);
1330		return 0;
1331	}
1332}
1333
1334/*
1335 * Extract the physical page address associated with a kernel
1336 * virtual address.
1337 */
1338vm_paddr_t
1339pmap_kextract(vm_offset_t va)
1340{
1341	struct ia64_lpte *pte;
1342	vm_offset_t gwpage;
1343
1344	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1345
1346	/* Regions 6 and 7 are direct mapped. */
1347	if (va >= IA64_RR_BASE(6))
1348		return (IA64_RR_MASK(va));
1349
1350	/* EPC gateway page? */
1351	gwpage = (vm_offset_t)ia64_get_k5();
1352	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1353		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1354
1355	/* Bail out if the virtual address is beyond our limits. */
1356	if (IA64_RR_MASK(va) >= nkpt * PAGE_SIZE * NKPTEPG)
1357		return (0);
1358
1359	pte = pmap_find_kpte(va);
1360	if (!pte->pte_p)
1361		return (0);
1362	return ((pte->pte_ppn << 12) | (va & PAGE_MASK));
1363}
1364
1365/*
1366 * Add a list of wired pages to the kva.
1367 * This routine is only used for temporary
1368 * kernel mappings that do not need to have
1369 * page modification or references recorded.
1370 * Note that old mappings are simply written
1371 * over.  The page *must* be wired.
1372 */
1373void
1374pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1375{
1376	int i;
1377	struct ia64_lpte *pte;
1378
1379	for (i = 0; i < count; i++) {
1380		vm_offset_t tva = va + i * PAGE_SIZE;
1381		int wasvalid;
1382		pte = pmap_find_kpte(tva);
1383		wasvalid = pte->pte_p;
1384		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1385			     0, PTE_PL_KERN, PTE_AR_RWX);
1386		if (wasvalid)
1387			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1388	}
1389}
1390
1391/*
1392 * this routine jerks page mappings from the
1393 * kernel -- it is meant only for temporary mappings.
1394 */
1395void
1396pmap_qremove(vm_offset_t va, int count)
1397{
1398	int i;
1399	struct ia64_lpte *pte;
1400
1401	for (i = 0; i < count; i++) {
1402		pte = pmap_find_kpte(va);
1403		pmap_clear_pte(pte, va);
1404		va += PAGE_SIZE;
1405	}
1406}
1407
1408/*
1409 * Add a wired page to the kva.
1410 */
1411void
1412pmap_kenter(vm_offset_t va, vm_offset_t pa)
1413{
1414	struct ia64_lpte *pte;
1415	int wasvalid;
1416
1417	pte = pmap_find_kpte(va);
1418	wasvalid = pte->pte_p;
1419	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1420	if (wasvalid)
1421		ia64_ptc_g(va, PAGE_SHIFT << 2);
1422}
1423
1424/*
1425 * Remove a page from the kva
1426 */
1427void
1428pmap_kremove(vm_offset_t va)
1429{
1430	struct ia64_lpte *pte;
1431
1432	pte = pmap_find_kpte(va);
1433	pmap_clear_pte(pte, va);
1434}
1435
1436/*
1437 *	Used to map a range of physical addresses into kernel
1438 *	virtual address space.
1439 *
1440 *	The value passed in '*virt' is a suggested virtual address for
1441 *	the mapping. Architectures which can support a direct-mapped
1442 *	physical to virtual region can return the appropriate address
1443 *	within that region, leaving '*virt' unchanged. Other
1444 *	architectures should map the pages starting at '*virt' and
1445 *	update '*virt' with the first usable address after the mapped
1446 *	region.
1447 */
1448vm_offset_t
1449pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1450{
1451	return IA64_PHYS_TO_RR7(start);
1452}
1453
1454/*
1455 * Remove a single page from a process address space
1456 */
1457static void
1458pmap_remove_page(pmap_t pmap, vm_offset_t va)
1459{
1460	struct ia64_lpte *pte;
1461
1462	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1463		("removing page for non-current pmap"));
1464
1465	pte = pmap_find_vhpt(va);
1466	if (pte) {
1467		pmap_remove_pte(pmap, pte, va, 0, 1);
1468		pmap_invalidate_page(pmap, va);
1469	}
1470	return;
1471}
1472
1473/*
1474 *	Remove the given range of addresses from the specified map.
1475 *
1476 *	It is assumed that the start and end are properly
1477 *	rounded to the page size.
1478 */
1479void
1480pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1481{
1482	pmap_t oldpmap;
1483	vm_offset_t va;
1484	pv_entry_t pv;
1485	struct ia64_lpte *pte;
1486
1487	if (pmap == NULL)
1488		return;
1489
1490	if (pmap->pm_stats.resident_count == 0)
1491		return;
1492
1493	oldpmap = pmap_install(pmap);
1494
1495	/*
1496	 * special handling of removing one page.  a very
1497	 * common operation and easy to short circuit some
1498	 * code.
1499	 */
1500	if (sva + PAGE_SIZE == eva) {
1501		pmap_remove_page(pmap, sva);
1502		pmap_install(oldpmap);
1503		return;
1504	}
1505
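	/*
	 * If the pmap has fewer resident mappings than there are pages in
	 * the range, it is cheaper to walk the pmap's pv list and filter
	 * by address than to probe the VHPT for every page in the range.
	 */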
1506	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1507		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1508			va = pv->pv_va;
1509			if (va >= sva && va < eva) {
1510				pte = pmap_find_vhpt(va);
1511				KASSERT(pte != NULL, ("pte"));
1512				pmap_remove_pte(pmap, pte, va, pv, 1);
1513				pmap_invalidate_page(pmap, va);
1514			}
1515		}
1516
1517	} else {
1518		for (va = sva; va < eva; va += PAGE_SIZE) {
1519			pte = pmap_find_vhpt(va);
1520			if (pte) {
1521				pmap_remove_pte(pmap, pte, va, 0, 1);
1522				pmap_invalidate_page(pmap, va);
1523			}
1524		}
1525	}
1526
1527	pmap_install(oldpmap);
1528}
1529
1530/*
1531 *	Routine:	pmap_remove_all
1532 *	Function:
1533 *		Removes this physical page from
1534 *		all physical maps in which it resides.
1535 *		Reflects back modify bits to the pager.
1536 *
1537 *	Notes:
1538 *		Original versions of this routine were very
1539 *		inefficient because they iteratively called
1540 *		pmap_remove (slow...)
1541 */
1542
1543void
1544pmap_remove_all(vm_page_t m)
1545{
1546	pmap_t oldpmap;
1547	pv_entry_t pv;
1548	int s;
1549
1550#if defined(PMAP_DIAGNOSTIC)
1551	/*
1552	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1553	 * pages!
1554	 */
1555	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1556		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1557	}
1558#endif
1559
1560	s = splvm();
1561
1562	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1563		struct ia64_lpte *pte;
1564		pmap_t pmap = pv->pv_pmap;
1565		vm_offset_t va = pv->pv_va;
1566
1567		oldpmap = pmap_install(pmap);
1568		pte = pmap_find_vhpt(va);
1569		KASSERT(pte != NULL, ("pte"));
1570		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1571			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1572		pmap_remove_pte(pmap, pte, va, pv, 1);
1573		pmap_invalidate_page(pmap, va);
1574		pmap_install(oldpmap);
1575	}
1576
1577	vm_page_flag_clear(m, PG_WRITEABLE);
1578
1579	splx(s);
1580	return;
1581}
1582
1583/*
1584 *	Set the physical protection on the
1585 *	specified range of this map as requested.
1586 */
1587void
1588pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1589{
1590	pmap_t oldpmap;
1591	struct ia64_lpte *pte;
1592	int newprot;
1593
1594	if (pmap == NULL)
1595		return;
1596
1597	oldpmap = pmap_install(pmap);
1598
1599	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1600		pmap_remove(pmap, sva, eva);
1601		pmap_install(oldpmap);
1602		return;
1603	}
1604
1605	if (prot & VM_PROT_WRITE) {
1606		pmap_install(oldpmap);
1607		return;
1608	}
1609
1610	newprot = pte_prot(pmap, prot);
1611
1612	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1613		panic("pmap_protect: unaligned addresses");
1614
1615	while (sva < eva) {
1616		/*
1617		 * If page is invalid, skip this page
1618		 */
1619		pte = pmap_find_vhpt(sva);
1620		if (!pte) {
1621			sva += PAGE_SIZE;
1622			continue;
1623		}
1624
1625		if (pmap_pte_prot(pte) != newprot) {
1626			if (pte->pte_ig & PTE_IG_MANAGED) {
1627				vm_offset_t pa = pmap_pte_pa(pte);
1628				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1629				if (pte->pte_d) {
1630					if (pmap_track_modified(sva))
1631						vm_page_dirty(m);
1632					pte->pte_d = 0;
1633				}
1634				if (pte->pte_a) {
1635					vm_page_flag_set(m, PG_REFERENCED);
1636					pte->pte_a = 0;
1637				}
1638			}
1639			pmap_pte_set_prot(pte, newprot);
1640			pmap_update_vhpt(pte, sva);
1641			pmap_invalidate_page(pmap, sva);
1642		}
1643
1644		sva += PAGE_SIZE;
1645	}
1646	pmap_install(oldpmap);
1647}
1648
1649/*
1650 *	Insert the given physical page (p) at
1651 *	the specified virtual address (v) in the
1652 *	target physical map with the protection requested.
1653 *
1654 *	If specified, the page will be wired down, meaning
1655 *	that the related pte can not be reclaimed.
1656 *
1657 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1658 *	or lose information.  That is, this routine must actually
1659 *	insert this page into the given map NOW.
1660 */
1661void
1662pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1663	   boolean_t wired)
1664{
1665	pmap_t oldpmap;
1666	vm_offset_t pa;
1667	vm_offset_t opa;
1668	struct ia64_lpte origpte;
1669	struct ia64_lpte *pte;
1670	int managed;
1671
1672	if (pmap == NULL)
1673		return;
1674
1675	oldpmap = pmap_install(pmap);
1676
1677	va &= ~PAGE_MASK;
1678#ifdef PMAP_DIAGNOSTIC
1679	if (va > VM_MAX_KERNEL_ADDRESS)
1680		panic("pmap_enter: toobig");
1681#endif
1682
1683	/*
1684	 * Find (or create) a pte for the given mapping.
1685	 */
1686	pte = pmap_find_pte(va);
1687	origpte = *pte;
1688
1689	if (origpte.pte_p)
1690		opa = pmap_pte_pa(&origpte);
1691	else
1692		opa = 0;
1693	managed = 0;
1694
1695	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1696
1697	/*
1698	 * Mapping has not changed, must be protection or wiring change.
1699	 */
1700	if (origpte.pte_p && (opa == pa)) {
1701		/*
1702		 * Wiring change, just update stats. We don't worry about
1703		 * wiring PT pages as they remain resident as long as there
1704		 * are valid mappings in them. Hence, if a user page is wired,
1705		 * the PT page will be also.
1706		 */
1707		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1708			pmap->pm_stats.wired_count++;
1709		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1710			pmap->pm_stats.wired_count--;
1711
1712		/*
1713		 * We might be turning off write access to the page,
1714		 * so we go ahead and sense modify status.
1715		 */
1716		if (origpte.pte_ig & PTE_IG_MANAGED) {
1717			if (origpte.pte_d && pmap_track_modified(va)) {
1718				vm_page_t om;
1719				om = PHYS_TO_VM_PAGE(opa);
1720				vm_page_dirty(om);
1721			}
1722		}
1723
1724		managed = origpte.pte_ig & PTE_IG_MANAGED;
1725		goto validate;
1726	}
1727	/*
1728	 * Mapping has changed, invalidate old range and fall
1729	 * through to handle validating new mapping.
1730	 */
1731	if (opa) {
1732		int error;
1733		vm_page_lock_queues();
1734		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1735		vm_page_unlock_queues();
1736		if (error)
1737			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1738	}
1739
1740	/*
1741	 * Enter on the PV list if part of our managed memory.
1742	 */
1743	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1744		pmap_insert_entry(pmap, va, m);
1745		managed |= PTE_IG_MANAGED;
1746	}
1747
1748	/*
1749	 * Increment counters
1750	 */
1751	pmap->pm_stats.resident_count++;
1752	if (wired)
1753		pmap->pm_stats.wired_count++;
1754
1755validate:
1756
1757	/*
1758	 * Now validate mapping with desired protection/wiring. This
1759	 * adds the pte to the VHPT if necessary.
1760	 */
1761	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1762		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1763
1764	/*
1765	 * if the mapping or permission bits are different, we need
1766	 * to invalidate the page.
1767	 */
1768	if (!pmap_equal_pte(&origpte, pte))
1769		pmap_invalidate_page(pmap, va);
1770
1771	pmap_install(oldpmap);
1772}
1773
1774/*
1775 * this code makes some *MAJOR* assumptions:
1776 * 1. Current pmap & pmap exists.
1777 * 2. Not wired.
1778 * 3. Read access.
1779 * 4. No page table pages.
1780 * 5. Tlbflush is deferred to calling procedure.
1781 * 6. Page IS managed.
1782 * but is *MUCH* faster than pmap_enter...
1783 */
1784
1785static void
1786pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1787{
1788	struct ia64_lpte *pte;
1789	pmap_t oldpmap;
1790
1791	oldpmap = pmap_install(pmap);
1792
1793	pte = pmap_find_pte(va);
1794	if (pte->pte_p)
1795		return;
1796
1797	/*
1798	 * Enter on the PV list since it's part of our managed memory.
1799	 */
1800	pmap_insert_entry(pmap, va, m);
1801
1802	/*
1803	 * Increment counters
1804	 */
1805	pmap->pm_stats.resident_count++;
1806
1807	/*
1808	 * Initialise PTE with read-only protection and enter into VHPT.
1809	 */
1810	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1811		     PTE_IG_MANAGED,
1812		     PTE_PL_USER, PTE_AR_R);
1813
1814	pmap_install(oldpmap);
1815}
1816
1817/*
1818 * Make temporary mapping for a physical address. This is called
1819 * during dump.
1820 */
1821void *
1822pmap_kenter_temporary(vm_offset_t pa, int i)
1823{
1824	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1825}
1826
1827#define MAX_INIT_PT (96)
1828/*
1829 * pmap_object_init_pt preloads the ptes for a given object
1830 * into the specified pmap.  This eliminates the blast of soft
1831 * faults on process startup and immediately after an mmap.
1832 */
1833void
1834pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1835		    vm_object_t object, vm_pindex_t pindex,
1836		    vm_size_t size, int limit)
1837{
1838	pmap_t oldpmap;
1839	vm_offset_t tmpidx;
1840	int psize;
1841	vm_page_t p;
1842	int objpgs;
1843
1844	if (pmap == NULL || object == NULL)
1845		return;
1846
1847	oldpmap = pmap_install(pmap);
1848
1849	psize = ia64_btop(size);
1850
1851	if ((object->type != OBJT_VNODE) ||
1852		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1853			(object->resident_page_count > MAX_INIT_PT))) {
1854		pmap_install(oldpmap);
1855		return;
1856	}
1857
1858	if (psize + pindex > object->size) {
1859		if (object->size < pindex)
1860			return;
1861		psize = object->size - pindex;
1862	}
1863
1864	/*
1865	 * if we are processing a major portion of the object, then scan the
1866	 * entire thing.
1867	 */
1868	if (psize > (object->resident_page_count >> 2)) {
1869		objpgs = psize;
1870
1871		for (p = TAILQ_FIRST(&object->memq);
1872		    ((objpgs > 0) && (p != NULL));
1873		    p = TAILQ_NEXT(p, listq)) {
1874
1875			tmpidx = p->pindex;
1876			if (tmpidx < pindex) {
1877				continue;
1878			}
1879			tmpidx -= pindex;
1880			if (tmpidx >= psize) {
1881				continue;
1882			}
1883			/*
1884			 * don't allow an madvise to blow away our really
1885			 * free pages by allocating pv entries.
1886			 */
1887			if ((limit & MAP_PREFAULT_MADVISE) &&
1888			    cnt.v_free_count < cnt.v_free_reserved) {
1889				break;
1890			}
1891			vm_page_lock_queues();
1892			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1893				(p->busy == 0) &&
1894			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1895				if ((p->queue - p->pc) == PQ_CACHE)
1896					vm_page_deactivate(p);
1897				vm_page_busy(p);
1898				vm_page_unlock_queues();
1899				pmap_enter_quick(pmap,
1900						 addr + ia64_ptob(tmpidx), p);
1901				vm_page_lock_queues();
1902				vm_page_wakeup(p);
1903			}
1904			vm_page_unlock_queues();
1905			objpgs -= 1;
1906		}
1907	} else {
1908		/*
1909		 * else lookup the pages one-by-one.
1910		 */
1911		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1912			/*
1913			 * don't allow an madvise to blow away our really
1914			 * free pages by allocating pv entries.
1915			 */
1916			if ((limit & MAP_PREFAULT_MADVISE) &&
1917			    cnt.v_free_count < cnt.v_free_reserved) {
1918				break;
1919			}
1920			p = vm_page_lookup(object, tmpidx + pindex);
1921			if (p == NULL)
1922				continue;
1923			vm_page_lock_queues();
1924			if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
1925				(p->busy == 0) &&
1926			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1927				if ((p->queue - p->pc) == PQ_CACHE)
1928					vm_page_deactivate(p);
1929				vm_page_busy(p);
1930				vm_page_unlock_queues();
1931				pmap_enter_quick(pmap,
1932						 addr + ia64_ptob(tmpidx), p);
1933				vm_page_lock_queues();
1934				vm_page_wakeup(p);
1935			}
1936			vm_page_unlock_queues();
1937		}
1938	}
1939	pmap_install(oldpmap);
1940	return;
1941}
1942
1943/*
1944 * pmap_prefault provides a quick way of clustering
1945 * pagefaults into a process's address space.  It is a "cousin"
1946 * of pmap_object_init_pt, except it runs at page fault time instead
1947 * of mmap time.
1948 */
1949#define PFBAK 4
1950#define PFFOR 4
1951#define PAGEORDER_SIZE (PFBAK+PFFOR)
1952
1953static int pmap_prefault_pageorder[] = {
1954	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
1955	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1956	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1957	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1958};
1959
1960void
1961pmap_prefault(pmap, addra, entry)
1962	pmap_t pmap;
1963	vm_offset_t addra;
1964	vm_map_entry_t entry;
1965{
1966	int i;
1967	vm_offset_t starta;
1968	vm_offset_t addr;
1969	vm_pindex_t pindex;
1970	vm_page_t m, mpte;
1971	vm_object_t object;
1972
1973	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1974		return;
1975
1976	object = entry->object.vm_object;
1977
1978	starta = addra - PFBAK * PAGE_SIZE;
1979	if (starta < entry->start) {
1980		starta = entry->start;
1981	} else if (starta > addra) {
1982		starta = 0;
1983	}
1984
1985	mpte = NULL;
1986	for (i = 0; i < PAGEORDER_SIZE; i++) {
1987		vm_object_t lobject;
1988		struct ia64_lpte *pte;
1989
1990		addr = addra + pmap_prefault_pageorder[i];
1991		if (addr > addra + (PFFOR * PAGE_SIZE))
1992			addr = 0;
1993
1994		if (addr < starta || addr >= entry->end)
1995			continue;
1996
1997		pte = pmap_find_vhpt(addr);
1998		if (pte && pte->pte_p)
1999			continue;
2000
2001		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2002		lobject = object;
2003		for (m = vm_page_lookup(lobject, pindex);
2004		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2005		    lobject = lobject->backing_object) {
2006			if (lobject->backing_object_offset & PAGE_MASK)
2007				break;
2008			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2009			m = vm_page_lookup(lobject->backing_object, pindex);
2010		}
2011
2012		/*
2013		 * give up when a page is not in memory
2014		 */
2015		if (m == NULL)
2016			break;
2017		vm_page_lock_queues();
2018		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2019			(m->busy == 0) &&
2020		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2021
2022			if ((m->queue - m->pc) == PQ_CACHE) {
2023				vm_page_deactivate(m);
2024			}
2025			vm_page_busy(m);
2026			vm_page_unlock_queues();
2027			pmap_enter_quick(pmap, addr, m);
2028			vm_page_lock_queues();
2029			vm_page_wakeup(m);
2030		}
2031		vm_page_unlock_queues();
2032	}
2033}
2034
2035/*
2036 *	Routine:	pmap_change_wiring
2037 *	Function:	Change the wiring attribute for a map/virtual-address
2038 *			pair.
2039 *	In/out conditions:
2040 *			The mapping must already exist in the pmap.
2041 */
2042void
2043pmap_change_wiring(pmap, va, wired)
2044	register pmap_t pmap;
2045	vm_offset_t va;
2046	boolean_t wired;
2047{
2048	pmap_t oldpmap;
2049	struct ia64_lpte *pte;
2050
2051	if (pmap == NULL)
2052		return;
2053
2054	oldpmap = pmap_install(pmap);
2055
2056	pte = pmap_find_vhpt(va);
2057	KASSERT(pte != NULL, ("pte"));
2058	if (wired && !pmap_pte_w(pte))
2059		pmap->pm_stats.wired_count++;
2060	else if (!wired && pmap_pte_w(pte))
2061		pmap->pm_stats.wired_count--;
2062
2063	/*
2064	 * Wiring is not a hardware characteristic so there is no need to
2065	 * invalidate TLB.
2066	 */
2067	pmap_pte_set_w(pte, wired);
2068
2069	pmap_install(oldpmap);
2070}
2071
2072
2073
2074/*
2075 *	Copy the range specified by src_addr/len
2076 *	from the source map to the range dst_addr/len
2077 *	in the destination map.
2078 *
2079 *	This routine is only advisory and need not do anything.
2080 */
2081
2082void
2083pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2084	  vm_offset_t src_addr)
2085{
2086}
2087
2088
2089/*
2090 *	pmap_zero_page zeros the specified hardware page by
2091 *	mapping it into virtual memory and using bzero to clear
2092 *	its contents.
2093 */
2094
2095void
2096pmap_zero_page(vm_page_t m)
2097{
2098	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2099	bzero((caddr_t) va, PAGE_SIZE);
2100}
2101
2102
2103/*
2104 *	pmap_zero_page_area zeros the specified hardware page by
2105 *	mapping it into virtual memory and using bzero to clear
2106 *	its contents.
2107 *
2108 *	off and size must reside within a single page.
2109 */
2110
2111void
2112pmap_zero_page_area(vm_page_t m, int off, int size)
2113{
2114	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2115	bzero((char *)(caddr_t)va + off, size);
2116}
2117
2118
2119/*
2120 *	pmap_zero_page_idle zeros the specified hardware page by
2121 *	mapping it into virtual memory and using bzero to clear
2122 *	its contents.  This is for the vm_idlezero process.
2123 */
2124
2125void
2126pmap_zero_page_idle(vm_page_t m)
2127{
2128	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2129	bzero((caddr_t) va, PAGE_SIZE);
2130}
2131
2132
2133/*
2134 *	pmap_copy_page copies the specified (machine independent)
2135 *	page by mapping the page into virtual memory and using
2136 *	bcopy to copy the page; a single copy suffices since the
2137 *	machine-dependent and machine-independent page sizes match.
2138 */
2139void
2140pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2141{
2142	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2143	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2144	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2145}
2146
2147/*
2148 * Returns true if the pmap's pv is one of the first
2149 * 16 pvs linked to from this page.  This count may
2150 * be changed upwards or downwards in the future; it
2151 * is only necessary that true be returned for a small
2152 * subset of pmaps for proper page aging.
2153 */
2154boolean_t
2155pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2156{
2157	pv_entry_t pv;
2158	int loops = 0;
2159	int s;
2160
2161	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2162		return FALSE;
2163
2164	s = splvm();
2165
2166	/*
2167	 * Check the page's current mappings, returning immediately if found.
2168	 */
2169	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2170		if (pv->pv_pmap == pmap) {
2171			splx(s);
2172			return TRUE;
2173		}
2174		loops++;
2175		if (loops >= 16)
2176			break;
2177	}
2178	splx(s);
2179	return (FALSE);
2180}
2181
2182#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2183/*
2184 * Remove all pages from the specified address space; this aids
2185 * process exit speeds.  This code is special-cased for the
2186 * current process only, but the more generic (and slightly
2187 * slower) mode can be enabled.  This is much faster than
2188 * pmap_remove in the case of running down an entire address
2189 * space.
2190 */
2191void
2192pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2193{
2194	pv_entry_t pv, npv;
2195	int s;
2196
2197#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2198	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2199		printf("warning: pmap_remove_pages called with non-current pmap\n");
2200		return;
2201	}
2202#endif
2203
2204	s = splvm();
2205	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2206		pv;
2207		pv = npv) {
2208		struct ia64_lpte *pte;
2209
2210		npv = TAILQ_NEXT(pv, pv_plist);
2211
2212		if (pv->pv_va >= eva || pv->pv_va < sva) {
2213			continue;
2214		}
2215
2216		pte = pmap_find_vhpt(pv->pv_va);
2217		KASSERT(pte != NULL, ("pte"));
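		/* Don't tear down wired mappings here. */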
2218		if (pte->pte_ig & PTE_IG_WIRED)
2219			continue;
2220
2221		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2222	}
2223	splx(s);
2224
2225	pmap_invalidate_all(pmap);
2226}
2227
2228/*
2229 *	pmap_page_protect:
2230 *
2231 *	Lower the permission for all mappings to a given page.
2232 */
2233void
2234pmap_page_protect(vm_page_t m, vm_prot_t prot)
2235{
2236	pv_entry_t pv;
2237
2238	if ((prot & VM_PROT_WRITE) != 0)
2239		return;
2240	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2241		if ((m->flags & PG_WRITEABLE) == 0)
2242			return;
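		/*
		 * Downgrade every existing mapping of the page to the
		 * requested protection and flush the stale translations.
		 */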
2243		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2244			int newprot = pte_prot(pv->pv_pmap, prot);
2245			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2246			struct ia64_lpte *pte;
2247			pte = pmap_find_vhpt(pv->pv_va);
2248			KASSERT(pte != NULL, ("pte"));
2249			pmap_pte_set_prot(pte, newprot);
2250			pmap_update_vhpt(pte, pv->pv_va);
2251			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2252			pmap_install(oldpmap);
2253		}
2254		vm_page_flag_clear(m, PG_WRITEABLE);
2255	} else {
2256		pmap_remove_all(m);
2257	}
2258}
2259
2260/*
2261 *	pmap_ts_referenced:
2262 *
2263 *	Return a count of reference bits for a page, clearing those bits.
2264 *	It is not necessary for every reference bit to be cleared, but it
2265 *	is necessary that 0 only be returned when there are truly no
2266 *	reference bits set.
2267 *
2268 *	XXX: The exact number of bits to check and clear is a matter that
2269 *	should be tested and standardized at some point in the future for
2270 *	optimal aging of shared pages.
2271 */
2272int
2273pmap_ts_referenced(vm_page_t m)
2274{
2275	pv_entry_t pv;
2276	int count = 0;
2277
2278	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2279		return 0;
2280
2281	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2282		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2283		struct ia64_lpte *pte;
2284		pte = pmap_find_vhpt(pv->pv_va);
2285		KASSERT(pte != NULL, ("pte"));
2286		if (pte->pte_a) {
2287			count++;
2288			pte->pte_a = 0;
2289			pmap_update_vhpt(pte, pv->pv_va);
2290			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2291		}
2292		pmap_install(oldpmap);
2293	}
2294
2295	return count;
2296}
2297
2298#if 0
2299/*
2300 *	pmap_is_referenced:
2301 *
2302 *	Return whether or not the specified physical page was referenced
2303 *	in any physical maps.
2304 */
2305static boolean_t
2306pmap_is_referenced(vm_page_t m)
2307{
2308	pv_entry_t pv;
2309
2310	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2311		return FALSE;
2312
2313	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2314		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2315		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2316		pmap_install(oldpmap);
2317		KASSERT(pte != NULL, ("pte"));
2318		if (pte->pte_a)
2319			return 1;
2320	}
2321
2322	return 0;
2323}
2324#endif
2325
2326/*
2327 *	pmap_is_modified:
2328 *
2329 *	Return whether or not the specified physical page was modified
2330 *	in any physical maps.
2331 */
2332boolean_t
2333pmap_is_modified(vm_page_t m)
2334{
2335	pv_entry_t pv;
2336
2337	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2338		return FALSE;
2339
2340	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2341		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2342		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2343		pmap_install(oldpmap);
2344		KASSERT(pte != NULL, ("pte"));
2345		if (pte->pte_d)
2346			return 1;
2347	}
2348
2349	return 0;
2350}
2351
2352/*
2353 *	Clear the modify bits on the specified physical page.
2354 */
2355void
2356pmap_clear_modify(vm_page_t m)
2357{
2358	pv_entry_t pv;
2359
2360	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2361		return;
2362
2363	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2364		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2365		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2366		KASSERT(pte != NULL, ("pte"));
2367		if (pte->pte_d) {
2368			pte->pte_d = 0;
2369			pmap_update_vhpt(pte, pv->pv_va);
2370			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2371		}
2372		pmap_install(oldpmap);
2373	}
2374}
2375
2376/*
2377 *	pmap_clear_reference:
2378 *
2379 *	Clear the reference bit on the specified physical page.
2380 */
2381void
2382pmap_clear_reference(vm_page_t m)
2383{
2384	pv_entry_t pv;
2385
2386	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2387		return;
2388
2389	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2390		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2391		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2392		KASSERT(pte != NULL, ("pte"));
2393		if (pte->pte_a) {
2394			pte->pte_a = 0;
2395			pmap_update_vhpt(pte, pv->pv_va);
2396			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2397		}
2398		pmap_install(oldpmap);
2399	}
2400}
2401
2402/*
2403 * Miscellaneous support routines follow
2404 */
2405
2406static void
2407ia64_protection_init()
2408{
2409	int prot, *kp, *up;
2410
2411	kp = protection_codes[0];
2412	up = protection_codes[1];
2413
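	/*
	 * Build one entry per VM_PROT_{READ,WRITE,EXECUTE} combination;
	 * the case labels below spell out all three positions, using
	 * VM_PROT_NONE for the absent ones.  Each entry packs the ia64
	 * access-rights field (shifted past the privilege-level bits)
	 * together with the privilege level: kernel-only codes land in
	 * protection_codes[0], user codes in protection_codes[1].  The
	 * table is presumably consumed by pte_prot(), indexed by pmap
	 * and by the low three protection bits.
	 */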
2414	for (prot = 0; prot < 8; prot++) {
2415		switch (prot) {
2416		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2417			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2418			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2419			break;
2420
2421		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2422			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2423			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2424			break;
2425
2426		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2427			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2428			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2429			break;
2430
2431		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2432			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2433			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2434			break;
2435
2436		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2437			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2438			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2439			break;
2440
2441		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2442			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2443			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2444			break;
2445
2446		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2447			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2448			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2449			break;
2450
2451		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2452			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2453			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2454			break;
2455		}
2456	}
2457}
2458
2459/*
2460 * Map a set of physical memory pages into the kernel virtual
2461 * address space. Return a pointer to where it is mapped. This
2462 * routine is intended to be used for mapping device memory,
2463 * NOT real memory.
2464 */
2465void *
2466pmap_mapdev(vm_offset_t pa, vm_size_t size)
2467{
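	/*
	 * Region 6 gives an uncacheable, identity-mapped view of
	 * physical memory, which is what device registers want.
	 */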
2468	return (void*) IA64_PHYS_TO_RR6(pa);
2469}
2470
2471/*
2472 * 'Unmap' a range mapped by pmap_mapdev().
2473 */
2474void
2475pmap_unmapdev(vm_offset_t va, vm_size_t size)
2476{
2477	return;
2478}
2479
2480/*
2481 * perform the pmap work for mincore
2482 */
2483int
2484pmap_mincore(pmap_t pmap, vm_offset_t addr)
2485{
2486	pmap_t oldpmap;
2487	struct ia64_lpte *pte;
2488	int val = 0;
2489
2490	oldpmap = pmap_install(pmap);
2491	pte = pmap_find_vhpt(addr);
2492	pmap_install(oldpmap);
2493
2494	if (!pte)
2495		return 0;
2496
2497	if (pmap_pte_v(pte)) {
2498		vm_page_t m;
2499		vm_offset_t pa;
2500
2501		val = MINCORE_INCORE;
2502		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2503			return val;
2504
2505		pa = pmap_pte_pa(pte);
2506
2507		m = PHYS_TO_VM_PAGE(pa);
2508
2509		/*
2510		 * Modified by us
2511		 */
2512		if (pte->pte_d)
2513			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2514		/*
2515		 * Modified by someone
2516		 */
2517		else if (pmap_is_modified(m))
2518			val |= MINCORE_MODIFIED_OTHER;
2519		/*
2520		 * Referenced by us
2521		 */
2522		if (pte->pte_a)
2523			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2524
2525		/*
2526		 * Referenced by someone
2527		 */
2528		else if (pmap_ts_referenced(m)) {
2529			val |= MINCORE_REFERENCED_OTHER;
2530			vm_page_flag_set(m, PG_REFERENCED);
2531		}
2532	}
2533	return val;
2534}
2535
2536void
2537pmap_activate(struct thread *td)
2538{
2539	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2540}
2541
2542pmap_t
2543pmap_switch(pmap_t pm)
2544{
2545	pmap_t prevpm;
2546	int i;
2547
2548	mtx_assert(&sched_lock, MA_OWNED);
2549
2550	prevpm = PCPU_GET(current_pmap);
2551	if (prevpm == pm)
2552		return (prevpm);
2553	if (prevpm != NULL)
2554		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
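	/*
	 * Load the five user region registers.  Each value encodes the
	 * region ID in bits 8 and up, the preferred page size in bits
	 * 2-7 and the VHPT walker enable bit in bit 0.  A NULL pmap
	 * installs fixed per-region IDs instead of the pmap's RIDs.
	 */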
2555	if (pm == NULL) {
2556		for (i = 0; i < 5; i++) {
2557			ia64_set_rr(IA64_RR_BASE(i),
2558			    (i << 8)|(PAGE_SHIFT << 2)|1);
2559		}
2560	} else {
2561		for (i = 0; i < 5; i++) {
2562			ia64_set_rr(IA64_RR_BASE(i),
2563			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2564		}
2565		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
2566	}
2567	PCPU_SET(current_pmap, pm);
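	/* Data serialize so the new region register values take effect. */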
2568	__asm __volatile("srlz.d");
2569	return (prevpm);
2570}
2571
2572static pmap_t
2573pmap_install(pmap_t pm)
2574{
2575	pmap_t prevpm;
2576
2577	mtx_lock_spin(&sched_lock);
2578	prevpm = pmap_switch(pm);
2579	mtx_unlock_spin(&sched_lock);
2580	return (prevpm);
2581}
2582
2583vm_offset_t
2584pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2585{
2586
2587	return addr;
2588}
2589
2590#include "opt_ddb.h"
2591
2592#ifdef DDB
2593
2594#include <ddb/ddb.h>
2595
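/*
 * Page size names, indexed by the log2 page size found in the itir.ps
 * and rr.ps fields (e.g. index 12 is "4K", 2^12 bytes).
 */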
2596static const char*	psnames[] = {
2597	"1B",	"2B",	"4B",	"8B",
2598	"16B",	"32B",	"64B",	"128B",
2599	"256B",	"512B",	"1K",	"2K",
2600	"4K",	"8K",	"16K",	"32K",
2601	"64K",	"128K",	"256K",	"512K",
2602	"1M",	"2M",	"4M",	"8M",
2603	"16M",	"32M",	"64M",	"128M",
2604	"256M",	"512M",	"1G",	"2G"
2605};
2606
2607static void
2608print_trs(int type)
2609{
2610	struct ia64_pal_result	res;
2611	int			i, maxtr;
2612	struct {
2613		struct ia64_pte	pte;
2614		struct ia64_itir itir;
2615		struct ia64_ifa ifa;
2616		struct ia64_rr	rr;
2617	}			buf;
2618	static const char*	manames[] = {
2619		"WB",	"bad",	"bad",	"bad",
2620		"UC",	"UCE",	"WC",	"NaT",
2621
2622	};
2623
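	/*
	 * PAL_VM_SUMMARY reports the highest implemented translation
	 * register index for the requested type (0 = instruction,
	 * 1 = data); each TR is then dumped with PAL_VM_TR_READ.
	 */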
2624	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2625	if (res.pal_status != 0) {
2626		db_printf("Can't get VM summary\n");
2627		return;
2628	}
2629
2630	if (type == 0)
2631		maxtr = (res.pal_result[0] >> 40) & 0xff;
2632	else
2633		maxtr = (res.pal_result[0] >> 32) & 0xff;
2634
2635	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2636	for (i = 0; i <= maxtr; i++) {
2637		bzero(&buf, sizeof(buf));
2638		res = ia64_call_pal_stacked_physical
2639			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2640		if (!(res.pal_result[0] & 1))
2641			buf.pte.pte_ar = 0;
2642		if (!(res.pal_result[0] & 2))
2643			buf.pte.pte_pl = 0;
2644		if (!(res.pal_result[0] & 4))
2645			buf.pte.pte_d = 0;
2646		if (!(res.pal_result[0] & 8))
2647			buf.pte.pte_ma = 0;
2648		db_printf(
2649			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2650			buf.ifa.ifa_ig & 1,
2651			buf.rr.rr_rid,
2652			buf.ifa.ifa_vpn,
2653			buf.pte.pte_ppn,
2654			psnames[buf.itir.itir_ps],
2655			buf.pte.pte_ed,
2656			buf.pte.pte_ar,
2657			buf.pte.pte_pl,
2658			buf.pte.pte_d,
2659			buf.pte.pte_a,
2660			manames[buf.pte.pte_ma],
2661			buf.pte.pte_p,
2662			buf.itir.itir_key);
2663	}
2664}
2665
2666DB_COMMAND(itr, db_itr)
2667{
2668	print_trs(0);
2669}
2670
2671DB_COMMAND(dtr, db_dtr)
2672{
2673	print_trs(1);
2674}
2675
2676DB_COMMAND(rr, db_rr)
2677{
2678	int i;
2679	u_int64_t t;
2680	struct ia64_rr rr;
2681
2682	printf("RR RID    PgSz VE\n");
2683	for (i = 0; i < 8; i++) {
2684		__asm __volatile ("mov %0=rr[%1]"
2685				  : "=r"(t)
2686				  : "r"(IA64_RR_BASE(i)));
2687		*(u_int64_t *) &rr = t;
2688		printf("%d  %06x %4s %d\n",
2689		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2690	}
2691}
2692
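/*
 * thash/ttag: print the VHPT hash bucket address and the translation
 * tag the CPU computes for the given virtual address.
 */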
2693DB_COMMAND(thash, db_thash)
2694{
2695	if (!have_addr)
2696		return;
2697
2698	db_printf("%p\n", (void *) ia64_thash(addr));
2699}
2700
2701DB_COMMAND(ttag, db_ttag)
2702{
2703	if (!have_addr)
2704		return;
2705
2706	db_printf("0x%lx\n", ia64_ttag(addr));
2707}
2708
2709#endif
2710