1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 115059 2003-05-16 07:05:08Z marcel $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
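
/*
 * In this file the kernel simply claims RIDs 0..7 outright (see
 * pmap_bootstrap(), which marks them allocated in pmap_ridmap), while
 * each user pmap carries five RIDs, one per user region, handed out
 * individually by the bitmap allocator below.
 */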
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120#include <vm/uma_int.h>
121
122#include <sys/user.h>
123
124#include <machine/pal.h>
125#include <machine/md_var.h>
126
127MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
128
129#ifndef KSTACK_MAX_PAGES
130#define	KSTACK_MAX_PAGES 32
131#endif
132
133#ifndef PMAP_SHPGPERPROC
134#define PMAP_SHPGPERPROC 200
135#endif
136
137#if defined(DIAGNOSTIC)
138#define PMAP_DIAGNOSTIC
139#endif
140
141#define MINPV 2048	/* Preallocate at least this many */
142#define MAXPV 20480	/* But no more than this */
143
144#if 0
145#define PMAP_DIAGNOSTIC
146#define PMAP_DEBUG
147#endif
148
149#if !defined(PMAP_DIAGNOSTIC)
150#define PMAP_INLINE __inline
151#else
152#define PMAP_INLINE
153#endif
154
155/*
156 * Get PDEs and PTEs for user/kernel address space
157 */
158#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
159#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
160#define pmap_pte_v(pte)		((pte)->pte_p)
161#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
162#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
163
164#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
165				:((pte)->pte_ig &= ~PTE_IG_WIRED))
166#define pmap_pte_set_prot(pte, v) do {		\
167    (pte)->pte_ar = (v) >> 2;			\
168    (pte)->pte_pl = (v) & 3;			\
169} while (0)
170
171/*
172 * Given a map and a machine independent protection code,
173 * convert to an ia64 protection code.
174 */
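/*
 * protection_codes[0][] is used for the kernel pmap and [1][] for user
 * pmaps; each entry packs an access-rights/privilege-level pair as
 * (ar << 2) | pl, which pte_prot_ar() and pte_prot_pl() unpack.
 */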
175#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
176#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
177#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
178int	protection_codes[2][8];
179
180/*
181 * Return non-zero if this pmap is currently active
182 */
183#define pmap_isactive(pmap)	(pmap->pm_active)
184
185/*
186 * Statically allocated kernel pmap
187 */
188struct pmap kernel_pmap_store;
189
190vm_offset_t avail_start;	/* PA of first available physical page */
191vm_offset_t avail_end;		/* PA of last available physical page */
192vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
193vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
194static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
195
196vm_offset_t vhpt_base, vhpt_size;
197
198/*
199 * We use an object to own the kernel's 'page tables'. For simplicity,
200 * we use one page directory to index a set of pages containing
201 * ia64_lptes. This gives us up to 2GB of kernel virtual space.
202 */
203static int nkpt;
204static struct ia64_lpte **kptdir;
205#define KPTE_DIR_INDEX(va) \
206	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
207#define KPTE_PTE_INDEX(va) \
208	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
209#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
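
/*
 * For example, with 8KB pages (PAGE_SHIFT == 13) and 32-byte lptes,
 * kptdir is a single page of pointers (1024 of them) and each page it
 * points at holds NKPTEPG == 256 lptes, i.e. 1024 * 256 * 8KB == 2GB
 * of mappable kernel virtual space.
 */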
210
211vm_offset_t kernel_vm_end;
212
213/* Values for ptc.e. XXX values for SKI. */
214static u_int64_t pmap_ptc_e_base = 0x100000000;
215static u_int64_t pmap_ptc_e_count1 = 3;
216static u_int64_t pmap_ptc_e_count2 = 2;
217static u_int64_t pmap_ptc_e_stride1 = 0x2000;
218static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
219
220/*
221 * Data for the RID allocator
222 */
223static int pmap_ridcount;
224static int pmap_rididx;
225static int pmap_ridmapsz;
226static int pmap_ridmax;
227static u_int64_t *pmap_ridmap;
228struct mtx pmap_ridmutex;
229
230/*
231 * Data for the pv entry allocation mechanism
232 */
233static uma_zone_t pvzone;
234static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
235int pmap_pagedaemon_waken;
236static struct pv_entry *pvbootentries;
237static int pvbootnext, pvbootmax;
238
239/*
240 * Data for allocating PTEs for user processes.
241 */
242static uma_zone_t ptezone;
243
244/*
245 * VHPT instrumentation.
246 */
247static int pmap_vhpt_inserts;
248static int pmap_vhpt_collisions;
249static int pmap_vhpt_resident;
250SYSCTL_DECL(_vm_stats);
251SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
252SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
253	   &pmap_vhpt_inserts, 0, "");
254SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
255	   &pmap_vhpt_collisions, 0, "");
256SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
257	   &pmap_vhpt_resident, 0, "");
258
259static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
260static pv_entry_t get_pv_entry(void);
261static void	ia64_protection_init(void);
262
263static void	pmap_invalidate_all(pmap_t pmap);
264static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
265
266vm_offset_t
267pmap_steal_memory(vm_size_t size)
268{
269	vm_size_t bank_size;
270	vm_offset_t pa, va;
271
272	size = round_page(size);
273
274	bank_size = phys_avail[1] - phys_avail[0];
275	while (size > bank_size) {
276		int i;
277		for (i = 0; phys_avail[i+2]; i+= 2) {
278			phys_avail[i] = phys_avail[i+2];
279			phys_avail[i+1] = phys_avail[i+3];
280		}
281		phys_avail[i] = 0;
282		phys_avail[i+1] = 0;
283		if (!phys_avail[0])
284			panic("pmap_steal_memory: out of memory");
285		bank_size = phys_avail[1] - phys_avail[0];
286	}
287
288	pa = phys_avail[0];
289	phys_avail[0] += size;
290
291	va = IA64_PHYS_TO_RR7(pa);
292	bzero((caddr_t) va, size);
293	return va;
294}
295
296/*
297 *	Bootstrap the system enough to run with virtual memory.
298 */
299void
300pmap_bootstrap()
301{
302	int i, j, count, ridbits;
303	struct ia64_pal_result res;
304
305	/*
306	 * Query the PAL Code to find the loop parameters for the
307	 * ptc.e instruction.
308	 */
309	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
310	if (res.pal_status != 0)
311		panic("Can't configure ptc.e parameters");
312	pmap_ptc_e_base = res.pal_result[0];
313	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
314	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
315	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
316	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
317	if (bootverbose)
318		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
319		       "stride1=0x%lx, stride2=0x%lx\n",
320		       pmap_ptc_e_base,
321		       pmap_ptc_e_count1,
322		       pmap_ptc_e_count2,
323		       pmap_ptc_e_stride1,
324		       pmap_ptc_e_stride2);
325
326	/*
327	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
328	 *
329	 * We currently need at least 19 bits in the RID because PID_MAX
330	 * can only be encoded in 17 bits and we need RIDs for 5 regions
331	 * per process. With PID_MAX equalling 99999 this means that we
332	 * need to be able to encode 499995 (=5*PID_MAX).
333	 * The Itanium processor only has 18 bits and the architected
334	 * minimum is exactly that. So, we cannot use a PID based scheme
335	 * in those cases. Enter pmap_ridmap...
336	 * We should avoid the map when running on a processor that has
337	 * implemented enough bits. This means that we should pass the
338	 * process/thread ID to pmap. This we currently don't do, so we
339	 * use the map anyway. However, we don't want to allocate a map
340	 * that is large enough to cover the range dictated by the number
341	 * of bits in the RID, because that may result in a RID map of
342	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
343 * The bottom line: we create a 32KB map when the processor only
344	 * implements 18 bits (or when we can't figure it out). Otherwise
345	 * we create a 64KB map.
346	 */
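	/*
	 * (The map is one bit per RID, so 2^18 RIDs take 2^18/8 = 32KB
	 * and the 19-bit cap applied below takes 2^19/8 = 64KB.)
	 */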
347	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
348	if (res.pal_status != 0) {
349		if (bootverbose)
350			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
351		ridbits = 18; /* guaranteed minimum */
352	} else {
353		ridbits = (res.pal_result[1] >> 8) & 0xff;
354		if (bootverbose)
355			printf("Processor supports %d Region ID bits\n",
356			    ridbits);
357	}
358	if (ridbits > 19)
359		ridbits = 19;
360
361	pmap_ridmax = (1 << ridbits);
362	pmap_ridmapsz = pmap_ridmax / 64;
363	pmap_ridmap = (u_int64_t *)pmap_steal_memory(pmap_ridmax / 8);
364	pmap_ridmap[0] |= 0xff;
365	pmap_rididx = 0;
366	pmap_ridcount = 8;
367	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
368
369	/*
370	 * Allocate some memory for initial kernel 'page tables'.
371	 */
372	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
373	for (i = 0; i < NKPT; i++) {
374		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
375	}
376	nkpt = NKPT;
377	kernel_vm_end = NKPT * PAGE_SIZE * NKPTEPG + VM_MIN_KERNEL_ADDRESS;
378
379	avail_start = phys_avail[0];
380	for (i = 0; phys_avail[i+2]; i+= 2) ;
381	avail_end = phys_avail[i+1];
382	count = i+2;
383
384	/*
385	 * Figure out a useful size for the VHPT, based on the size of
386	 * physical memory and try to locate a region which is large
387	 * enough to contain the VHPT (which must be a power of two in
388	 * size and aligned to a natural boundary).
389	 * Don't use the difference between avail_start and avail_end
390 * as a measure of memory size. The address space is often
391 * sparse enough that we would (try to) create a huge VHPT.
392	 */
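	/*
	 * Maxmem is in pages; at 32 bytes per long-format VHPT entry this
	 * sizes the table for roughly one entry per physical page.
	 */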
393	vhpt_size = 15;
394	while ((1<<vhpt_size) < Maxmem * 32)
395		vhpt_size++;
396
397	vhpt_base = 0;
398	while (!vhpt_base) {
399		vm_offset_t mask;
400		if (bootverbose)
401			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
402		mask = (1L << vhpt_size) - 1;
403		for (i = 0; i < count; i += 2) {
404			vm_offset_t base, limit;
405			base = (phys_avail[i] + mask) & ~mask;
406			limit = base + (1L << vhpt_size);
407			if (limit <= phys_avail[i+1])
408				/*
409				 * VHPT can fit in this region
410				 */
411				break;
412		}
413		if (!phys_avail[i]) {
414			/*
415			 * Can't fit, try next smaller size.
416			 */
417			vhpt_size--;
418		} else {
419			vhpt_base = (phys_avail[i] + mask) & ~mask;
420		}
421	}
422	if (vhpt_size < 15)
423		panic("Can't find space for VHPT");
424
425	if (bootverbose)
426		printf("Putting VHPT at %p\n", (void *) vhpt_base);
427	if (vhpt_base != phys_avail[i]) {
428		/*
429		 * Split this region.
430		 */
431		if (bootverbose)
432			printf("Splitting [%p-%p]\n",
433			       (void *) phys_avail[i],
434			       (void *) phys_avail[i+1]);
435		for (j = count; j > i; j -= 2) {
436			phys_avail[j] = phys_avail[j-2];
437			phys_avail[j+1] = phys_avail[j-2+1];
438		}
439		phys_avail[count+2] = 0;
440		phys_avail[count+3] = 0;
441		phys_avail[i+1] = vhpt_base;
442		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
443	} else {
444		phys_avail[i] = vhpt_base + (1L << vhpt_size);
445	}
446
447	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
448	bzero((void *) vhpt_base, (1L << vhpt_size));
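	/*
	 * The architected cr.pta layout: bit 0 is ve (walker enable),
	 * bits 2..7 hold log2 of the VHPT size, bit 8 is vf (long format)
	 * and the high bits hold the VHPT base, hence the expression
	 * base + (1<<8) + (vhpt_size<<2) + 1 below.
	 */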
449	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
450			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
451
452	virtual_avail = VM_MIN_KERNEL_ADDRESS;
453	virtual_end = VM_MAX_KERNEL_ADDRESS;
454
455	/*
456	 * Initialize protection array.
457	 */
458	ia64_protection_init();
459
460	/*
461	 * Initialize the kernel pmap (which is statically allocated).
462	 */
463	for (i = 0; i < 5; i++)
464		kernel_pmap->pm_rid[i] = 0;
465	kernel_pmap->pm_active = 1;
466	TAILQ_INIT(&kernel_pmap->pm_pvlist);
467	PCPU_SET(current_pmap, kernel_pmap);
468
469	/*
470	 * Region 5 is mapped via the vhpt.
471	 */
472	ia64_set_rr(IA64_RR_BASE(5),
473		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
474
475	/*
476	 * Region 6 is direct mapped UC and region 7 is direct mapped
477	 * WB. The details of this are controlled by the Alt {I,D}TLB
478	 * handlers. Here we just make sure that they have the largest
479	 * possible page size to minimise TLB usage.
480	 */
481	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
482	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
483
484	/*
485	 * Reserve some memory for allocating pvs while bootstrapping
486	 * the pv allocator. We need to have enough to cover mapping
487	 * the kmem_alloc region used to allocate the initial_pvs in
488	 * pmap_init. In general, the size of this region is
489	 * approximately (# physical pages) * (size of pv entry).
490	 */
491	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
492	pvbootentries = (struct pv_entry *)
493		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
494	pvbootnext = 0;
495
496	/*
497	 * Clear out any random TLB entries left over from booting.
498	 */
499	pmap_invalidate_all(kernel_pmap);
500}
501
502void *
503uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
504{
505	static vm_pindex_t color;
506	vm_page_t m;
507	int pflags;
508	void *va;
509
510	*flags = UMA_SLAB_PRIV;
511	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
512		pflags = VM_ALLOC_INTERRUPT;
513	else
514		pflags = VM_ALLOC_SYSTEM;
515	if (wait & M_ZERO)
516		pflags |= VM_ALLOC_ZERO;
517
518	for (;;) {
519		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
520		if (m == NULL) {
521			if (wait & M_NOWAIT)
522				return (NULL);
523			else
524				VM_WAIT;
525		} else
526			break;
527	}
528
529	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
530	if ((m->flags & PG_ZERO) == 0)
531		bzero(va, PAGE_SIZE);
532	return (va);
533}
534
535void
536uma_small_free(void *mem, int size, u_int8_t flags)
537{
538	vm_page_t m;
539
540	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
541	vm_page_lock_queues();
542	vm_page_free(m);
543	vm_page_unlock_queues();
544}
545
546/*
547 *	Initialize the pmap module.
548 *	Called by vm_init, to initialize any structures that the pmap
549 *	system needs to map virtual memory.
550 *	pmap_init has been enhanced to support, in a fairly consistent
551 *	way, discontiguous physical memory.
552 */
553void
554pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
555{
556	int i;
557	int initial_pvs;
558
559	/*
560	 * Allocate memory for random pmap data structures.  Includes the
561	 * pv_head_table.
562	 */
563
564	for(i = 0; i < vm_page_array_size; i++) {
565		vm_page_t m;
566
567		m = &vm_page_array[i];
568		TAILQ_INIT(&m->md.pv_list);
569		m->md.pv_list_count = 0;
570 	}
571
572	/*
573	 * Init the pv free list and the PTE free list.
574	 */
575	initial_pvs = vm_page_array_size;
576	if (initial_pvs < MINPV)
577		initial_pvs = MINPV;
578	if (initial_pvs > MAXPV)
579		initial_pvs = MAXPV;
580	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
581	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
582	uma_prealloc(pvzone, initial_pvs);
583
584	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
585	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
586	uma_prealloc(ptezone, initial_pvs);
587
588	/*
589	 * Now it is safe to enable pv_table recording.
590	 */
591	pmap_initialized = TRUE;
592}
593
594/*
595 * Initialize the address space (zone) for the pv_entries.  Set a
596 * high water mark so that the system can recover from excessive
597 * numbers of pv entries.
598 */
599void
600pmap_init2()
601{
602	int shpgperproc = PMAP_SHPGPERPROC;
603
604	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
605	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
606	pv_entry_high_water = 9 * (pv_entry_max / 10);
607}
608
609
610/***************************************************
611 * Manipulate TLBs for a pmap
612 ***************************************************/
613
614static void
615pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
616{
617	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
618		("invalidating TLB for non-current pmap"));
619	ia64_ptc_g(va, PAGE_SHIFT << 2);
620}
621
622static void
623pmap_invalidate_all_1(void *arg)
624{
625	u_int64_t addr;
626	int i, j;
627	register_t psr;
628
629	psr = intr_disable();
630	addr = pmap_ptc_e_base;
631	for (i = 0; i < pmap_ptc_e_count1; i++) {
632		for (j = 0; j < pmap_ptc_e_count2; j++) {
633			ia64_ptc_e(addr);
634			addr += pmap_ptc_e_stride2;
635		}
636		addr += pmap_ptc_e_stride1;
637	}
638	intr_restore(psr);
639}
640
641static void
642pmap_invalidate_all(pmap_t pmap)
643{
644	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
645		("invalidating TLB for non-current pmap"));
646
647
648#ifdef SMP
649	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
650#else
651	pmap_invalidate_all_1(0);
652#endif
653}
654
655static u_int32_t
656pmap_allocate_rid(void)
657{
658	uint64_t bit, bits;
659	int rid;
660
661	mtx_lock(&pmap_ridmutex);
662	if (pmap_ridcount == pmap_ridmax)
663		panic("pmap_allocate_rid: All Region IDs used");
664
665	/* Find an index with a free bit. */
666	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
667		pmap_rididx++;
668		if (pmap_rididx == pmap_ridmapsz)
669			pmap_rididx = 0;
670	}
671	rid = pmap_rididx * 64;
672
673	/* Find a free bit. */
674	bit = 1UL;
675	while (bits & bit) {
676		rid++;
677		bit <<= 1;
678	}
679
680	pmap_ridmap[pmap_rididx] |= bit;
681	pmap_ridcount++;
682	mtx_unlock(&pmap_ridmutex);
683
684	return rid;
685}
686
687static void
688pmap_free_rid(u_int32_t rid)
689{
690	uint64_t bit;
691	int idx;
692
693	idx = rid / 64;
694	bit = ~(1UL << (rid & 63));
695
696	mtx_lock(&pmap_ridmutex);
697	pmap_ridmap[idx] &= bit;
698	pmap_ridcount--;
699	mtx_unlock(&pmap_ridmutex);
700}
701
702/***************************************************
703 * Low level helper routines.....
704 ***************************************************/
705
706/*
707 * Install a pte into the VHPT
708 */
709static PMAP_INLINE void
710pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
711{
712	u_int64_t *vhp, *p;
713
714	/* invalidate the pte */
715	atomic_set_64(&vhpte->pte_tag, 1L << 63);
716	ia64_mf();			/* make sure everyone sees */
717
718	vhp = (u_int64_t *) vhpte;
719	p = (u_int64_t *) pte;
720
721	vhp[0] = p[0];
722	vhp[1] = p[1];
723	vhp[2] = p[2];			/* sets ti to one */
724
725	ia64_mf();
726}
727
728/*
729 * Compare essential parts of pte.
730 */
731static PMAP_INLINE int
732pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
733{
734	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
735}
736
737/*
738 * this routine defines the region(s) of memory that should
739 * not be tested for the modified bit.
740 */
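/*
 * (kmi.clean_sva and kmi.clean_eva bracket the kernel's "clean" submap,
 * i.e. the buffer and pager mappings, whose pages are intentionally not
 * dirty-tracked here.)
 */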
741static PMAP_INLINE int
742pmap_track_modified(vm_offset_t va)
743{
744	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
745		return 1;
746	else
747		return 0;
748}
749
750/*
751 * Create the KSTACK for a new thread.
752 * This routine directly affects the fork perf for a process/thread.
753 */
754void
755pmap_new_thread(struct thread *td, int pages)
756{
757	vm_offset_t *ks;
758
759	/* Bounds check */
760	if (pages <= 1)
761		pages = KSTACK_PAGES;
762	else if (pages > KSTACK_MAX_PAGES)
763		pages = KSTACK_MAX_PAGES;
764
765	/*
766	 * Use contigmalloc for the user area so that we can use a region
767	 * 7 address for it, which makes it impossible to accidentally
768	 * lose it when recording a trapframe.
769	 */
770	ks = contigmalloc(pages * PAGE_SIZE, M_PMAP, M_WAITOK, 0ul,
771	    256*1024*1024 - 1, PAGE_SIZE, 256*1024*1024);
772	if (ks == NULL)
773		panic("pmap_new_thread: could not contigmalloc %d pages\n",
774		    pages);
775
776	td->td_md.md_kstackvirt = ks;
777	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
778	td->td_kstack_pages = pages;
779}
780
781/*
782 * Dispose the KSTACK for a thread that has exited.
783 * This routine directly impacts the exit perf of a process/thread.
784 */
785void
786pmap_dispose_thread(struct thread *td)
787{
788	int pages;
789
790	pages = td->td_kstack_pages;
791	contigfree(td->td_md.md_kstackvirt, pages * PAGE_SIZE, M_PMAP);
792	td->td_md.md_kstackvirt = NULL;
793	td->td_kstack = 0;
794}
795
796/*
797 * Set up a variable sized alternate kstack.  This appears to be MI.
798 */
799void
800pmap_new_altkstack(struct thread *td, int pages)
801{
802
803	/*
804	 * Shuffle the original stack. Save the virtual kstack address
805	 * instead of the physical address because 1) we can derive the
806	 * physical address from the virtual address and 2) we need the
807	 * virtual address in pmap_dispose_thread.
808	 */
809	td->td_altkstack_obj = td->td_kstack_obj;
810	td->td_altkstack = (vm_offset_t)td->td_md.md_kstackvirt;
811	td->td_altkstack_pages = td->td_kstack_pages;
812
813	pmap_new_thread(td, pages);
814}
815
816void
817pmap_dispose_altkstack(struct thread *td)
818{
819
820	pmap_dispose_thread(td);
821
822	/*
823	 * Restore the original kstack. Note that td_altkstack holds the
824	 * virtual kstack address of the previous kstack.
825	 */
826	td->td_md.md_kstackvirt = (void*)td->td_altkstack;
827	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa(td->td_altkstack));
828	td->td_kstack_obj = td->td_altkstack_obj;
829	td->td_kstack_pages = td->td_altkstack_pages;
830	td->td_altkstack = 0;
831	td->td_altkstack_obj = NULL;
832	td->td_altkstack_pages = 0;
833}
834
835/*
836 * Allow the KSTACK for a thread to be prejudicially paged out.
837 */
838void
839pmap_swapout_thread(struct thread *td)
840{
841}
842
843/*
844 * Bring the KSTACK for a specified thread back in.
845 */
846void
847pmap_swapin_thread(struct thread *td)
848{
849}
850
851/***************************************************
852 * Page table page management routines.....
853 ***************************************************/
854
855void
856pmap_pinit0(struct pmap *pmap)
857{
858	/* kernel_pmap is the same as any other pmap. */
859	pmap_pinit(pmap);
860}
861
862/*
863 * Initialize a preallocated and zeroed pmap structure,
864 * such as one in a vmspace structure.
865 */
866void
867pmap_pinit(struct pmap *pmap)
868{
869	int i;
870
871	pmap->pm_flags = 0;
872	for (i = 0; i < 5; i++)
873		pmap->pm_rid[i] = 0;
874	pmap->pm_ptphint = NULL;
875	pmap->pm_active = 0;
876	TAILQ_INIT(&pmap->pm_pvlist);
877	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
878}
879
880/*
881 * Wire in kernel global address entries.  To avoid a race condition
882 * between pmap initialization and pmap_growkernel, this procedure
883 * should be called after the vmspace is attached to the process
884 * but before this pmap is activated.
885 */
886void
887pmap_pinit2(struct pmap *pmap)
888{
889	int i;
890
891	for (i = 0; i < 5; i++)
892		pmap->pm_rid[i] = pmap_allocate_rid();
893}
894
895/***************************************************
896 * Pmap allocation/deallocation routines.
897 ***************************************************/
898
899/*
900 * Release any resources held by the given physical map.
901 * Called when a pmap initialized by pmap_pinit is being released.
902 * Should only be called if the map contains no valid mappings.
903 */
904void
905pmap_release(pmap_t pmap)
906{
907	int i;
908
909	for (i = 0; i < 5; i++)
910		if (pmap->pm_rid[i])
911			pmap_free_rid(pmap->pm_rid[i]);
912}
913
914/*
915 * grow the number of kernel page table entries, if needed
916 */
917void
918pmap_growkernel(vm_offset_t addr)
919{
920	struct ia64_lpte *ptepage;
921	vm_page_t nkpg;
922
923	if (kernel_vm_end >= addr)
924		return;
925
926	critical_enter();
927
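	/*
	 * Each new page of lptes extends the mapped range by NKPTEPG
	 * pages of KVA (e.g. 256 * 8KB = 2MB per iteration with 8KB
	 * pages).
	 */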
928	while (kernel_vm_end < addr) {
929		/* We could handle more by increasing the size of kptdir. */
930		if (nkpt == MAXKPT)
931			panic("pmap_growkernel: out of kernel address space");
932
933		nkpg = vm_page_alloc(NULL, nkpt,
934		    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
935		if (!nkpg)
936			panic("pmap_growkernel: no memory to grow kernel");
937
938		ptepage = (struct ia64_lpte *)
939		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
940		bzero(ptepage, PAGE_SIZE);
941		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
942
943		nkpt++;
944		kernel_vm_end += PAGE_SIZE * NKPTEPG;
945	}
946
947	critical_exit();
948}
949
950/***************************************************
951 * page management routines.
952 ***************************************************/
953
954/*
955 * free the pv_entry back to the free list
956 */
957static PMAP_INLINE void
958free_pv_entry(pv_entry_t pv)
959{
960	pv_entry_count--;
961	uma_zfree(pvzone, pv);
962}
963
964/*
965 * get a new pv_entry, allocating a block from the system
966 * when needed.
967 * the memory allocation is performed bypassing the malloc code
968 * because of the possibility of allocations at interrupt time.
969 */
970static pv_entry_t
971get_pv_entry(void)
972{
973	pv_entry_count++;
974	if (pv_entry_high_water &&
975		(pv_entry_count > pv_entry_high_water) &&
976		(pmap_pagedaemon_waken == 0)) {
977		pmap_pagedaemon_waken = 1;
978		wakeup (&vm_pages_needed);
979	}
980	return uma_zalloc(pvzone, M_NOWAIT);
981}
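/*
 * VHPT handling: ia64_thash(va) picks the VHPT slot for va and
 * ia64_ttag(va) computes its tag.  Each slot heads a collision chain
 * linked through pte_chain (physical addresses), and the slot itself
 * holds a copy of whichever chained pte is currently installed in it.
 */
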
982
983/*
984 * Add an ia64_lpte to the VHPT.
985 */
986static void
987pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
988{
989	struct ia64_lpte *vhpte;
990
991	pmap_vhpt_inserts++;
992	pmap_vhpt_resident++;
993
994	vhpte = (struct ia64_lpte *) ia64_thash(va);
995
996	if (vhpte->pte_chain)
997		pmap_vhpt_collisions++;
998
999	pte->pte_chain = vhpte->pte_chain;
1000	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
1001
1002	if (!vhpte->pte_p && pte->pte_p)
1003		pmap_install_pte(vhpte, pte);
1004	else
1005		ia64_mf();
1006}
1007
1008/*
1009 * Update VHPT after a pte has changed.
1010 */
1011static void
1012pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1013{
1014	struct ia64_lpte *vhpte;
1015
1016	vhpte = (struct ia64_lpte *) ia64_thash(va);
1017
1018	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
1019	    && pte->pte_p)
1020		pmap_install_pte(vhpte, pte);
1021}
1022
1023/*
1024 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1025 * worked or an appropriate error code otherwise.
1026 */
1027static int
1028pmap_remove_vhpt(vm_offset_t va)
1029{
1030	struct ia64_lpte *pte;
1031	struct ia64_lpte *lpte;
1032	struct ia64_lpte *vhpte;
1033	u_int64_t tag;
1034	int error = ENOENT;
1035
1036	vhpte = (struct ia64_lpte *) ia64_thash(va);
1037
1038	/*
1039	 * If the VHPTE is invalid, there can't be a collision chain.
1040	 */
1041	if (!vhpte->pte_p) {
1042		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1043		printf("can't remove vhpt entry for 0x%lx\n", va);
1044		goto done;
1045	}
1046
1047	lpte = vhpte;
1048	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1049	tag = ia64_ttag(va);
1050
1051	while (pte->pte_tag != tag) {
1052		lpte = pte;
1053		if (pte->pte_chain)
1054			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1055		else {
1056			printf("can't remove vhpt entry for 0x%lx\n", va);
1057			goto done;
1058		}
1059	}
1060
1061	/*
1062	 * Snip this pv_entry out of the collision chain.
1063	 */
1064	lpte->pte_chain = pte->pte_chain;
1065
1066	/*
1067	 * If the VHPTE matches as well, change it to map the first
1068	 * element from the chain if there is one.
1069	 */
1070	if (vhpte->pte_tag == tag) {
1071		if (vhpte->pte_chain) {
1072			pte = (struct ia64_lpte *)
1073				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1074			pmap_install_pte(vhpte, pte);
1075		} else {
1076			vhpte->pte_p = 0;
1077			ia64_mf();
1078		}
1079	}
1080
1081	pmap_vhpt_resident--;
1082	error = 0;
1083 done:
1084	return error;
1085}
1086
1087/*
1088 * Find the ia64_lpte for the given va, if any.
1089 */
1090static struct ia64_lpte *
1091pmap_find_vhpt(vm_offset_t va)
1092{
1093	struct ia64_lpte *pte;
1094	u_int64_t tag;
1095
1096	pte = (struct ia64_lpte *) ia64_thash(va);
1097	if (!pte->pte_chain) {
1098		pte = 0;
1099		goto done;
1100	}
1101
1102	tag = ia64_ttag(va);
1103	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1104
1105	while (pte->pte_tag != tag) {
1106		if (pte->pte_chain) {
1107			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1108		} else {
1109			pte = 0;
1110			break;
1111		}
1112	}
1113
1114 done:
1115	return pte;
1116}
1117
1118/*
1119 * Remove an entry from the list of managed mappings.
1120 */
1121static int
1122pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1123{
1124	if (!pv) {
1125		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1126			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1127				if (pmap == pv->pv_pmap && va == pv->pv_va)
1128					break;
1129			}
1130		} else {
1131			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1132				if (va == pv->pv_va)
1133					break;
1134			}
1135		}
1136	}
1137
1138	if (pv) {
1139		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1140		m->md.pv_list_count--;
1141		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1142			vm_page_flag_clear(m, PG_WRITEABLE);
1143
1144		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1145		free_pv_entry(pv);
1146		return 0;
1147	} else {
1148		return ENOENT;
1149	}
1150}
1151
1152/*
1153 * Create a pv entry for page at pa for
1154 * (pmap, va).
1155 */
1156static void
1157pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1158{
1159	pv_entry_t pv;
1160
1161	pv = get_pv_entry();
1162	pv->pv_pmap = pmap;
1163	pv->pv_va = va;
1164
1165	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1166	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1167	m->md.pv_list_count++;
1168}
1169
1170/*
1171 *	Routine:	pmap_extract
1172 *	Function:
1173 *		Extract the physical page address associated
1174 *		with the given map/virtual_address pair.
1175 */
1176vm_offset_t
1177pmap_extract(pmap, va)
1178	register pmap_t pmap;
1179	vm_offset_t va;
1180{
1181	struct ia64_lpte *pte;
1182	pmap_t oldpmap;
1183
1184	if (!pmap)
1185		return 0;
1186
1187	oldpmap = pmap_install(pmap);
1188	pte = pmap_find_vhpt(va);
1189	pmap_install(oldpmap);
1190
1191	if (!pte)
1192		return 0;
1193
1194	return pmap_pte_pa(pte);
1195}
1196
1197/***************************************************
1198 * Low level mapping routines.....
1199 ***************************************************/
1200
1201/*
1202 * Find the kernel lpte for mapping the given virtual address, which
1203 * must be in the part of region 5 which we can cover with our kernel
1204 * 'page tables'.
1205 */
1206static struct ia64_lpte *
1207pmap_find_kpte(vm_offset_t va)
1208{
1209	KASSERT((va >> 61) == 5,
1210		("kernel mapping 0x%lx not in region 5", va));
1211	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1212		("kernel mapping 0x%lx out of range", va));
1213	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1214}
1215
1216/*
1217 * Find a pte suitable for mapping a user-space address. If one exists
1218 * in the VHPT, that one will be returned, otherwise a new pte is
1219 * allocated.
1220 */
1221static struct ia64_lpte *
1222pmap_find_pte(vm_offset_t va)
1223{
1224	struct ia64_lpte *pte;
1225
1226	if (va >= VM_MAXUSER_ADDRESS)
1227		return pmap_find_kpte(va);
1228
1229	pte = pmap_find_vhpt(va);
1230	if (!pte) {
1231		pte = uma_zalloc(ptezone, M_WAITOK);
1232		pte->pte_p = 0;
1233	}
1234	return pte;
1235}
1236
1237/*
1238 * Free a pte which is now unused. This simply returns it to the zone
1239 * allocator if it is a user mapping. For kernel mappings, clear the
1240 * valid bit to make it clear that the mapping is not currently used.
1241 */
1242static void
1243pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1244{
1245	if (va < VM_MAXUSER_ADDRESS)
1246		uma_zfree(ptezone, pte);
1247	else
1248		pte->pte_p = 0;
1249}
1250
1251/*
1252 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1253 * the pte was originally valid, then it's assumed to already be in the
1254 * VHPT.
1255 */
1256static void
1257pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1258	     int ig, int pl, int ar)
1259{
1260	int wasvalid = pte->pte_p;
1261
1262	pte->pte_p = 1;
1263	pte->pte_ma = PTE_MA_WB;
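	/*
	 * Managed mappings start with the access and dirty bits clear so
	 * the first reference or write faults and the state can be
	 * recorded; unmanaged mappings get both bits preset to avoid
	 * those faults.
	 */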
1264	if (ig & PTE_IG_MANAGED) {
1265		pte->pte_a = 0;
1266		pte->pte_d = 0;
1267	} else {
1268		pte->pte_a = 1;
1269		pte->pte_d = 1;
1270	}
1271	pte->pte_pl = pl;
1272	pte->pte_ar = ar;
1273	pte->pte_ppn = pa >> 12;
1274	pte->pte_ed = 0;
1275	pte->pte_ig = ig;
1276
1277	pte->pte_ps = PAGE_SHIFT;
1278	pte->pte_key = 0;
1279
1280	pte->pte_tag = ia64_ttag(va);
1281
1282	if (wasvalid) {
1283		pmap_update_vhpt(pte, va);
1284	} else {
1285		pmap_enter_vhpt(pte, va);
1286	}
1287}
1288
1289/*
1290 * If a pte contains a valid mapping, clear it and update the VHPT.
1291 */
1292static void
1293pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1294{
1295	if (pte->pte_p) {
1296		pmap_remove_vhpt(va);
1297		ia64_ptc_g(va, PAGE_SHIFT << 2);
1298		pte->pte_p = 0;
1299	}
1300}
1301
1302/*
1303 * Remove the (possibly managed) mapping represented by pte from the
1304 * given pmap.
1305 */
1306static int
1307pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1308		pv_entry_t pv, int freepte)
1309{
1310	int error;
1311	vm_page_t m;
1312
1313	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1314		("removing pte for non-current pmap"));
1315
1316	/*
1317	 * First remove from the VHPT.
1318	 */
1319	error = pmap_remove_vhpt(va);
1320	if (error)
1321		return error;
1322
1323	/*
1324	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1325	 */
1326	pte->pte_p = 0;
1327
1328	if (pte->pte_ig & PTE_IG_WIRED)
1329		pmap->pm_stats.wired_count -= 1;
1330
1331	pmap->pm_stats.resident_count -= 1;
1332	if (pte->pte_ig & PTE_IG_MANAGED) {
1333		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1334		if (pte->pte_d)
1335			if (pmap_track_modified(va))
1336				vm_page_dirty(m);
1337		if (pte->pte_a)
1338			vm_page_flag_set(m, PG_REFERENCED);
1339
1340		if (freepte)
1341			pmap_free_pte(pte, va);
1342		return pmap_remove_entry(pmap, m, va, pv);
1343	} else {
1344		if (freepte)
1345			pmap_free_pte(pte, va);
1346		return 0;
1347	}
1348}
1349
1350/*
1351 * Extract the physical page address associated with a kernel
1352 * virtual address.
1353 */
1354vm_paddr_t
1355pmap_kextract(vm_offset_t va)
1356{
1357	struct ia64_lpte *pte;
1358
1359	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1360
1361	/* Regions 6 and 7 are direct mapped. */
1362	if (va >= IA64_RR_BASE(6))
1363		return (IA64_RR_MASK(va));
1364
1365	/* Bail out if the virtual address is beyond our limits. */
1366	if (IA64_RR_MASK(va) >= nkpt * PAGE_SIZE * NKPTEPG)
1367		return (0);
1368
1369	pte = pmap_find_kpte(va);
1370	if (!pte->pte_p)
1371		return (0);
1372	return ((pte->pte_ppn << 12) | (va & PAGE_MASK));
1373}
1374
1375/*
1376 * Add a list of wired pages to the kva
1377 * this routine is only used for temporary
1378 * kernel mappings that do not need to have
1379 * page modification or references recorded.
1380 * Note that old mappings are simply written
1381 * over.  The page *must* be wired.
1382 */
1383void
1384pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1385{
1386	int i;
1387	struct ia64_lpte *pte;
1388
1389	for (i = 0; i < count; i++) {
1390		vm_offset_t tva = va + i * PAGE_SIZE;
1391		int wasvalid;
1392		pte = pmap_find_kpte(tva);
1393		wasvalid = pte->pte_p;
1394		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1395			     0, PTE_PL_KERN, PTE_AR_RWX);
1396		if (wasvalid)
1397			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1398	}
1399}
1400
1401/*
1402 * this routine jerks page mappings from the
1403 * kernel -- it is meant only for temporary mappings.
1404 */
1405void
1406pmap_qremove(vm_offset_t va, int count)
1407{
1408	int i;
1409	struct ia64_lpte *pte;
1410
1411	for (i = 0; i < count; i++) {
1412		pte = pmap_find_kpte(va);
1413		pmap_clear_pte(pte, va);
1414		va += PAGE_SIZE;
1415	}
1416}
1417
1418/*
1419 * Add a wired page to the kva.
1420 */
1421void
1422pmap_kenter(vm_offset_t va, vm_offset_t pa)
1423{
1424	struct ia64_lpte *pte;
1425	int wasvalid;
1426
1427	pte = pmap_find_kpte(va);
1428	wasvalid = pte->pte_p;
1429	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1430	if (wasvalid)
1431		ia64_ptc_g(va, PAGE_SHIFT << 2);
1432}
1433
1434/*
1435 * Remove a page from the kva
1436 */
1437void
1438pmap_kremove(vm_offset_t va)
1439{
1440	struct ia64_lpte *pte;
1441
1442	pte = pmap_find_kpte(va);
1443	pmap_clear_pte(pte, va);
1444}
1445
1446/*
1447 *	Used to map a range of physical addresses into kernel
1448 *	virtual address space.
1449 *
1450 *	The value passed in '*virt' is a suggested virtual address for
1451 *	the mapping. Architectures which can support a direct-mapped
1452 *	physical to virtual region can return the appropriate address
1453 *	within that region, leaving '*virt' unchanged. Other
1454 *	architectures should map the pages starting at '*virt' and
1455 *	update '*virt' with the first usable address after the mapped
1456 *	region.
1457 */
1458vm_offset_t
1459pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1460{
1461	return IA64_PHYS_TO_RR7(start);
1462}
1463
1464/*
1465 * Remove a single page from a process address space
1466 */
1467static void
1468pmap_remove_page(pmap_t pmap, vm_offset_t va)
1469{
1470	struct ia64_lpte *pte;
1471
1472	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1473		("removing page for non-current pmap"));
1474
1475	pte = pmap_find_vhpt(va);
1476	if (pte) {
1477		pmap_remove_pte(pmap, pte, va, 0, 1);
1478		pmap_invalidate_page(pmap, va);
1479	}
1480	return;
1481}
1482
1483/*
1484 *	Remove the given range of addresses from the specified map.
1485 *
1486 *	It is assumed that the start and end are properly
1487 *	rounded to the page size.
1488 */
1489void
1490pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1491{
1492	pmap_t oldpmap;
1493	vm_offset_t va;
1494	pv_entry_t pv;
1495	struct ia64_lpte *pte;
1496
1497	if (pmap == NULL)
1498		return;
1499
1500	if (pmap->pm_stats.resident_count == 0)
1501		return;
1502
1503	oldpmap = pmap_install(pmap);
1504
1505	/*
1506	 * Special handling for removing a single page: a very
1507	 * common operation for which it is easy to short circuit
1508	 * some code.
1509	 */
1510	if (sva + PAGE_SIZE == eva) {
1511		pmap_remove_page(pmap, sva);
1512		pmap_install(oldpmap);
1513		return;
1514	}
1515
1516	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1517		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1518			va = pv->pv_va;
1519			if (va >= sva && va < eva) {
1520				pte = pmap_find_vhpt(va);
1521				pmap_remove_pte(pmap, pte, va, pv, 1);
1522				pmap_invalidate_page(pmap, va);
1523			}
1524		}
1525
1526	} else {
1527		for (va = sva; va < eva; va += PAGE_SIZE) {
1528			pte = pmap_find_vhpt(va);
1529			if (pte) {
1530				pmap_remove_pte(pmap, pte, va, 0, 1);
1531				pmap_invalidate_page(pmap, va);
1532			}
1533		}
1534	}
1535
1536	pmap_install(oldpmap);
1537}
1538
1539/*
1540 *	Routine:	pmap_remove_all
1541 *	Function:
1542 *		Removes this physical page from
1543 *		all physical maps in which it resides.
1544 *		Reflects back modify bits to the pager.
1545 *
1546 *	Notes:
1547 *		Original versions of this routine were very
1548 *		inefficient because they iteratively called
1549 *		pmap_remove (slow...)
1550 */
1551
1552void
1553pmap_remove_all(vm_page_t m)
1554{
1555	pmap_t oldpmap;
1556	pv_entry_t pv;
1557	int s;
1558
1559#if defined(PMAP_DIAGNOSTIC)
1560	/*
1561	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1562	 * pages!
1563	 */
1564	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1565		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1566	}
1567#endif
1568
1569	s = splvm();
1570
1571	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1572		struct ia64_lpte *pte;
1573		pmap_t pmap = pv->pv_pmap;
1574		vm_offset_t va = pv->pv_va;
1575
1576		oldpmap = pmap_install(pmap);
1577		pte = pmap_find_vhpt(va);
1578		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1579			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1580		pmap_remove_pte(pmap, pte, va, pv, 1);
1581		pmap_invalidate_page(pmap, va);
1582		pmap_install(oldpmap);
1583	}
1584
1585	vm_page_flag_clear(m, PG_WRITEABLE);
1586
1587	splx(s);
1588	return;
1589}
1590
1591/*
1592 *	Set the physical protection on the
1593 *	specified range of this map as requested.
1594 */
1595void
1596pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1597{
1598	pmap_t oldpmap;
1599	struct ia64_lpte *pte;
1600	int newprot;
1601
1602	if (pmap == NULL)
1603		return;
1604
1605	oldpmap = pmap_install(pmap);
1606
1607	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1608		pmap_remove(pmap, sva, eva);
1609		pmap_install(oldpmap);
1610		return;
1611	}
1612
1613	if (prot & VM_PROT_WRITE) {
1614		pmap_install(oldpmap);
1615		return;
1616	}
1617
1618	newprot = pte_prot(pmap, prot);
1619
1620	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1621		panic("pmap_protect: unaligned addresses");
1622
1623	while (sva < eva) {
1624		/*
1625		 * If page is invalid, skip this page
1626		 */
1627		pte = pmap_find_vhpt(sva);
1628		if (!pte) {
1629			sva += PAGE_SIZE;
1630			continue;
1631		}
1632
1633		if (pmap_pte_prot(pte) != newprot) {
1634			if (pte->pte_ig & PTE_IG_MANAGED) {
1635				vm_offset_t pa = pmap_pte_pa(pte);
1636				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1637				if (pte->pte_d) {
1638					if (pmap_track_modified(sva))
1639						vm_page_dirty(m);
1640					pte->pte_d = 0;
1641				}
1642				if (pte->pte_a) {
1643					vm_page_flag_set(m, PG_REFERENCED);
1644					pte->pte_a = 0;
1645				}
1646			}
1647			pmap_pte_set_prot(pte, newprot);
1648			pmap_update_vhpt(pte, sva);
1649			pmap_invalidate_page(pmap, sva);
1650		}
1651
1652		sva += PAGE_SIZE;
1653	}
1654	pmap_install(oldpmap);
1655}
1656
1657/*
1658 *	Insert the given physical page (p) at
1659 *	the specified virtual address (v) in the
1660 *	target physical map with the protection requested.
1661 *
1662 *	If specified, the page will be wired down, meaning
1663 *	that the related pte can not be reclaimed.
1664 *
1665 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1666 *	or lose information.  That is, this routine must actually
1667 *	insert this page into the given map NOW.
1668 */
1669void
1670pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1671	   boolean_t wired)
1672{
1673	pmap_t oldpmap;
1674	vm_offset_t pa;
1675	vm_offset_t opa;
1676	struct ia64_lpte origpte;
1677	struct ia64_lpte *pte;
1678	int managed;
1679
1680	if (pmap == NULL)
1681		return;
1682
1683	oldpmap = pmap_install(pmap);
1684
1685	va &= ~PAGE_MASK;
1686#ifdef PMAP_DIAGNOSTIC
1687	if (va > VM_MAX_KERNEL_ADDRESS)
1688		panic("pmap_enter: toobig");
1689#endif
1690
1691	/*
1692	 * Find (or create) a pte for the given mapping.
1693	 */
1694	pte = pmap_find_pte(va);
1695	origpte = *pte;
1696
1697	if (origpte.pte_p)
1698		opa = pmap_pte_pa(&origpte);
1699	else
1700		opa = 0;
1701	managed = 0;
1702
1703	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1704
1705	/*
1706	 * Mapping has not changed, must be protection or wiring change.
1707	 */
1708	if (origpte.pte_p && (opa == pa)) {
1709		/*
1710		 * Wiring change, just update stats. We don't worry about
1711		 * wiring PT pages as they remain resident as long as there
1712		 * are valid mappings in them. Hence, if a user page is wired,
1713		 * the PT page will be also.
1714		 */
1715		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1716			pmap->pm_stats.wired_count++;
1717		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1718			pmap->pm_stats.wired_count--;
1719
1720		/*
1721		 * We might be turning off write access to the page,
1722		 * so we go ahead and sense modify status.
1723		 */
1724		if (origpte.pte_ig & PTE_IG_MANAGED) {
1725			if (origpte.pte_d && pmap_track_modified(va)) {
1726				vm_page_t om;
1727				om = PHYS_TO_VM_PAGE(opa);
1728				vm_page_dirty(om);
1729			}
1730		}
1731
1732		managed = origpte.pte_ig & PTE_IG_MANAGED;
1733		goto validate;
1734	}
1735	/*
1736	 * Mapping has changed, invalidate old range and fall
1737	 * through to handle validating new mapping.
1738	 */
1739	if (opa) {
1740		int error;
1741		vm_page_lock_queues();
1742		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1743		vm_page_unlock_queues();
1744		if (error)
1745			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1746	}
1747
1748	/*
1749	 * Enter on the PV list if part of our managed memory.
1750	 */
1751	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1752		pmap_insert_entry(pmap, va, m);
1753		managed |= PTE_IG_MANAGED;
1754	}
1755
1756	/*
1757	 * Increment counters
1758	 */
1759	pmap->pm_stats.resident_count++;
1760	if (wired)
1761		pmap->pm_stats.wired_count++;
1762
1763validate:
1764
1765	/*
1766	 * Now validate mapping with desired protection/wiring. This
1767	 * adds the pte to the VHPT if necessary.
1768	 */
1769	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1770		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1771
1772	/*
1773	 * if the mapping or permission bits are different, we need
1774	 * to invalidate the page.
1775	 */
1776	if (!pmap_equal_pte(&origpte, pte))
1777		pmap_invalidate_page(pmap, va);
1778
1779	pmap_install(oldpmap);
1780}
1781
1782/*
1783 * this code makes some *MAJOR* assumptions:
1784 * 1. Current pmap & pmap exists.
1785 * 2. Not wired.
1786 * 3. Read access.
1787 * 4. No page table pages.
1788 * 5. Tlbflush is deferred to calling procedure.
1789 * 6. Page IS managed.
1790 * but is *MUCH* faster than pmap_enter...
1791 */
1792
1793static void
1794pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1795{
1796	struct ia64_lpte *pte;
1797	pmap_t oldpmap;
1798
1799	oldpmap = pmap_install(pmap);
1800
1801	pte = pmap_find_pte(va);
1802	if (pte->pte_p) {
		pmap_install(oldpmap);
1803		return;
	}
1804
1805	/*
1806	 * Enter on the PV list since it's part of our managed memory.
1807	 */
1808	pmap_insert_entry(pmap, va, m);
1809
1810	/*
1811	 * Increment counters
1812	 */
1813	pmap->pm_stats.resident_count++;
1814
1815	/*
1816	 * Initialise PTE with read-only protection and enter into VHPT.
1817	 */
1818	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1819		     PTE_IG_MANAGED,
1820		     PTE_PL_USER, PTE_AR_R);
1821
1822	pmap_install(oldpmap);
1823}
1824
1825/*
1826 * Make temporary mapping for a physical address. This is called
1827 * during dump.
1828 */
1829void *
1830pmap_kenter_temporary(vm_offset_t pa, int i)
1831{
1832	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1833}
1834
1835#define MAX_INIT_PT (96)
1836/*
1837 * pmap_object_init_pt preloads the ptes for a given object
1838 * into the specified pmap.  This eliminates the blast of soft
1839 * faults on process startup and immediately after an mmap.
1840 */
1841void
1842pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1843		    vm_object_t object, vm_pindex_t pindex,
1844		    vm_size_t size, int limit)
1845{
1846	pmap_t oldpmap;
1847	vm_offset_t tmpidx;
1848	int psize;
1849	vm_page_t p;
1850	int objpgs;
1851
1852	if (pmap == NULL || object == NULL)
1853		return;
1854
1855	oldpmap = pmap_install(pmap);
1856
1857	psize = ia64_btop(size);
1858
1859	if ((object->type != OBJT_VNODE) ||
1860		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1861			(object->resident_page_count > MAX_INIT_PT))) {
1862		pmap_install(oldpmap);
1863		return;
1864	}
1865
1866	if (psize + pindex > object->size) {
1867		if (object->size < pindex) {
			pmap_install(oldpmap);
1868			return;
		}
1869		psize = object->size - pindex;
1870	}
1871
1872	/*
1873	 * if we are processing a major portion of the object, then scan the
1874	 * entire thing.
1875	 */
1876	if (psize > (object->resident_page_count >> 2)) {
1877		objpgs = psize;
1878
1879		for (p = TAILQ_FIRST(&object->memq);
1880		    ((objpgs > 0) && (p != NULL));
1881		    p = TAILQ_NEXT(p, listq)) {
1882
1883			tmpidx = p->pindex;
1884			if (tmpidx < pindex) {
1885				continue;
1886			}
1887			tmpidx -= pindex;
1888			if (tmpidx >= psize) {
1889				continue;
1890			}
1891			/*
1892			 * don't allow an madvise to blow away our really
1893			 * free pages by allocating pv entries.
1894			 */
1895			if ((limit & MAP_PREFAULT_MADVISE) &&
1896			    cnt.v_free_count < cnt.v_free_reserved) {
1897				break;
1898			}
1899			vm_page_lock_queues();
1900			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1901				(p->busy == 0) &&
1902			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1903				if ((p->queue - p->pc) == PQ_CACHE)
1904					vm_page_deactivate(p);
1905				vm_page_busy(p);
1906				vm_page_unlock_queues();
1907				pmap_enter_quick(pmap,
1908						 addr + ia64_ptob(tmpidx), p);
1909				vm_page_lock_queues();
1910				vm_page_wakeup(p);
1911			}
1912			vm_page_unlock_queues();
1913			objpgs -= 1;
1914		}
1915	} else {
1916		/*
1917		 * else lookup the pages one-by-one.
1918		 */
1919		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1920			/*
1921			 * don't allow an madvise to blow away our really
1922			 * free pages by allocating pv entries.
1923			 */
1924			if ((limit & MAP_PREFAULT_MADVISE) &&
1925			    cnt.v_free_count < cnt.v_free_reserved) {
1926				break;
1927			}
1928			p = vm_page_lookup(object, tmpidx + pindex);
1929			if (p == NULL)
1930				continue;
1931			vm_page_lock_queues();
1932			if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
1933				(p->busy == 0) &&
1934			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1935				if ((p->queue - p->pc) == PQ_CACHE)
1936					vm_page_deactivate(p);
1937				vm_page_busy(p);
1938				vm_page_unlock_queues();
1939				pmap_enter_quick(pmap,
1940						 addr + ia64_ptob(tmpidx), p);
1941				vm_page_lock_queues();
1942				vm_page_wakeup(p);
1943			}
1944			vm_page_unlock_queues();
1945		}
1946	}
1947	pmap_install(oldpmap);
1948	return;
1949}
1950
1951/*
1952 * pmap_prefault provides a quick way of clustering
1953 * pagefaults into a process's address space.  It is a "cousin"
1954 * of pmap_object_init_pt, except it runs at page fault time instead
1955 * of mmap time.
1956 */
1957#define PFBAK 4
1958#define PFFOR 4
1959#define PAGEORDER_SIZE (PFBAK+PFFOR)
1960
1961static int pmap_prefault_pageorder[] = {
1962	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
1963	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1964	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1965	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1966};
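/*
 * The order above alternates one page behind, one page ahead, working
 * outwards, so the pages nearest the faulting address are tried first.
 */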
1967
1968void
1969pmap_prefault(pmap, addra, entry)
1970	pmap_t pmap;
1971	vm_offset_t addra;
1972	vm_map_entry_t entry;
1973{
1974	int i;
1975	vm_offset_t starta;
1976	vm_offset_t addr;
1977	vm_pindex_t pindex;
1978	vm_page_t m, mpte;
1979	vm_object_t object;
1980
1981	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1982		return;
1983
1984	object = entry->object.vm_object;
1985
1986	starta = addra - PFBAK * PAGE_SIZE;
1987	if (starta < entry->start) {
1988		starta = entry->start;
1989	} else if (starta > addra) {
1990		starta = 0;
1991	}
1992
1993	mpte = NULL;
1994	for (i = 0; i < PAGEORDER_SIZE; i++) {
1995		vm_object_t lobject;
1996		struct ia64_lpte *pte;
1997
1998		addr = addra + pmap_prefault_pageorder[i];
1999		if (addr > addra + (PFFOR * PAGE_SIZE))
2000			addr = 0;
2001
2002		if (addr < starta || addr >= entry->end)
2003			continue;
2004
2005		pte = pmap_find_vhpt(addr);
2006		if (pte && pte->pte_p)
2007			continue;
2008
2009		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2010		lobject = object;
2011		for (m = vm_page_lookup(lobject, pindex);
2012		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2013		    lobject = lobject->backing_object) {
2014			if (lobject->backing_object_offset & PAGE_MASK)
2015				break;
2016			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2017			m = vm_page_lookup(lobject->backing_object, pindex);
2018		}
2019
2020		/*
2021		 * give up when a page is not in memory
2022		 */
2023		if (m == NULL)
2024			break;
2025		vm_page_lock_queues();
2026		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2027			(m->busy == 0) &&
2028		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2029
2030			if ((m->queue - m->pc) == PQ_CACHE) {
2031				vm_page_deactivate(m);
2032			}
2033			vm_page_busy(m);
2034			vm_page_unlock_queues();
2035			pmap_enter_quick(pmap, addr, m);
2036			vm_page_lock_queues();
2037			vm_page_wakeup(m);
2038		}
2039		vm_page_unlock_queues();
2040	}
2041}
2042
2043/*
2044 *	Routine:	pmap_change_wiring
2045 *	Function:	Change the wiring attribute for a map/virtual-address
2046 *			pair.
2047 *	In/out conditions:
2048 *			The mapping must already exist in the pmap.
2049 */
2050void
2051pmap_change_wiring(pmap, va, wired)
2052	register pmap_t pmap;
2053	vm_offset_t va;
2054	boolean_t wired;
2055{
2056	pmap_t oldpmap;
2057	struct ia64_lpte *pte;
2058
2059	if (pmap == NULL)
2060		return;
2061
2062	oldpmap = pmap_install(pmap);
2063
2064	pte = pmap_find_vhpt(va);
2065
2066	if (wired && !pmap_pte_w(pte))
2067		pmap->pm_stats.wired_count++;
2068	else if (!wired && pmap_pte_w(pte))
2069		pmap->pm_stats.wired_count--;
2070
2071	/*
2072	 * Wiring is not a hardware characteristic so there is no need to
2073	 * invalidate TLB.
2074	 */
2075	pmap_pte_set_w(pte, wired);
2076
2077	pmap_install(oldpmap);
2078}
2079
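/*
 * Usage sketch for pmap_change_wiring() above (illustrative only; the
 * mapping for 'va' must already exist in 'pmap'):
 *
 *	pmap_change_wiring(pmap, va, TRUE);	wires the page (wired_count++)
 *	...
 *	pmap_change_wiring(pmap, va, FALSE);	unwires it (wired_count--)
 *
 * Only the software wired attribute and pm_stats.wired_count change;
 * no TLB entries are invalidated.
 */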
2080
2081
2082/*
2083 *	Copy the range specified by src_addr/len
2084 *	from the source map to the range dst_addr/len
2085 *	in the destination map.
2086 *
2087 *	This routine is only advisory and need not do anything.
2088 */
2089
2090void
2091pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2092	  vm_offset_t src_addr)
2093{
2094}
2095
2096
2097/*
2098 *	pmap_zero_page zeros the specified hardware page through the
2099 *	region 7 direct mapping of its physical address, using bzero
2100 *	to clear its contents.
2101 */
2102
2103void
2104pmap_zero_page(vm_page_t m)
2105{
2106	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2107	bzero((caddr_t) va, PAGE_SIZE);
2108}
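
/*
 * Note (illustrative): IA64_PHYS_TO_RR7() yields an address in the
 * kernel's region 7 direct mapping of physical memory, so no temporary
 * mapping needs to be created or destroyed here.  Assuming the usual
 * encoding of IA64_RR_BASE(n) as (n << 61), the translation is simply
 *
 *	va = IA64_RR_BASE(7) + pa;
 *
 * which is why pmap_zero_page(), pmap_zero_page_area(),
 * pmap_zero_page_idle() and pmap_copy_page() can operate on the page
 * with plain bzero()/bcopy().
 */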
2109
2110
2111/*
2112 *	pmap_zero_page_area zeros the given range of the specified
2113 *	hardware page through the region 7 direct mapping of its
2114 *	physical address, using bzero to clear its contents.
2115 *
2116 *	off and size must reside within a single page.
2117 */
2118
2119void
2120pmap_zero_page_area(vm_page_t m, int off, int size)
2121{
2122	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2123	bzero((char *)va + off, size);
2124}
2125
2126
2127/*
2128 *	pmap_zero_page_idle zeros the specified hardware page through
2129 *	the region 7 direct mapping of its physical address, using
2130 *	bzero to clear its contents.  This is for the vm_idlezero process.
2131 */
2132
2133void
2134pmap_zero_page_idle(vm_page_t m)
2135{
2136	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2137	bzero((caddr_t) va, PAGE_SIZE);
2138}
2139
2140
2141/*
2142 *	pmap_copy_page copies the specified (machine independent)
2143 *	page by using the region 7 direct mapping of the source and
2144 *	destination pages' physical addresses and bcopy to copy the
2145 *	page contents.
2146 */
2147void
2148pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2149{
2150	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2151	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2152	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2153}
2154
2155/*
2156 * Returns true if the pmap's pv is one of the first
2157 * 16 pvs linked to from this page.  This count may
2158 * be changed upwards or downwards in the future; it
2159 * is only necessary that true be returned for a small
2160 * subset of pmaps for proper page aging.
2161 */
2162boolean_t
2163pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2164{
2165	pv_entry_t pv;
2166	int loops = 0;
2167	int s;
2168
2169	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2170		return FALSE;
2171
2172	s = splvm();
2173
2174	/*
2175	 * Check the page's mappings, returning immediately if this pmap is found.
2176	 */
2177	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2178		if (pv->pv_pmap == pmap) {
2179			splx(s);
2180			return TRUE;
2181		}
2182		loops++;
2183		if (loops >= 16)
2184			break;
2185	}
2186	splx(s);
2187	return (FALSE);
2188}
2189
2190#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2191/*
2192 * Remove all pages from the specified address space;
2193 * this aids process exit speeds.  Also, this code is
2194 * special-cased for the current process only, but the
2195 * more generic (and slightly slower) mode can be
2196 * enabled.  This is much faster than pmap_remove in
2197 * the case of running down an entire address space.
2198 */
2199void
2200pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2201{
2202	pv_entry_t pv, npv;
2203	int s;
2204
2205#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2206	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2207		printf("warning: pmap_remove_pages called with non-current pmap\n");
2208		return;
2209	}
2210#endif
2211
2212	s = splvm();
2213	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2214		pv;
2215		pv = npv) {
2216		struct ia64_lpte *pte;
2217
2218		npv = TAILQ_NEXT(pv, pv_plist);
2219
2220		if (pv->pv_va >= eva || pv->pv_va < sva) {
2221			continue;
2222		}
2223
2224		pte = pmap_find_vhpt(pv->pv_va);
2225		if (!pte)
2226			panic("pmap_remove_pages: page on pm_pvlist has no pte");
2227
2228
2229		/*
2230		 * We cannot remove wired pages from a process' mapping at this time.
2231		 */
2232		if (pte->pte_ig & PTE_IG_WIRED) {
2233			continue;
2234		}
2235
2236		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2237	}
2238	splx(s);
2239
2240	pmap_invalidate_all(pmap);
2241}
2242
2243/*
2244 *      pmap_page_protect:
2245 *
2246 *      Lower the permission for all mappings to a given page.
2247 */
2248void
2249pmap_page_protect(vm_page_t m, vm_prot_t prot)
2250{
2251	pv_entry_t pv;
2252
2253	if ((prot & VM_PROT_WRITE) != 0)
2254		return;
2255	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2256		if ((m->flags & PG_WRITEABLE) == 0)
2257			return;
2258		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2259			int newprot = pte_prot(pv->pv_pmap, prot);
2260			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2261			struct ia64_lpte *pte;
2262			pte = pmap_find_vhpt(pv->pv_va);
2263			pmap_pte_set_prot(pte, newprot);
2264			pmap_update_vhpt(pte, pv->pv_va);
2265			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2266			pmap_install(oldpmap);
2267		}
2268		vm_page_flag_clear(m, PG_WRITEABLE);
2269	} else {
2270		pmap_remove_all(m);
2271	}
2272}
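
/*
 * Example of the calling convention (illustrative, reflecting how the
 * machine-independent VM typically uses pmap_page_protect()):
 *
 *	pmap_page_protect(m, VM_PROT_READ);	write-protect all mappings of m
 *	pmap_page_protect(m, VM_PROT_NONE);	remove all mappings of m
 *
 * A request that still includes VM_PROT_WRITE is a no-op, since no
 * permission would be lowered.
 */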
2273
2274/*
2275 *	pmap_ts_referenced:
2276 *
2277 *	Return a count of reference bits for a page, clearing those bits.
2278 *	It is not necessary for every reference bit to be cleared, but it
2279 *	is necessary that 0 only be returned when there are truly no
2280 *	reference bits set.
2281 *
2282 *	XXX: The exact number of bits to check and clear is a matter that
2283 *	should be tested and standardized at some point in the future for
2284 *	optimal aging of shared pages.
2285 */
2286int
2287pmap_ts_referenced(vm_page_t m)
2288{
2289	pv_entry_t pv;
2290	int count = 0;
2291
2292	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2293		return 0;
2294
2295	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2296		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2297		struct ia64_lpte *pte;
2298		pte = pmap_find_vhpt(pv->pv_va);
2299		if (pte->pte_a) {
2300			count++;
2301			pte->pte_a = 0;
2302			pmap_update_vhpt(pte, pv->pv_va);
2303			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2304		}
2305		pmap_install(oldpmap);
2306	}
2307
2308	return count;
2309}
2310
2311#if 0
2312/*
2313 *	pmap_is_referenced:
2314 *
2315 *	Return whether or not the specified physical page was referenced
2316 *	in any physical maps.
2317 */
2318static boolean_t
2319pmap_is_referenced(vm_page_t m)
2320{
2321	pv_entry_t pv;
2322
2323	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2324		return FALSE;
2325
2326	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2327		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2328		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2329		pmap_install(oldpmap);
2330		if (pte->pte_a)
2331			return 1;
2332	}
2333
2334	return 0;
2335}
2336#endif
2337
2338/*
2339 *	pmap_is_modified:
2340 *
2341 *	Return whether or not the specified physical page was modified
2342 *	in any physical maps.
2343 */
2344boolean_t
2345pmap_is_modified(vm_page_t m)
2346{
2347	pv_entry_t pv;
2348
2349	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2350		return FALSE;
2351
2352	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2353		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2354		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2355		pmap_install(oldpmap);
2356		if (pte->pte_d)
2357			return 1;
2358	}
2359
2360	return 0;
2361}
2362
2363/*
2364 *	Clear the modify bits on the specified physical page.
2365 */
2366void
2367pmap_clear_modify(vm_page_t m)
2368{
2369	pv_entry_t pv;
2370
2371	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2372		return;
2373
2374	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2375		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2376		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2377		if (pte->pte_d) {
2378			pte->pte_d = 0;
2379			pmap_update_vhpt(pte, pv->pv_va);
2380			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2381		}
2382		pmap_install(oldpmap);
2383	}
2384}
2385
2386/*
2387 *	pmap_clear_reference:
2388 *
2389 *	Clear the reference bit on the specified physical page.
2390 */
2391void
2392pmap_clear_reference(vm_page_t m)
2393{
2394	pv_entry_t pv;
2395
2396	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2397		return;
2398
2399	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2400		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2401		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2402		if (pte->pte_a) {
2403			pte->pte_a = 0;
2404			pmap_update_vhpt(pte, pv->pv_va);
2405			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2406		}
2407		pmap_install(oldpmap);
2408	}
2409}
2410
2411/*
2412 * Miscellaneous support routines follow
2413 */
2414
2415static void
2416ia64_protection_init()
2417{
2418	int prot, *kp, *up;
2419
2420	kp = protection_codes[0];
2421	up = protection_codes[1];
2422
2423	for (prot = 0; prot < 8; prot++) {
2424		switch (prot) {
2425		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2426			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2427			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2428			break;
2429
2430		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2431			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2432			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2433			break;
2434
2435		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2436			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2437			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2438			break;
2439
2440		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2441			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2442			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2443			break;
2444
2445		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2446			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2447			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2448			break;
2449
2450		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2451			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2452			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2453			break;
2454
2455		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2456			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2457			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2458			break;
2459
2460		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2461			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2462			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2463			break;
2464		}
2465	}
2466}
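
/*
 * Worked example (illustrative): protection_codes[][] is indexed by the
 * low three VM_PROT_* bits (READ, WRITE, EXECUTE), with row 0 holding
 * kernel encodings and row 1 user encodings.  Assuming pte_prot()
 * selects the row based on the pmap, a request such as
 *
 *	prot = VM_PROT_READ | VM_PROT_WRITE;	(== 3)
 *
 * maps to protection_codes[1][3] == (PTE_AR_RW << 2) | PTE_PL_USER for
 * a user pmap, i.e. read/write access at user privilege level.
 */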
2467
2468/*
2469 * Map a set of physical memory pages into the kernel virtual
2470 * address space. Return a pointer to where it is mapped. This
2471 * routine is intended to be used for mapping device memory,
2472 * NOT real memory.
2473 */
2474void *
2475pmap_mapdev(vm_offset_t pa, vm_size_t size)
2476{
2477	return (void*) IA64_PHYS_TO_RR6(pa);
2478}
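
/*
 * Usage sketch (illustrative; the physical address below is made up):
 * a driver mapping a page of device registers might do
 *
 *	volatile u_int32_t *regs;
 *
 *	regs = pmap_mapdev(0x80000000UL, PAGE_SIZE);
 *	...
 *	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
 *
 * Since the returned pointer is simply the region 6 (uncacheable)
 * alias of the physical address, pmap_unmapdev() has nothing to undo.
 */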
2479
2480/*
2481 * 'Unmap' a range mapped by pmap_mapdev().
2482 */
2483void
2484pmap_unmapdev(vm_offset_t va, vm_size_t size)
2485{
2486	return;
2487}
2488
2489/*
2490 * perform the pmap work for mincore
2491 */
2492int
2493pmap_mincore(pmap_t pmap, vm_offset_t addr)
2494{
2495	pmap_t oldpmap;
2496	struct ia64_lpte *pte;
2497	int val = 0;
2498
2499	oldpmap = pmap_install(pmap);
2500	pte = pmap_find_vhpt(addr);
2501	pmap_install(oldpmap);
2502
2503	if (!pte)
2504		return 0;
2505
2506	if (pmap_pte_v(pte)) {
2507		vm_page_t m;
2508		vm_offset_t pa;
2509
2510		val = MINCORE_INCORE;
2511		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2512			return val;
2513
2514		pa = pmap_pte_pa(pte);
2515
2516		m = PHYS_TO_VM_PAGE(pa);
2517
2518		/*
2519		 * Modified by us
2520		 */
2521		if (pte->pte_d)
2522			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2523		/*
2524		 * Modified by someone
2525		 */
2526		else if (pmap_is_modified(m))
2527			val |= MINCORE_MODIFIED_OTHER;
2528		/*
2529		 * Referenced by us
2530		 */
2531		if (pte->pte_a)
2532			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2533
2534		/*
2535		 * Referenced by someone
2536		 */
2537		else if (pmap_ts_referenced(m)) {
2538			val |= MINCORE_REFERENCED_OTHER;
2539			vm_page_flag_set(m, PG_REFERENCED);
2540		}
2541	}
2542	return val;
2543}
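
/*
 * Example of interpreting the return value (illustrative): for a
 * resident, managed page that this pmap has both written and
 * referenced, pmap_mincore() returns
 *
 *	MINCORE_INCORE | MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER |
 *	    MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER
 *
 * while a resident but unmanaged mapping reports only MINCORE_INCORE.
 */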
2544
2545void
2546pmap_activate(struct thread *td)
2547{
2548	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2549}
2550
2551pmap_t
2552pmap_install(pmap_t pmap)
2553{
2554	pmap_t oldpmap;
2555	int i;
2556
2557	critical_enter();
2558
2559	oldpmap = PCPU_GET(current_pmap);
2560
2561	if (pmap == oldpmap || pmap == kernel_pmap) {
2562		critical_exit();
2563		return pmap;
2564	}
2565
2566	if (oldpmap) {
2567		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2568	}
2569
2570	PCPU_SET(current_pmap, pmap);
2571	if (!pmap) {
2572		/*
2573		 * Use RIDs 0..4, which have no mappings, to make sure
2574		 * we generate page faults on accesses.
2575		 */
2576		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2577		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2578		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2579		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2580		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2581		critical_exit();
2582		return oldpmap;
2583	}
2584
2585	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2586
2587	for (i = 0; i < 5; i++)
2588		ia64_set_rr(IA64_RR_BASE(i),
2589			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2590
2591	critical_exit();
2592	return oldpmap;
2593}
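
/*
 * Worked example (illustrative): the value written into each region
 * register above is
 *
 *	(rid << 8) | (PAGE_SHIFT << 2) | 1
 *
 * i.e. the region ID in bits 8 and up, the preferred page size (as a
 * log2, hence PAGE_SHIFT) in bits 2..7, and the VHPT walker enable bit
 * in bit 0.  With 8KB pages (PAGE_SHIFT == 13) and a hypothetical
 * rid of 0x42 this works out to 0x4200 | 0x34 | 0x1 == 0x4235.
 */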
2594
2595vm_offset_t
2596pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2597{
2598
2599	return addr;
2600}
2601
2602#include "opt_ddb.h"
2603
2604#ifdef DDB
2605
2606#include <ddb/ddb.h>
2607
2608static const char*	psnames[] = {
2609	"1B",	"2B",	"4B",	"8B",
2610	"16B",	"32B",	"64B",	"128B",
2611	"256B",	"512B",	"1K",	"2K",
2612	"4K",	"8K",	"16K",	"32K",
2613	"64K",	"128K",	"256K",	"512K",
2614	"1M",	"2M",	"4M",	"8M",
2615	"16M",	"32M",	"64M",	"128M",
2616	"256M",	"512M",	"1G",	"2G"
2617};
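
/*
 * psnames[ps] names the page size 2^ps bytes; it is indexed below by
 * the itir_ps and rr_ps page-size fields.
 */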
2618
2619static void
2620print_trs(int type)
2621{
2622	struct ia64_pal_result	res;
2623	int			i, maxtr;
2624	struct {
2625		struct ia64_pte	pte;
2626		struct ia64_itir itir;
2627		struct ia64_ifa ifa;
2628		struct ia64_rr	rr;
2629	}			buf;
2630	static const char*	manames[] = {
2631		"WB",	"bad",	"bad",	"bad",
2632		"UC",	"UCE",	"WC",	"NaT",
2634	};
2635
2636	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2637	if (res.pal_status != 0) {
2638		db_printf("Can't get VM summary\n");
2639		return;
2640	}
2641
2642	if (type == 0)
2643		maxtr = (res.pal_result[0] >> 40) & 0xff;
2644	else
2645		maxtr = (res.pal_result[0] >> 32) & 0xff;
2646
2647	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2648	for (i = 0; i <= maxtr; i++) {
2649		bzero(&buf, sizeof(buf));
2650		res = ia64_call_pal_stacked_physical
2651			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2652		if (!(res.pal_result[0] & 1))
2653			buf.pte.pte_ar = 0;
2654		if (!(res.pal_result[0] & 2))
2655			buf.pte.pte_pl = 0;
2656		if (!(res.pal_result[0] & 4))
2657			buf.pte.pte_d = 0;
2658		if (!(res.pal_result[0] & 8))
2659			buf.pte.pte_ma = 0;
2660		db_printf(
2661			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2662			buf.ifa.ifa_ig & 1,
2663			buf.rr.rr_rid,
2664			buf.ifa.ifa_vpn,
2665			buf.pte.pte_ppn,
2666			psnames[buf.itir.itir_ps],
2667			buf.pte.pte_ed,
2668			buf.pte.pte_ar,
2669			buf.pte.pte_pl,
2670			buf.pte.pte_d,
2671			buf.pte.pte_a,
2672			manames[buf.pte.pte_ma],
2673			buf.pte.pte_p,
2674			buf.itir.itir_key);
2675	}
2676}
2677
2678DB_COMMAND(itr, db_itr)
2679{
2680	print_trs(0);
2681}
2682
2683DB_COMMAND(dtr, db_dtr)
2684{
2685	print_trs(1);
2686}
2687
2688DB_COMMAND(rr, db_rr)
2689{
2690	int i;
2691	u_int64_t t;
2692	struct ia64_rr rr;
2693
2694	printf("RR RID    PgSz VE\n");
2695	for (i = 0; i < 8; i++) {
2696		__asm __volatile ("mov %0=rr[%1]"
2697				  : "=r"(t)
2698				  : "r"(IA64_RR_BASE(i)));
2699		*(u_int64_t *) &rr = t;
2700		printf("%d  %06x %4s %d\n",
2701		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2702	}
2703}
2704
2705DB_COMMAND(thash, db_thash)
2706{
2707	if (!have_addr)
2708		return;
2709
2710	db_printf("%p\n", (void *) ia64_thash(addr));
2711}
2712
2713DB_COMMAND(ttag, db_ttag)
2714{
2715	if (!have_addr)
2716		return;
2717
2718	db_printf("0x%lx\n", ia64_ttag(addr));
2719}
2720
2721#endif
2722