pmap.c revision 195840
1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 195840 2009-07-24 13:50:29Z jhb $");
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/lock.h>
54#include <sys/mman.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/smp.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60
61#include <vm/vm.h>
62#include <vm/vm_page.h>
63#include <vm/vm_map.h>
64#include <vm/vm_object.h>
65#include <vm/vm_pageout.h>
66#include <vm/uma.h>
67
68#include <machine/md_var.h>
69#include <machine/pal.h>
70
71/*
72 *	Manages physical address maps.
73 *
74 *	In addition to hardware address maps, this
75 *	module is called upon to provide software-use-only
76 *	maps which may or may not be stored in the same
77 *	form as hardware maps.  These pseudo-maps are
78 *	used to store intermediate results from copy
79 *	operations to and from address spaces.
80 *
81 *	Since the information managed by this module is
82 *	also stored by the logical address mapping module,
83 *	this module may throw away valid virtual-to-physical
84 *	mappings at almost any time.  However, invalidations
85 *	of virtual-to-physical mappings must be done as
86 *	requested.
87 *
88 *	In order to cope with hardware architectures which
89 *	make virtual-to-physical map invalidates expensive,
 90 *	this module may delay invalidation or protection-reduction
91 *	operations until such time as they are actually
92 *	necessary.  This module is given full information as
93 *	to which processors are currently using which maps,
94 *	and to when physical maps must be made correct.
95 */
96
97/*
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
101 *
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
104 *
105 * Region 0..4
106 *	User virtually mapped
107 *
108 * Region 5
109 *	Kernel virtually mapped
110 *
111 * Region 6
112 *	Kernel physically mapped uncacheable
113 *
114 * Region 7
115 *	Kernel physically mapped cacheable
116 */
117
118/* XXX move to a header. */
119extern uint64_t ia64_gateway_page[];
120
121#ifndef PMAP_SHPGPERPROC
122#define PMAP_SHPGPERPROC 200
123#endif
124
125#if !defined(DIAGNOSTIC)
126#define PMAP_INLINE __inline
127#else
128#define PMAP_INLINE
129#endif
130
131#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
132#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
133#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
134#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
135#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
136#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
137#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
138#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
139
140#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
141#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
142#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
143#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
144
145#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
146
147/*
148 * The VHPT bucket head structure.
149 */
150struct ia64_bucket {
151	uint64_t	chain;
152	struct mtx	mutex;
153	u_int		length;
154};
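/*
 * Chain layout, as used by pmap_enter_vhpt() and friends: the 'chain'
 * word of a VHPT entry proper points at its ia64_bucket, while the
 * 'chain' word of each ia64_lpte hanging off that bucket holds the
 * physical address of the next PTE on the collision list.  The head of
 * the list is bucket->chain (zero terminates it) and bucket->mutex
 * serializes all updates to the chain.
 */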
155
156/*
157 * Statically allocated kernel pmap
158 */
159struct pmap kernel_pmap_store;
160
161vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
162vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
163
164/*
165 * Kernel virtual memory management.
166 */
167static int nkpt;
168struct ia64_lpte ***ia64_kptdir;
169#define KPTE_DIR0_INDEX(va) \
170	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
171#define KPTE_DIR1_INDEX(va) \
172	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
173#define KPTE_PTE_INDEX(va) \
174	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
175#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
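/*
 * Illustrative geometry, assuming the default 8KB page size (PAGE_SHIFT
 * == 13) and the 32-byte struct ia64_lpte: NKPTEPG = 8192 / 32 = 256, so
 * a leaf page maps 256 * 8KB = 2MB of KVA; a second-level directory page
 * holds 1024 pointers and spans 2GB; ia64_kptdir itself holds 1024
 * pointers, for at most 2TB of kernel virtual address space.  The macros
 * above simply extract the VA bits that index each of the three levels.
 */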
176
177vm_offset_t kernel_vm_end;
178
179/* Values for ptc.e. XXX values for SKI. */
180static uint64_t pmap_ptc_e_base = 0x100000000;
181static uint64_t pmap_ptc_e_count1 = 3;
182static uint64_t pmap_ptc_e_count2 = 2;
183static uint64_t pmap_ptc_e_stride1 = 0x2000;
184static uint64_t pmap_ptc_e_stride2 = 0x100000000;
185struct mtx pmap_ptcmutex;
186
187/*
188 * Data for the RID allocator
189 */
190static int pmap_ridcount;
191static int pmap_rididx;
192static int pmap_ridmapsz;
193static int pmap_ridmax;
194static uint64_t *pmap_ridmap;
195struct mtx pmap_ridmutex;
196
197/*
198 * Data for the pv entry allocation mechanism
199 */
200static uma_zone_t pvzone;
201static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
202
203/*
204 * Data for allocating PTEs for user processes.
205 */
206static uma_zone_t ptezone;
207
208/*
209 * Virtual Hash Page Table (VHPT) data.
210 */
211/* SYSCTL_DECL(_machdep); */
212SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
213
214struct ia64_bucket *pmap_vhpt_bucket;
215
216int pmap_vhpt_nbuckets;
217SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
218    &pmap_vhpt_nbuckets, 0, "");
219
220uint64_t pmap_vhpt_base[MAXCPU];
221
222int pmap_vhpt_log2size = 0;
223TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
224SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
225    &pmap_vhpt_log2size, 0, "");
226
227static int pmap_vhpt_inserts;
228SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
229    &pmap_vhpt_inserts, 0, "");
230
231static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
232SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
233    NULL, 0, pmap_vhpt_population, "I", "");
234
235static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
236
237static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
238static pv_entry_t get_pv_entry(pmap_t locked_pmap);
239
240static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
241		    vm_page_t m, vm_prot_t prot);
242static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
243static void	pmap_invalidate_all(pmap_t pmap);
244static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
245		    vm_offset_t va, pv_entry_t pv, int freepte);
246static int	pmap_remove_vhpt(vm_offset_t va);
247static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
248		    vm_page_t m);
249
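/*
 * Boot-time memory allocator: carve 'size' bytes (rounded up to a page)
 * off the front of the first phys_avail[] region that is large enough,
 * discarding leading regions that are too small, and return a zeroed,
 * region 7 (cacheable, direct-mapped) virtual address for it.  Meant to
 * be used only while bootstrapping, before the VM system is up.
 */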
250vm_offset_t
251pmap_steal_memory(vm_size_t size)
252{
253	vm_size_t bank_size;
254	vm_offset_t pa, va;
255
256	size = round_page(size);
257
258	bank_size = phys_avail[1] - phys_avail[0];
259	while (size > bank_size) {
260		int i;
261		for (i = 0; phys_avail[i+2]; i+= 2) {
262			phys_avail[i] = phys_avail[i+2];
263			phys_avail[i+1] = phys_avail[i+3];
264		}
265		phys_avail[i] = 0;
266		phys_avail[i+1] = 0;
267		if (!phys_avail[0])
268			panic("pmap_steal_memory: out of memory");
269		bank_size = phys_avail[1] - phys_avail[0];
270	}
271
272	pa = phys_avail[0];
273	phys_avail[0] += size;
274
275	va = IA64_PHYS_TO_RR7(pa);
276	bzero((caddr_t) va, size);
277	return va;
278}
279
280/*
281 *	Bootstrap the system enough to run with virtual memory.
282 */
283void
284pmap_bootstrap()
285{
286	struct ia64_pal_result res;
287	struct ia64_lpte *pte;
288	vm_offset_t base, limit;
289	size_t size;
290	int i, j, count, ridbits;
291
292	/*
293	 * Query the PAL Code to find the loop parameters for the
294	 * ptc.e instruction.
295	 */
296	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
297	if (res.pal_status != 0)
298		panic("Can't configure ptc.e parameters");
299	pmap_ptc_e_base = res.pal_result[0];
300	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
301	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
302	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
303	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
304	if (bootverbose)
305		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
306		       "stride1=0x%lx, stride2=0x%lx\n",
307		       pmap_ptc_e_base,
308		       pmap_ptc_e_count1,
309		       pmap_ptc_e_count2,
310		       pmap_ptc_e_stride1,
311		       pmap_ptc_e_stride2);
312	mtx_init(&pmap_ptcmutex, "Global PTC lock", NULL, MTX_SPIN);
313
314	/*
315	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
316	 *
317	 * We currently need at least 19 bits in the RID because PID_MAX
318	 * can only be encoded in 17 bits and we need RIDs for 5 regions
319	 * per process. With PID_MAX equalling 99999 this means that we
320	 * need to be able to encode 499995 (=5*PID_MAX).
321	 * The Itanium processor only has 18 bits and the architected
322	 * minimum is exactly that. So, we cannot use a PID based scheme
323	 * in those cases. Enter pmap_ridmap...
324	 * We should avoid the map when running on a processor that has
325	 * implemented enough bits. This means that we should pass the
326	 * process/thread ID to pmap. This we currently don't do, so we
327	 * use the map anyway. However, we don't want to allocate a map
328	 * that is large enough to cover the range dictated by the number
329	 * of bits in the RID, because that may result in a RID map of
330	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
 331	 * The bottom line: we create a 32KB map when the processor only
332	 * implements 18 bits (or when we can't figure it out). Otherwise
333	 * we create a 64KB map.
334	 */
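	/*
	 * In concrete terms: with the 19-bit cap applied below, pmap_ridmax
	 * is 2^19 = 524288 and the bitmap is 524288 / 8 = 64KB
	 * (pmap_ridmapsz = 8192 64-bit words); with only 18 implemented
	 * bits it is 32KB.
	 */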
335	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
336	if (res.pal_status != 0) {
337		if (bootverbose)
338			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
339		ridbits = 18; /* guaranteed minimum */
340	} else {
341		ridbits = (res.pal_result[1] >> 8) & 0xff;
342		if (bootverbose)
343			printf("Processor supports %d Region ID bits\n",
344			    ridbits);
345	}
346	if (ridbits > 19)
347		ridbits = 19;
348
349	pmap_ridmax = (1 << ridbits);
350	pmap_ridmapsz = pmap_ridmax / 64;
351	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
352	pmap_ridmap[0] |= 0xff;
353	pmap_rididx = 0;
354	pmap_ridcount = 8;
355	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
356
357	/*
358	 * Allocate some memory for initial kernel 'page tables'.
359	 */
360	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
361	nkpt = 0;
362	kernel_vm_end = VM_MIN_KERNEL_ADDRESS - VM_GATEWAY_SIZE;
363
364	for (i = 0; phys_avail[i+2]; i+= 2)
365		;
366	count = i+2;
367
368	/*
369	 * Figure out a useful size for the VHPT, based on the size of
370	 * physical memory and try to locate a region which is large
371	 * enough to contain the VHPT (which must be a power of two in
372	 * size and aligned to a natural boundary).
373	 * We silently bump up the VHPT size to the minimum size if the
374	 * user has set the tunable too small. Likewise, the VHPT size
375	 * is silently capped to the maximum allowed.
376	 */
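	/*
	 * The default sizing below amounts to at least 32 bytes (one
	 * long-format VHPT entry) per physical page.  For example
	 * (illustrative, with 8KB pages): 4GB of RAM is 512K pages and
	 * yields a 16MB per-CPU VHPT (log2size 24); the floor is 32KB
	 * (log2size 15).
	 */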
377	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
378	if (pmap_vhpt_log2size == 0) {
379		pmap_vhpt_log2size = 15;
380		size = 1UL << pmap_vhpt_log2size;
381		while (size < Maxmem * 32) {
382			pmap_vhpt_log2size++;
383			size <<= 1;
384		}
385	} else if (pmap_vhpt_log2size < 15)
386		pmap_vhpt_log2size = 15;
387	if (pmap_vhpt_log2size > 61)
388		pmap_vhpt_log2size = 61;
389
390	pmap_vhpt_base[0] = 0;
391	base = limit = 0;
392	size = 1UL << pmap_vhpt_log2size;
393	while (pmap_vhpt_base[0] == 0) {
394		if (bootverbose)
395			printf("Trying VHPT size 0x%lx\n", size);
396		for (i = 0; i < count; i += 2) {
397			base = (phys_avail[i] + size - 1) & ~(size - 1);
398			limit = base + MAXCPU * size;
399			if (limit <= phys_avail[i+1])
400				/*
401				 * VHPT can fit in this region
402				 */
403				break;
404		}
405		if (!phys_avail[i]) {
406			/* Can't fit, try next smaller size. */
407			pmap_vhpt_log2size--;
408			size >>= 1;
409		} else
410			pmap_vhpt_base[0] = IA64_PHYS_TO_RR7(base);
411	}
412	if (pmap_vhpt_log2size < 15)
413		panic("Can't find space for VHPT");
414
415	if (bootverbose)
416		printf("Putting VHPT at 0x%lx\n", base);
417
418	if (base != phys_avail[i]) {
419		/* Split this region. */
420		if (bootverbose)
421			printf("Splitting [%p-%p]\n", (void *)phys_avail[i],
422			    (void *)phys_avail[i+1]);
423		for (j = count; j > i; j -= 2) {
424			phys_avail[j] = phys_avail[j-2];
425			phys_avail[j+1] = phys_avail[j-2+1];
426		}
427		phys_avail[i+1] = base;
428		phys_avail[i+2] = limit;
429	} else
430		phys_avail[i] = limit;
431
432	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
433
434	pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
435	    sizeof(struct ia64_bucket));
436	pte = (struct ia64_lpte *)pmap_vhpt_base[0];
437	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
438		pte[i].pte = 0;
439		pte[i].itir = 0;
440		pte[i].tag = 1UL << 63;	/* Invalid tag */
441		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
442		/* Stolen memory is zeroed! */
443		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
444		    MTX_NOWITNESS | MTX_SPIN);
445	}
446
447	for (i = 1; i < MAXCPU; i++) {
448		pmap_vhpt_base[i] = pmap_vhpt_base[i - 1] + size;
449		bcopy((void *)pmap_vhpt_base[i - 1], (void *)pmap_vhpt_base[i],
450		    size);
451	}
452
453	map_vhpt(pmap_vhpt_base[0]);
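	/*
	 * PTA layout, per the architecture: the VHPT base goes in the
	 * upper bits, bit 8 (vf) selects the long format, bits 7:2 hold
	 * log2 of the table size and bit 0 (ve) enables the VHPT walker;
	 * hence the (1 << 8), (log2size << 2) and + 1 terms below.
	 */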
454	ia64_set_pta(pmap_vhpt_base[0] + (1 << 8) +
455	    (pmap_vhpt_log2size << 2) + 1);
456	ia64_srlz_i();
457
458	virtual_avail = VM_MIN_KERNEL_ADDRESS;
459	virtual_end = VM_MAX_KERNEL_ADDRESS;
460
461	/*
462	 * Initialize the kernel pmap (which is statically allocated).
463	 */
464	PMAP_LOCK_INIT(kernel_pmap);
465	for (i = 0; i < 5; i++)
466		kernel_pmap->pm_rid[i] = 0;
467	kernel_pmap->pm_active = 1;
468	TAILQ_INIT(&kernel_pmap->pm_pvlist);
469	PCPU_SET(current_pmap, kernel_pmap);
470
471	/*
472	 * Region 5 is mapped via the vhpt.
473	 */
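	/*
	 * Region register format: RID in bits 31:8, preferred page size in
	 * bits 7:2 and the VHPT-walker enable (ve) in bit 0.  ve is set
	 * only for region 5; regions 6 and 7 below leave it clear because
	 * their misses are resolved by the alternate TLB miss handlers.
	 */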
474	ia64_set_rr(IA64_RR_BASE(5),
475		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
476
477	/*
478	 * Region 6 is direct mapped UC and region 7 is direct mapped
 479	 * WB. The details of this are controlled by the Alt {I,D}TLB
480	 * handlers. Here we just make sure that they have the largest
481	 * possible page size to minimise TLB usage.
482	 */
483	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
484	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));
485	ia64_srlz_d();
486
487	/*
488	 * Clear out any random TLB entries left over from booting.
489	 */
490	pmap_invalidate_all(kernel_pmap);
491
492	map_gateway_page();
493}
494
495static int
496pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
497{
498	int count, error, i;
499
500	count = 0;
501	for (i = 0; i < pmap_vhpt_nbuckets; i++)
502		count += pmap_vhpt_bucket[i].length;
503
504	error = SYSCTL_OUT(req, &count, sizeof(count));
505	return (error);
506}
507
508/*
509 *	Initialize a vm_page's machine-dependent fields.
510 */
511void
512pmap_page_init(vm_page_t m)
513{
514
515	TAILQ_INIT(&m->md.pv_list);
516	m->md.pv_list_count = 0;
517}
518
519/*
520 *	Initialize the pmap module.
521 *	Called by vm_init, to initialize any structures that the pmap
522 *	system needs to map virtual memory.
523 */
524void
525pmap_init(void)
526{
527	int shpgperproc = PMAP_SHPGPERPROC;
528
529	/*
530	 * Initialize the address space (zone) for the pv entries.  Set a
531	 * high water mark so that the system can recover from excessive
532	 * numbers of pv entries.
533	 */
534	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
535	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
536	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
537	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
538	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
539	pv_entry_high_water = 9 * (pv_entry_max / 10);
540
541	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
542	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
543}
544
545
546/***************************************************
547 * Manipulate TLBs for a pmap
548 ***************************************************/
549
550static void
551pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
552{
553	struct ia64_lpte *pte;
554	int i, vhpt_ofs;
555
556	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
557		("invalidating TLB for non-current pmap"));
558
559	vhpt_ofs = ia64_thash(va) - pmap_vhpt_base[PCPU_GET(cpuid)];
560	critical_enter();
561	for (i = 0; i < MAXCPU; i++) {
562		pte = (struct ia64_lpte *)(pmap_vhpt_base[i] + vhpt_ofs);
563		if (pte->tag == ia64_ttag(va))
564			pte->tag = 1UL << 63;
565	}
566	critical_exit();
567	mtx_lock_spin(&pmap_ptcmutex);
568	ia64_ptc_ga(va, PAGE_SHIFT << 2);
569	mtx_unlock_spin(&pmap_ptcmutex);
570}
571
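/*
 * Flush the entire local TLB by walking the ptc.e loop parameters that
 * pmap_bootstrap() obtained from PAL_PTCE_INFO: an outer loop of count1
 * iterations stepping by stride1, with an inner loop of count2 iterations
 * stepping by stride2.  On SMP, pmap_invalidate_all() runs this on every
 * CPU via smp_rendezvous().
 */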
572static void
573pmap_invalidate_all_1(void *arg)
574{
575	uint64_t addr;
576	int i, j;
577
578	critical_enter();
579	addr = pmap_ptc_e_base;
580	for (i = 0; i < pmap_ptc_e_count1; i++) {
581		for (j = 0; j < pmap_ptc_e_count2; j++) {
582			ia64_ptc_e(addr);
583			addr += pmap_ptc_e_stride2;
584		}
585		addr += pmap_ptc_e_stride1;
586	}
587	critical_exit();
588}
589
590static void
591pmap_invalidate_all(pmap_t pmap)
592{
593
594	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
595		("invalidating TLB for non-current pmap"));
596
597#ifdef SMP
598	if (mp_ncpus > 1)
599		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
600	else
601#endif
602	pmap_invalidate_all_1(NULL);
603}
604
605static uint32_t
606pmap_allocate_rid(void)
607{
608	uint64_t bit, bits;
609	int rid;
610
611	mtx_lock(&pmap_ridmutex);
612	if (pmap_ridcount == pmap_ridmax)
613		panic("pmap_allocate_rid: All Region IDs used");
614
615	/* Find an index with a free bit. */
616	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
617		pmap_rididx++;
618		if (pmap_rididx == pmap_ridmapsz)
619			pmap_rididx = 0;
620	}
621	rid = pmap_rididx * 64;
622
623	/* Find a free bit. */
624	bit = 1UL;
625	while (bits & bit) {
626		rid++;
627		bit <<= 1;
628	}
629
630	pmap_ridmap[pmap_rididx] |= bit;
631	pmap_ridcount++;
632	mtx_unlock(&pmap_ridmutex);
633
634	return rid;
635}
636
637static void
638pmap_free_rid(uint32_t rid)
639{
640	uint64_t bit;
641	int idx;
642
643	idx = rid / 64;
644	bit = ~(1UL << (rid & 63));
645
646	mtx_lock(&pmap_ridmutex);
647	pmap_ridmap[idx] &= bit;
648	pmap_ridcount--;
649	mtx_unlock(&pmap_ridmutex);
650}
651
652/***************************************************
653 * Page table page management routines.....
654 ***************************************************/
655
656void
657pmap_pinit0(struct pmap *pmap)
658{
659	/* kernel_pmap is the same as any other pmap. */
660	pmap_pinit(pmap);
661}
662
663/*
664 * Initialize a preallocated and zeroed pmap structure,
665 * such as one in a vmspace structure.
666 */
667int
668pmap_pinit(struct pmap *pmap)
669{
670	int i;
671
672	PMAP_LOCK_INIT(pmap);
673	for (i = 0; i < 5; i++)
674		pmap->pm_rid[i] = pmap_allocate_rid();
675	pmap->pm_active = 0;
676	TAILQ_INIT(&pmap->pm_pvlist);
677	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
678	return (1);
679}
680
681/***************************************************
682 * Pmap allocation/deallocation routines.
683 ***************************************************/
684
685/*
686 * Release any resources held by the given physical map.
687 * Called when a pmap initialized by pmap_pinit is being released.
688 * Should only be called if the map contains no valid mappings.
689 */
690void
691pmap_release(pmap_t pmap)
692{
693	int i;
694
695	for (i = 0; i < 5; i++)
696		if (pmap->pm_rid[i])
697			pmap_free_rid(pmap->pm_rid[i]);
698	PMAP_LOCK_DESTROY(pmap);
699}
700
701/*
702 * grow the number of kernel page table entries, if needed
703 */
704void
705pmap_growkernel(vm_offset_t addr)
706{
707	struct ia64_lpte **dir1;
708	struct ia64_lpte *leaf;
709	vm_page_t nkpg;
710
711	while (kernel_vm_end <= addr) {
712		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
713			panic("%s: out of kernel address space", __func__);
714
715		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
716		if (dir1 == NULL) {
717			nkpg = vm_page_alloc(NULL, nkpt++,
718			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
719			if (!nkpg)
720				panic("%s: cannot add dir. page", __func__);
721
722			dir1 = (struct ia64_lpte **)
723			    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
724			bzero(dir1, PAGE_SIZE);
725			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
726		}
727
728		nkpg = vm_page_alloc(NULL, nkpt++,
729		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
730		if (!nkpg)
731			panic("%s: cannot add PTE page", __func__);
732
733		leaf = (struct ia64_lpte *)
734		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
735		bzero(leaf, PAGE_SIZE);
736		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
737
738		kernel_vm_end += PAGE_SIZE * NKPTEPG;
739	}
740}
741
742/***************************************************
 743 * Page management routines.
744 ***************************************************/
745
746/*
747 * free the pv_entry back to the free list
748 */
749static PMAP_INLINE void
750free_pv_entry(pv_entry_t pv)
751{
752	pv_entry_count--;
753	uma_zfree(pvzone, pv);
754}
755
756/*
757 * get a new pv_entry, allocating a block from the system
758 * when needed.
759 */
760static pv_entry_t
761get_pv_entry(pmap_t locked_pmap)
762{
763	static const struct timeval printinterval = { 60, 0 };
764	static struct timeval lastprint;
765	struct vpgqueues *vpq;
766	struct ia64_lpte *pte;
767	pmap_t oldpmap, pmap;
768	pv_entry_t allocated_pv, next_pv, pv;
769	vm_offset_t va;
770	vm_page_t m;
771
772	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
773	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
774	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
775	if (allocated_pv != NULL) {
776		pv_entry_count++;
777		if (pv_entry_count > pv_entry_high_water)
778			pagedaemon_wakeup();
779		else
780			return (allocated_pv);
781	}
782
783	/*
784	 * Reclaim pv entries: At first, destroy mappings to inactive
785	 * pages.  After that, if a pv entry is still needed, destroy
786	 * mappings to active pages.
787	 */
788	if (ratecheck(&lastprint, &printinterval))
789		printf("Approaching the limit on PV entries, "
790		    "increase the vm.pmap.shpgperproc tunable.\n");
791	vpq = &vm_page_queues[PQ_INACTIVE];
792retry:
793	TAILQ_FOREACH(m, &vpq->pl, pageq) {
794		if (m->hold_count || m->busy)
795			continue;
796		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
797			va = pv->pv_va;
798			pmap = pv->pv_pmap;
799			/* Avoid deadlock and lock recursion. */
800			if (pmap > locked_pmap)
801				PMAP_LOCK(pmap);
802			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
803				continue;
804			pmap->pm_stats.resident_count--;
805			oldpmap = pmap_switch(pmap);
806			pte = pmap_find_vhpt(va);
807			KASSERT(pte != NULL, ("pte"));
808			pmap_remove_vhpt(va);
809			pmap_invalidate_page(pmap, va);
810			pmap_switch(oldpmap);
811			if (pmap_accessed(pte))
812				vm_page_flag_set(m, PG_REFERENCED);
813			if (pmap_dirty(pte))
814				vm_page_dirty(m);
815			pmap_free_pte(pte, va);
816			TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
817			m->md.pv_list_count--;
818			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
819			if (TAILQ_EMPTY(&m->md.pv_list))
820				vm_page_flag_clear(m, PG_WRITEABLE);
821			if (pmap != locked_pmap)
822				PMAP_UNLOCK(pmap);
823			if (allocated_pv == NULL)
824				allocated_pv = pv;
825			else
826				free_pv_entry(pv);
827		}
828	}
829	if (allocated_pv == NULL) {
830		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
831			vpq = &vm_page_queues[PQ_ACTIVE];
832			goto retry;
833		}
834		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
835	}
836	return (allocated_pv);
837}
838
839/*
840 * Conditionally create a pv entry.
841 */
842static boolean_t
843pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
844{
845	pv_entry_t pv;
846
847	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
848	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
849	if (pv_entry_count < pv_entry_high_water &&
850	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
851		pv_entry_count++;
852		pv->pv_va = va;
853		pv->pv_pmap = pmap;
854		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
855		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
856		m->md.pv_list_count++;
857		return (TRUE);
858	} else
859		return (FALSE);
860}
861
862/*
863 * Add an ia64_lpte to the VHPT.
864 */
865static void
866pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
867{
868	struct ia64_bucket *bckt;
869	struct ia64_lpte *vhpte;
870	uint64_t pte_pa;
871
872	/* Can fault, so get it out of the way. */
873	pte_pa = ia64_tpa((vm_offset_t)pte);
874
875	vhpte = (struct ia64_lpte *)ia64_thash(va);
876	bckt = (struct ia64_bucket *)vhpte->chain;
877
878	mtx_lock_spin(&bckt->mutex);
879	pte->chain = bckt->chain;
880	ia64_mf();
881	bckt->chain = pte_pa;
882
883	pmap_vhpt_inserts++;
884	bckt->length++;
885	mtx_unlock_spin(&bckt->mutex);
886}
887
888/*
889 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
890 * worked or an appropriate error code otherwise.
891 */
892static int
893pmap_remove_vhpt(vm_offset_t va)
894{
895	struct ia64_bucket *bckt;
896	struct ia64_lpte *pte;
897	struct ia64_lpte *lpte;
898	struct ia64_lpte *vhpte;
899	uint64_t chain, tag;
900
901	tag = ia64_ttag(va);
902	vhpte = (struct ia64_lpte *)ia64_thash(va);
903	bckt = (struct ia64_bucket *)vhpte->chain;
904
905	lpte = NULL;
906	mtx_lock_spin(&bckt->mutex);
907	chain = bckt->chain;
908	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
909	while (chain != 0 && pte->tag != tag) {
910		lpte = pte;
911		chain = pte->chain;
912		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
913	}
914	if (chain == 0) {
915		mtx_unlock_spin(&bckt->mutex);
916		return (ENOENT);
917	}
918
 919	/* Snip this pte out of the collision chain. */
920	if (lpte == NULL)
921		bckt->chain = pte->chain;
922	else
923		lpte->chain = pte->chain;
924	ia64_mf();
925
926	bckt->length--;
927	mtx_unlock_spin(&bckt->mutex);
928	return (0);
929}
930
931/*
932 * Find the ia64_lpte for the given va, if any.
933 */
934static struct ia64_lpte *
935pmap_find_vhpt(vm_offset_t va)
936{
937	struct ia64_bucket *bckt;
938	struct ia64_lpte *pte;
939	uint64_t chain, tag;
940
941	tag = ia64_ttag(va);
942	pte = (struct ia64_lpte *)ia64_thash(va);
943	bckt = (struct ia64_bucket *)pte->chain;
944
945	mtx_lock_spin(&bckt->mutex);
946	chain = bckt->chain;
947	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
948	while (chain != 0 && pte->tag != tag) {
949		chain = pte->chain;
950		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
951	}
952	mtx_unlock_spin(&bckt->mutex);
953	return ((chain != 0) ? pte : NULL);
954}
955
956/*
957 * Remove an entry from the list of managed mappings.
958 */
959static int
960pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
961{
962	if (!pv) {
963		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
964			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
965				if (pmap == pv->pv_pmap && va == pv->pv_va)
966					break;
967			}
968		} else {
969			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
970				if (va == pv->pv_va)
971					break;
972			}
973		}
974	}
975
976	if (pv) {
977		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
978		m->md.pv_list_count--;
979		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
980			vm_page_flag_clear(m, PG_WRITEABLE);
981
982		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
983		free_pv_entry(pv);
984		return 0;
985	} else {
986		return ENOENT;
987	}
988}
989
990/*
991 * Create a pv entry for page at pa for
992 * (pmap, va).
993 */
994static void
995pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
996{
997	pv_entry_t pv;
998
999	pv = get_pv_entry(pmap);
1000	pv->pv_pmap = pmap;
1001	pv->pv_va = va;
1002
1003	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1004	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1005	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1006	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1007	m->md.pv_list_count++;
1008}
1009
1010/*
1011 *	Routine:	pmap_extract
1012 *	Function:
1013 *		Extract the physical page address associated
1014 *		with the given map/virtual_address pair.
1015 */
1016vm_paddr_t
1017pmap_extract(pmap_t pmap, vm_offset_t va)
1018{
1019	struct ia64_lpte *pte;
1020	pmap_t oldpmap;
1021	vm_paddr_t pa;
1022
1023	pa = 0;
1024	PMAP_LOCK(pmap);
1025	oldpmap = pmap_switch(pmap);
1026	pte = pmap_find_vhpt(va);
1027	if (pte != NULL && pmap_present(pte))
1028		pa = pmap_ppn(pte);
1029	pmap_switch(oldpmap);
1030	PMAP_UNLOCK(pmap);
1031	return (pa);
1032}
1033
1034/*
1035 *	Routine:	pmap_extract_and_hold
1036 *	Function:
1037 *		Atomically extract and hold the physical page
1038 *		with the given pmap and virtual address pair
1039 *		if that mapping permits the given protection.
1040 */
1041vm_page_t
1042pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1043{
1044	struct ia64_lpte *pte;
1045	pmap_t oldpmap;
1046	vm_page_t m;
1047
1048	m = NULL;
1049	vm_page_lock_queues();
1050	PMAP_LOCK(pmap);
1051	oldpmap = pmap_switch(pmap);
1052	pte = pmap_find_vhpt(va);
1053	if (pte != NULL && pmap_present(pte) &&
1054	    (pmap_prot(pte) & prot) == prot) {
1055		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1056		vm_page_hold(m);
1057	}
1058	vm_page_unlock_queues();
1059	pmap_switch(oldpmap);
1060	PMAP_UNLOCK(pmap);
1061	return (m);
1062}
1063
1064/***************************************************
1065 * Low level mapping routines.....
1066 ***************************************************/
1067
1068/*
1069 * Find the kernel lpte for mapping the given virtual address, which
1070 * must be in the part of region 5 which we can cover with our kernel
1071 * 'page tables'.
1072 */
1073static struct ia64_lpte *
1074pmap_find_kpte(vm_offset_t va)
1075{
1076	struct ia64_lpte **dir1;
1077	struct ia64_lpte *leaf;
1078
1079	KASSERT((va >> 61) == 5,
1080		("kernel mapping 0x%lx not in region 5", va));
1081	KASSERT(va < kernel_vm_end,
1082		("kernel mapping 0x%lx out of range", va));
1083
1084	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1085	leaf = dir1[KPTE_DIR1_INDEX(va)];
1086	return (&leaf[KPTE_PTE_INDEX(va)]);
1087}
1088
1089/*
1090 * Find a pte suitable for mapping a user-space address. If one exists
1091 * in the VHPT, that one will be returned, otherwise a new pte is
1092 * allocated.
1093 */
1094static struct ia64_lpte *
1095pmap_find_pte(vm_offset_t va)
1096{
1097	struct ia64_lpte *pte;
1098
1099	if (va >= VM_MAXUSER_ADDRESS)
1100		return pmap_find_kpte(va);
1101
1102	pte = pmap_find_vhpt(va);
1103	if (pte == NULL) {
1104		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1105		pte->tag = 1UL << 63;
1106	}
1107	return (pte);
1108}
1109
1110/*
1111 * Free a pte which is now unused. This simply returns it to the zone
1112 * allocator if it is a user mapping. For kernel mappings, clear the
1113 * valid bit to make it clear that the mapping is not currently used.
1114 */
1115static void
1116pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1117{
1118	if (va < VM_MAXUSER_ADDRESS)
1119		uma_zfree(ptezone, pte);
1120	else
1121		pmap_clear_present(pte);
1122}
1123
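/*
 * Encode the protection for a mapping.  The VM protection is also kept
 * verbatim in otherwise-ignored high bits of the PTE so that pmap_prot()
 * can read it back.  The privilege level is user only for user pmaps that
 * grant some access.  prot2ar is indexed by (prot & VM_PROT_ALL) >> 1,
 * which folds the read bit away: for instance, VM_PROT_WRITE and
 * VM_PROT_READ|VM_PROT_WRITE both select PTE_AR_RW.
 */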
1124static PMAP_INLINE void
1125pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1126{
1127	static long prot2ar[4] = {
1128		PTE_AR_R,		/* VM_PROT_NONE */
1129		PTE_AR_RW,		/* VM_PROT_WRITE */
1130		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1131		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1132	};
1133
1134	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1135	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1136	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1137	    ? PTE_PL_KERN : PTE_PL_USER;
1138	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1139}
1140
1141/*
1142 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1143 * the pte was originally valid, then it is assumed to already be in the
1144 * VHPT.
1145 * This function does not set the protection bits.  It's expected
1146 * that those have been set correctly prior to calling this function.
1147 */
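/*
 * The itir image written here carries only the page size (PAGE_SHIFT in
 * bits 7:2); the protection key field is left at zero.
 */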
1148static void
1149pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1150    boolean_t wired, boolean_t managed)
1151{
1152
1153	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED;
1154	pte->pte |= PTE_PRESENT | PTE_MA_WB;
1155	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1156	pte->pte |= (wired) ? PTE_WIRED : 0;
1157	pte->pte |= pa & PTE_PPN_MASK;
1158
1159	pte->itir = PAGE_SHIFT << 2;
1160
1161	pte->tag = ia64_ttag(va);
1162}
1163
1164/*
1165 * Remove the (possibly managed) mapping represented by pte from the
1166 * given pmap.
1167 */
1168static int
1169pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1170		pv_entry_t pv, int freepte)
1171{
1172	int error;
1173	vm_page_t m;
1174
1175	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1176		("removing pte for non-current pmap"));
1177
1178	/*
1179	 * First remove from the VHPT.
1180	 */
1181	error = pmap_remove_vhpt(va);
1182	if (error)
1183		return (error);
1184
1185	pmap_invalidate_page(pmap, va);
1186
1187	if (pmap_wired(pte))
1188		pmap->pm_stats.wired_count -= 1;
1189
1190	pmap->pm_stats.resident_count -= 1;
1191	if (pmap_managed(pte)) {
1192		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1193		if (pmap_dirty(pte))
1194			vm_page_dirty(m);
1195		if (pmap_accessed(pte))
1196			vm_page_flag_set(m, PG_REFERENCED);
1197
1198		error = pmap_remove_entry(pmap, m, va, pv);
1199	}
1200	if (freepte)
1201		pmap_free_pte(pte, va);
1202
1203	return (error);
1204}
1205
1206/*
1207 * Extract the physical page address associated with a kernel
1208 * virtual address.
1209 */
1210vm_paddr_t
1211pmap_kextract(vm_offset_t va)
1212{
1213	struct ia64_lpte *pte;
1214	vm_offset_t gwpage;
1215
1216	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1217
1218	/* Regions 6 and 7 are direct mapped. */
1219	if (va >= IA64_RR_BASE(6))
1220		return (IA64_RR_MASK(va));
1221
1222	/* EPC gateway page? */
1223	gwpage = (vm_offset_t)ia64_get_k5();
1224	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1225		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1226
1227	/* Bail out if the virtual address is beyond our limits. */
1228	if (va >= kernel_vm_end)
1229		return (0);
1230
1231	pte = pmap_find_kpte(va);
1232	if (!pmap_present(pte))
1233		return (0);
1234	return (pmap_ppn(pte) | (va & PAGE_MASK));
1235}
1236
1237/*
1238 * Add a list of wired pages to the kva.  This routine is only used for
1239 * temporary kernel mappings that do not need to have page modification
1240 * or references recorded.  Note that old mappings are simply written
1241 * over.  The page is effectively wired, but it's customary to not have
1242 * the PTE reflect that, nor update statistics.
1243 */
1244void
1245pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1246{
1247	struct ia64_lpte *pte;
1248	int i;
1249
1250	for (i = 0; i < count; i++) {
1251		pte = pmap_find_kpte(va);
1252		if (pmap_present(pte))
1253			pmap_invalidate_page(kernel_pmap, va);
1254		else
1255			pmap_enter_vhpt(pte, va);
1256		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1257		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1258		va += PAGE_SIZE;
1259	}
1260}
1261
1262/*
1263 * this routine jerks page mappings from the
1264 * kernel -- it is meant only for temporary mappings.
1265 */
1266void
1267pmap_qremove(vm_offset_t va, int count)
1268{
1269	struct ia64_lpte *pte;
1270	int i;
1271
1272	for (i = 0; i < count; i++) {
1273		pte = pmap_find_kpte(va);
1274		if (pmap_present(pte)) {
1275			pmap_remove_vhpt(va);
1276			pmap_invalidate_page(kernel_pmap, va);
1277			pmap_clear_present(pte);
1278		}
1279		va += PAGE_SIZE;
1280	}
1281}
1282
1283/*
1284 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1285 * to not have the PTE reflect that, nor update statistics.
1286 */
1287void
1288pmap_kenter(vm_offset_t va, vm_offset_t pa)
1289{
1290	struct ia64_lpte *pte;
1291
1292	pte = pmap_find_kpte(va);
1293	if (pmap_present(pte))
1294		pmap_invalidate_page(kernel_pmap, va);
1295	else
1296		pmap_enter_vhpt(pte, va);
1297	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1298	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1299}
1300
1301/*
1302 * Remove a page from the kva
1303 */
1304void
1305pmap_kremove(vm_offset_t va)
1306{
1307	struct ia64_lpte *pte;
1308
1309	pte = pmap_find_kpte(va);
1310	if (pmap_present(pte)) {
1311		pmap_remove_vhpt(va);
1312		pmap_invalidate_page(kernel_pmap, va);
1313		pmap_clear_present(pte);
1314	}
1315}
1316
1317/*
1318 *	Used to map a range of physical addresses into kernel
1319 *	virtual address space.
1320 *
1321 *	The value passed in '*virt' is a suggested virtual address for
1322 *	the mapping. Architectures which can support a direct-mapped
1323 *	physical to virtual region can return the appropriate address
1324 *	within that region, leaving '*virt' unchanged. Other
1325 *	architectures should map the pages starting at '*virt' and
1326 *	update '*virt' with the first usable address after the mapped
1327 *	region.
1328 */
1329vm_offset_t
1330pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1331{
1332	return IA64_PHYS_TO_RR7(start);
1333}
1334
1335/*
1336 * Remove a single page from a process address space
1337 */
1338static void
1339pmap_remove_page(pmap_t pmap, vm_offset_t va)
1340{
1341	struct ia64_lpte *pte;
1342
1343	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1344		("removing page for non-current pmap"));
1345
1346	pte = pmap_find_vhpt(va);
1347	if (pte != NULL)
1348		pmap_remove_pte(pmap, pte, va, 0, 1);
1349	return;
1350}
1351
1352/*
1353 *	Remove the given range of addresses from the specified map.
1354 *
1355 *	It is assumed that the start and end are properly
1356 *	rounded to the page size.
1357 */
1358void
1359pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1360{
1361	pmap_t oldpmap;
1362	vm_offset_t va;
1363	pv_entry_t npv, pv;
1364	struct ia64_lpte *pte;
1365
1366	if (pmap->pm_stats.resident_count == 0)
1367		return;
1368
1369	vm_page_lock_queues();
1370	PMAP_LOCK(pmap);
1371	oldpmap = pmap_switch(pmap);
1372
1373	/*
1374	 * Special handling for removing a single page: a very
1375	 * common operation for which we can short-circuit some
1376	 * code.
1377	 */
1378	if (sva + PAGE_SIZE == eva) {
1379		pmap_remove_page(pmap, sva);
1380		goto out;
1381	}
1382
1383	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1384		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
1385			va = pv->pv_va;
1386			if (va >= sva && va < eva) {
1387				pte = pmap_find_vhpt(va);
1388				KASSERT(pte != NULL, ("pte"));
1389				pmap_remove_pte(pmap, pte, va, pv, 1);
1390			}
1391		}
1392	} else {
1393		for (va = sva; va < eva; va += PAGE_SIZE) {
1394			pte = pmap_find_vhpt(va);
1395			if (pte != NULL)
1396				pmap_remove_pte(pmap, pte, va, 0, 1);
1397		}
1398	}
1399
1400out:
1401	vm_page_unlock_queues();
1402	pmap_switch(oldpmap);
1403	PMAP_UNLOCK(pmap);
1404}
1405
1406/*
1407 *	Routine:	pmap_remove_all
1408 *	Function:
1409 *		Removes this physical page from
1410 *		all physical maps in which it resides.
1411 *		Reflects back modify bits to the pager.
1412 *
1413 *	Notes:
1414 *		Original versions of this routine were very
1415 *		inefficient because they iteratively called
1416 *		pmap_remove (slow...)
1417 */
1418
1419void
1420pmap_remove_all(vm_page_t m)
1421{
1422	pmap_t oldpmap;
1423	pv_entry_t pv;
1424
1425#if defined(DIAGNOSTIC)
1426	/*
1427	 * XXX This makes pmap_remove_all() illegal for non-managed pages!
1428	 */
1429	if (m->flags & PG_FICTITIOUS) {
1430		panic("pmap_remove_all: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1431	}
1432#endif
1433	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1434	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1435		struct ia64_lpte *pte;
1436		pmap_t pmap = pv->pv_pmap;
1437		vm_offset_t va = pv->pv_va;
1438
1439		PMAP_LOCK(pmap);
1440		oldpmap = pmap_switch(pmap);
1441		pte = pmap_find_vhpt(va);
1442		KASSERT(pte != NULL, ("pte"));
1443		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1444			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1445		pmap_remove_pte(pmap, pte, va, pv, 1);
1446		pmap_switch(oldpmap);
1447		PMAP_UNLOCK(pmap);
1448	}
1449	vm_page_flag_clear(m, PG_WRITEABLE);
1450}
1451
1452/*
1453 *	Set the physical protection on the
1454 *	specified range of this map as requested.
1455 */
1456void
1457pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1458{
1459	pmap_t oldpmap;
1460	struct ia64_lpte *pte;
1461
1462	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1463		pmap_remove(pmap, sva, eva);
1464		return;
1465	}
1466
1467	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1468	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1469		return;
1470
1471	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1472		panic("pmap_protect: unaligned addresses");
1473
1474	vm_page_lock_queues();
1475	PMAP_LOCK(pmap);
1476	oldpmap = pmap_switch(pmap);
1477	for ( ; sva < eva; sva += PAGE_SIZE) {
1478		/* If page is invalid, skip this page */
1479		pte = pmap_find_vhpt(sva);
1480		if (pte == NULL)
1481			continue;
1482
1483		/* If there's no change, skip it too */
1484		if (pmap_prot(pte) == prot)
1485			continue;
1486
1487		if (pmap_managed(pte)) {
1488			vm_offset_t pa = pmap_ppn(pte);
1489			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1490
1491			if (pmap_dirty(pte)) {
1492				vm_page_dirty(m);
1493				pmap_clear_dirty(pte);
1494			}
1495
1496			if (pmap_accessed(pte)) {
1497				vm_page_flag_set(m, PG_REFERENCED);
1498				pmap_clear_accessed(pte);
1499			}
1500		}
1501
1502		if (prot & VM_PROT_EXECUTE)
1503			ia64_sync_icache(sva, PAGE_SIZE);
1504
1505		pmap_pte_prot(pmap, pte, prot);
1506		pmap_invalidate_page(pmap, sva);
1507	}
1508	vm_page_unlock_queues();
1509	pmap_switch(oldpmap);
1510	PMAP_UNLOCK(pmap);
1511}
1512
1513/*
1514 *	Insert the given physical page (p) at
1515 *	the specified virtual address (v) in the
1516 *	target physical map with the protection requested.
1517 *
1518 *	If specified, the page will be wired down, meaning
1519 *	that the related pte can not be reclaimed.
1520 *
1521 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1522 *	or lose information.  That is, this routine must actually
1523 *	insert this page into the given map NOW.
1524 */
1525void
1526pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1527    vm_prot_t prot, boolean_t wired)
1528{
1529	pmap_t oldpmap;
1530	vm_offset_t pa;
1531	vm_offset_t opa;
1532	struct ia64_lpte origpte;
1533	struct ia64_lpte *pte;
1534	boolean_t icache_inval, managed;
1535
1536	vm_page_lock_queues();
1537	PMAP_LOCK(pmap);
1538	oldpmap = pmap_switch(pmap);
1539
1540	va &= ~PAGE_MASK;
1541#ifdef DIAGNOSTIC
1542	if (va > VM_MAX_KERNEL_ADDRESS)
1543		panic("pmap_enter: toobig");
1544#endif
1545
1546	/*
1547	 * Find (or create) a pte for the given mapping.
1548	 */
1549	while ((pte = pmap_find_pte(va)) == NULL) {
1550		pmap_switch(oldpmap);
1551		PMAP_UNLOCK(pmap);
1552		vm_page_unlock_queues();
1553		VM_WAIT;
1554		vm_page_lock_queues();
1555		PMAP_LOCK(pmap);
1556		oldpmap = pmap_switch(pmap);
1557	}
1558	origpte = *pte;
1559	if (!pmap_present(pte)) {
1560		opa = ~0UL;
1561		pmap_enter_vhpt(pte, va);
1562	} else
1563		opa = pmap_ppn(pte);
1564	managed = FALSE;
1565	pa = VM_PAGE_TO_PHYS(m);
1566
1567	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1568
1569	/*
1570	 * Mapping has not changed, must be protection or wiring change.
1571	 */
1572	if (opa == pa) {
1573		/*
1574		 * Wiring change, just update stats. We don't worry about
1575		 * wiring PT pages as they remain resident as long as there
1576		 * are valid mappings in them. Hence, if a user page is wired,
1577		 * the PT page will be also.
1578		 */
1579		if (wired && !pmap_wired(&origpte))
1580			pmap->pm_stats.wired_count++;
1581		else if (!wired && pmap_wired(&origpte))
1582			pmap->pm_stats.wired_count--;
1583
1584		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1585
1586		/*
1587		 * We might be turning off write access to the page,
1588		 * so we go ahead and sense modify status. Otherwise,
1589		 * we can avoid I-cache invalidation if the page
1590		 * already allowed execution.
1591		 */
1592		if (managed && pmap_dirty(&origpte))
1593			vm_page_dirty(m);
1594		else if (pmap_exec(&origpte))
1595			icache_inval = FALSE;
1596
1597		pmap_invalidate_page(pmap, va);
1598		goto validate;
1599	}
1600
1601	/*
1602	 * Mapping has changed, invalidate old range and fall
1603	 * through to handle validating new mapping.
1604	 */
1605	if (opa != ~0UL) {
1606		pmap_remove_pte(pmap, pte, va, 0, 0);
1607		pmap_enter_vhpt(pte, va);
1608	}
1609
1610	/*
1611	 * Enter on the PV list if part of our managed memory.
1612	 */
1613	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1614		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1615		    ("pmap_enter: managed mapping within the clean submap"));
1616		pmap_insert_entry(pmap, va, m);
1617		managed = TRUE;
1618	}
1619
1620	/*
1621	 * Increment counters
1622	 */
1623	pmap->pm_stats.resident_count++;
1624	if (wired)
1625		pmap->pm_stats.wired_count++;
1626
1627validate:
1628
1629	/*
1630	 * Now validate mapping with desired protection/wiring. This
1631	 * adds the pte to the VHPT if necessary.
1632	 */
1633	pmap_pte_prot(pmap, pte, prot);
1634	pmap_set_pte(pte, va, pa, wired, managed);
1635
1636	/* Invalidate the I-cache when needed. */
1637	if (icache_inval)
1638		ia64_sync_icache(va, PAGE_SIZE);
1639
1640	if ((prot & VM_PROT_WRITE) != 0)
1641		vm_page_flag_set(m, PG_WRITEABLE);
1642	vm_page_unlock_queues();
1643	pmap_switch(oldpmap);
1644	PMAP_UNLOCK(pmap);
1645}
1646
1647/*
1648 * Maps a sequence of resident pages belonging to the same object.
1649 * The sequence begins with the given page m_start.  This page is
1650 * mapped at the given virtual address start.  Each subsequent page is
1651 * mapped at a virtual address that is offset from start by the same
1652 * amount as the page is offset from m_start within the object.  The
1653 * last page in the sequence is the page with the largest offset from
1654 * m_start that can be mapped at a virtual address less than the given
1655 * virtual address end.  Not every virtual page between start and end
1656 * is mapped; only those for which a resident page exists with the
1657 * corresponding offset from m_start are mapped.
1658 */
1659void
1660pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1661    vm_page_t m_start, vm_prot_t prot)
1662{
1663	pmap_t oldpmap;
1664	vm_page_t m;
1665	vm_pindex_t diff, psize;
1666
1667	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
1668	psize = atop(end - start);
1669	m = m_start;
1670	PMAP_LOCK(pmap);
1671	oldpmap = pmap_switch(pmap);
1672	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1673		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1674		m = TAILQ_NEXT(m, listq);
1675	}
1676	pmap_switch(oldpmap);
1677 	PMAP_UNLOCK(pmap);
1678}
1679
1680/*
1681 * this code makes some *MAJOR* assumptions:
1682 * 1. Current pmap & pmap exists.
1683 * 2. Not wired.
1684 * 3. Read access.
1685 * 4. No page table pages.
1686 * but is *MUCH* faster than pmap_enter...
1687 */
1688
1689void
1690pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1691{
1692	pmap_t oldpmap;
1693
1694	PMAP_LOCK(pmap);
1695	oldpmap = pmap_switch(pmap);
1696	pmap_enter_quick_locked(pmap, va, m, prot);
1697	pmap_switch(oldpmap);
1698	PMAP_UNLOCK(pmap);
1699}
1700
1701static void
1702pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1703    vm_prot_t prot)
1704{
1705	struct ia64_lpte *pte;
1706	boolean_t managed;
1707
1708	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1709	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
1710	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1711	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1712	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1713
1714	if ((pte = pmap_find_pte(va)) == NULL)
1715		return;
1716
1717	if (!pmap_present(pte)) {
1718		/* Enter on the PV list if the page is managed. */
1719		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1720			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1721				pmap_free_pte(pte, va);
1722				return;
1723			}
1724			managed = TRUE;
1725		} else
1726			managed = FALSE;
1727
1728		/* Increment counters. */
1729		pmap->pm_stats.resident_count++;
1730
1731		/* Initialise with R/O protection and enter into VHPT. */
1732		pmap_enter_vhpt(pte, va);
1733		pmap_pte_prot(pmap, pte,
1734		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1735		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1736
1737		if (prot & VM_PROT_EXECUTE)
1738			ia64_sync_icache(va, PAGE_SIZE);
1739	}
1740}
1741
1742/*
1743 * pmap_object_init_pt preloads the ptes for a given object
1744 * into the specified pmap.  This eliminates the blast of soft
1745 * faults on process startup and immediately after an mmap.
1746 */
1747void
1748pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1749		    vm_object_t object, vm_pindex_t pindex,
1750		    vm_size_t size)
1751{
1752
1753	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1754	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1755	    ("pmap_object_init_pt: non-device object"));
1756}
1757
1758/*
1759 *	Routine:	pmap_change_wiring
1760 *	Function:	Change the wiring attribute for a map/virtual-address
1761 *			pair.
1762 *	In/out conditions:
1763 *			The mapping must already exist in the pmap.
1764 */
1765void
1766pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1770{
1771	pmap_t oldpmap;
1772	struct ia64_lpte *pte;
1773
1774	PMAP_LOCK(pmap);
1775	oldpmap = pmap_switch(pmap);
1776
1777	pte = pmap_find_vhpt(va);
1778	KASSERT(pte != NULL, ("pte"));
1779	if (wired && !pmap_wired(pte)) {
1780		pmap->pm_stats.wired_count++;
1781		pmap_set_wired(pte);
1782	} else if (!wired && pmap_wired(pte)) {
1783		pmap->pm_stats.wired_count--;
1784		pmap_clear_wired(pte);
1785	}
1786
1787	pmap_switch(oldpmap);
1788	PMAP_UNLOCK(pmap);
1789}
1790
1791
1792
1793/*
1794 *	Copy the range specified by src_addr/len
1795 *	from the source map to the range dst_addr/len
1796 *	in the destination map.
1797 *
1798 *	This routine is only advisory and need not do anything.
1799 */
1800
1801void
1802pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1803	  vm_offset_t src_addr)
1804{
1805}
1806
1807
1808/*
1809 *	pmap_zero_page zeros the specified hardware page by
1810 *	mapping it into virtual memory and using bzero to clear
1811 *	its contents.
1812 */
1813
1814void
1815pmap_zero_page(vm_page_t m)
1816{
1817	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1818	bzero((caddr_t) va, PAGE_SIZE);
1819}
1820
1821
1822/*
1823 *	pmap_zero_page_area zeros the specified hardware page by
1824 *	mapping it into virtual memory and using bzero to clear
1825 *	its contents.
1826 *
1827 *	off and size must reside within a single page.
1828 */
1829
1830void
1831pmap_zero_page_area(vm_page_t m, int off, int size)
1832{
1833	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1834	bzero((char *)(caddr_t)va + off, size);
1835}
1836
1837
1838/*
1839 *	pmap_zero_page_idle zeros the specified hardware page by
1840 *	mapping it into virtual memory and using bzero to clear
1841 *	its contents.  This is for the vm_idlezero process.
1842 */
1843
1844void
1845pmap_zero_page_idle(vm_page_t m)
1846{
1847	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1848	bzero((caddr_t) va, PAGE_SIZE);
1849}
1850
1851
1852/*
1853 *	pmap_copy_page copies the specified (machine independent)
1854 *	page by mapping the page into virtual memory and using
1855 *	bcopy to copy the page, one machine dependent page at a
1856 *	time.
1857 */
1858void
1859pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1860{
1861	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
1862	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
1863	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
1864}
1865
1866/*
1867 * Returns true if the pmap's pv is one of the first
1868 * 16 pvs linked to from this page.  This count may
1869 * be changed upwards or downwards in the future; it
1870 * is only necessary that true be returned for a small
1871 * subset of pmaps for proper page aging.
1872 */
1873boolean_t
1874pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1875{
1876	pv_entry_t pv;
1877	int loops = 0;
1878
1879	if (m->flags & PG_FICTITIOUS)
1880		return FALSE;
1881
1882	/*
1883	 * Not found, check current mappings returning immediately if found.
1884	 */
1885	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1886	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1887		if (pv->pv_pmap == pmap) {
1888			return TRUE;
1889		}
1890		loops++;
1891		if (loops >= 16)
1892			break;
1893	}
1894	return (FALSE);
1895}
1896
1897/*
1898 *	pmap_page_wired_mappings:
1899 *
1900 *	Return the number of managed mappings to the given physical page
1901 *	that are wired.
1902 */
1903int
1904pmap_page_wired_mappings(vm_page_t m)
1905{
1906	struct ia64_lpte *pte;
1907	pmap_t oldpmap, pmap;
1908	pv_entry_t pv;
1909	int count;
1910
1911	count = 0;
1912	if ((m->flags & PG_FICTITIOUS) != 0)
1913		return (count);
1914	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1915	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1916		pmap = pv->pv_pmap;
1917		PMAP_LOCK(pmap);
1918		oldpmap = pmap_switch(pmap);
1919		pte = pmap_find_vhpt(pv->pv_va);
1920		KASSERT(pte != NULL, ("pte"));
1921		if (pmap_wired(pte))
1922			count++;
1923		pmap_switch(oldpmap);
1924		PMAP_UNLOCK(pmap);
1925	}
1926	return (count);
1927}
1928
1929/*
1930 * Remove all pages from the specified address space;
1931 * this aids process exit speeds.  Also, this code
1932 * is special cased for current process only, but
1933 * can have the more generic (and slightly slower)
1934 * mode enabled.  This is much faster than pmap_remove
1935 * in the case of running down an entire address space.
1936 */
1937void
1938pmap_remove_pages(pmap_t pmap)
1939{
1940	pmap_t oldpmap;
1941	pv_entry_t pv, npv;
1942
1943	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
1944		printf("warning: pmap_remove_pages called with non-current pmap\n");
1945		return;
1946	}
1947
1948	vm_page_lock_queues();
1949	PMAP_LOCK(pmap);
1950	oldpmap = pmap_switch(pmap);
1951
1952	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
1953		struct ia64_lpte *pte;
1954
1955		npv = TAILQ_NEXT(pv, pv_plist);
1956
1957		pte = pmap_find_vhpt(pv->pv_va);
1958		KASSERT(pte != NULL, ("pte"));
1959		if (!pmap_wired(pte))
1960			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
1961	}
1962
1963	pmap_switch(oldpmap);
1964	PMAP_UNLOCK(pmap);
1965	vm_page_unlock_queues();
1966}
1967
1968/*
1969 *	pmap_ts_referenced:
1970 *
1971 *	Return a count of reference bits for a page, clearing those bits.
1972 *	It is not necessary for every reference bit to be cleared, but it
1973 *	is necessary that 0 only be returned when there are truly no
1974 *	reference bits set.
1975 *
1976 *	XXX: The exact number of bits to check and clear is a matter that
1977 *	should be tested and standardized at some point in the future for
1978 *	optimal aging of shared pages.
1979 */
1980int
1981pmap_ts_referenced(vm_page_t m)
1982{
1983	struct ia64_lpte *pte;
1984	pmap_t oldpmap;
1985	pv_entry_t pv;
1986	int count = 0;
1987
1988	if (m->flags & PG_FICTITIOUS)
1989		return 0;
1990
1991	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1992		PMAP_LOCK(pv->pv_pmap);
1993		oldpmap = pmap_switch(pv->pv_pmap);
1994		pte = pmap_find_vhpt(pv->pv_va);
1995		KASSERT(pte != NULL, ("pte"));
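		/*
		 * Count and clear the hardware accessed bit; purging the
		 * stale translation ensures that a future reference will
		 * set the bit again.
		 */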
1996		if (pmap_accessed(pte)) {
1997			count++;
1998			pmap_clear_accessed(pte);
1999			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2000		}
2001		pmap_switch(oldpmap);
2002		PMAP_UNLOCK(pv->pv_pmap);
2003	}
2004
2005	return count;
2006}
2007
2008/*
2009 *	pmap_is_modified:
2010 *
2011 *	Return whether or not the specified physical page was modified
2012 *	in any physical maps.
2013 */
2014boolean_t
2015pmap_is_modified(vm_page_t m)
2016{
2017	struct ia64_lpte *pte;
2018	pmap_t oldpmap;
2019	pv_entry_t pv;
2020	boolean_t rv;
2021
2022	rv = FALSE;
2023	if (m->flags & PG_FICTITIOUS)
2024		return (rv);
2025
2026	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2027		PMAP_LOCK(pv->pv_pmap);
2028		oldpmap = pmap_switch(pv->pv_pmap);
2029		pte = pmap_find_vhpt(pv->pv_va);
2030		pmap_switch(oldpmap);
2031		KASSERT(pte != NULL, ("pte"));
2032		rv = pmap_dirty(pte) ? TRUE : FALSE;
2033		PMAP_UNLOCK(pv->pv_pmap);
2034		if (rv)
2035			break;
2036	}
2037
2038	return (rv);
2039}
2040
2041/*
2042 *	pmap_is_prefaultable:
2043 *
2044 *	Return whether or not the specified virtual address is eligible
2045 *	for prefault.
2046 */
2047boolean_t
2048pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2049{
2050	struct ia64_lpte *pte;
2051
2052	pte = pmap_find_vhpt(addr);
2053	if (pte != NULL && pmap_present(pte))
2054		return (FALSE);
2055	return (TRUE);
2056}
2057
2058/*
2059 *	Clear the modify bits on the specified physical page.
2060 */
2061void
2062pmap_clear_modify(vm_page_t m)
2063{
2064	struct ia64_lpte *pte;
2065	pmap_t oldpmap;
2066	pv_entry_t pv;
2067
2068	if (m->flags & PG_FICTITIOUS)
2069		return;
2070
2071	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2072		PMAP_LOCK(pv->pv_pmap);
2073		oldpmap = pmap_switch(pv->pv_pmap);
2074		pte = pmap_find_vhpt(pv->pv_va);
2075		KASSERT(pte != NULL, ("pte"));
2076		if (pmap_dirty(pte)) {
2077			pmap_clear_dirty(pte);
2078			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2079		}
2080		pmap_switch(oldpmap);
2081		PMAP_UNLOCK(pv->pv_pmap);
2082	}
2083}
2084
2085/*
2086 *	pmap_clear_reference:
2087 *
2088 *	Clear the reference bit on the specified physical page.
2089 */
2090void
2091pmap_clear_reference(vm_page_t m)
2092{
2093	struct ia64_lpte *pte;
2094	pmap_t oldpmap;
2095	pv_entry_t pv;
2096
2097	if (m->flags & PG_FICTITIOUS)
2098		return;
2099
2100	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2101		PMAP_LOCK(pv->pv_pmap);
2102		oldpmap = pmap_switch(pv->pv_pmap);
2103		pte = pmap_find_vhpt(pv->pv_va);
2104		KASSERT(pte != NULL, ("pte"));
2105		if (pmap_accessed(pte)) {
2106			pmap_clear_accessed(pte);
2107			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2108		}
2109		pmap_switch(oldpmap);
2110		PMAP_UNLOCK(pv->pv_pmap);
2111	}
2112}
2113
2114/*
2115 * Clear the write and modified bits in each of the given page's mappings.
2116 */
2117void
2118pmap_remove_write(vm_page_t m)
2119{
2120	struct ia64_lpte *pte;
2121	pmap_t oldpmap, pmap;
2122	pv_entry_t pv;
2123	vm_prot_t prot;
2124
2125	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2126	if ((m->flags & PG_FICTITIOUS) != 0 ||
2127	    (m->flags & PG_WRITEABLE) == 0)
2128		return;
2129	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2130		pmap = pv->pv_pmap;
2131		PMAP_LOCK(pmap);
2132		oldpmap = pmap_switch(pmap);
2133		pte = pmap_find_vhpt(pv->pv_va);
2134		KASSERT(pte != NULL, ("pte"));
2135		prot = pmap_prot(pte);
2136		if ((prot & VM_PROT_WRITE) != 0) {
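			/*
			 * Transfer the dirty bit to the vm_page before
			 * revoking write access; once the mapping is
			 * read-only no further dirty bits will be recorded
			 * for it.
			 */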
2137			if (pmap_dirty(pte)) {
2138				vm_page_dirty(m);
2139				pmap_clear_dirty(pte);
2140			}
2141			prot &= ~VM_PROT_WRITE;
2142			pmap_pte_prot(pmap, pte, prot);
2143			pmap_invalidate_page(pmap, pv->pv_va);
2144		}
2145		pmap_switch(oldpmap);
2146		PMAP_UNLOCK(pmap);
2147	}
2148	vm_page_flag_clear(m, PG_WRITEABLE);
2149}
2150
2151/*
2152 * Map a set of physical memory pages into the kernel virtual
2153 * address space. Return a pointer to where it is mapped. This
2154 * routine is intended to be used for mapping device memory,
2155 * NOT real memory.
2156 */
2157void *
2158pmap_mapdev(vm_offset_t pa, vm_size_t size)
2159{
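	/*
	 * Region 6 is the uncacheable, identity-mapped region, which is
	 * what device memory wants; no page table entries are created and
	 * the size argument is effectively ignored.
	 *
	 * Illustrative use (a sketch, not from this file): a driver would
	 * typically do
	 *
	 *	regs = pmap_mapdev(pa, sz);
	 *	...
	 *	pmap_unmapdev((vm_offset_t)regs, sz);
	 */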
2160	return (void*) IA64_PHYS_TO_RR6(pa);
2161}
2162
2163/*
2164 * 'Unmap' a range mapped by pmap_mapdev().
2165 */
2166void
2167pmap_unmapdev(vm_offset_t va, vm_size_t size)
2168{
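	/*
	 * Nothing to undo: pmap_mapdev() returned an address in the
	 * identity-mapped region and allocated no resources.
	 */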
2169	return;
2170}
2171
2172/*
2173 * perform the pmap work for mincore
2174 */
2175int
2176pmap_mincore(pmap_t pmap, vm_offset_t addr)
2177{
2178	pmap_t oldpmap;
2179	struct ia64_lpte *pte, tpte;
2180	int val = 0;
2181
2182	PMAP_LOCK(pmap);
2183	oldpmap = pmap_switch(pmap);
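	/*
	 * Snapshot the PTE into a local copy while the pmap is locked;
	 * the copy is examined below after the lock has been dropped.
	 */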
2184	pte = pmap_find_vhpt(addr);
2185	if (pte != NULL) {
2186		tpte = *pte;
2187		pte = &tpte;
2188	}
2189	pmap_switch(oldpmap);
2190	PMAP_UNLOCK(pmap);
2191
2192	if (pte == NULL)
2193		return 0;
2194
2195	if (pmap_present(pte)) {
2196		vm_page_t m;
2197		vm_offset_t pa;
2198
2199		val = MINCORE_INCORE;
2200		if (!pmap_managed(pte))
2201			return val;
2202
2203		pa = pmap_ppn(pte);
2204
2205		m = PHYS_TO_VM_PAGE(pa);
2206
2207		/*
2208		 * Modified by us
2209		 */
2210		if (pmap_dirty(pte))
2211			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2212		else {
2213			/*
2214			 * Modified by someone
2215			 */
2216			vm_page_lock_queues();
2217			if (pmap_is_modified(m))
2218				val |= MINCORE_MODIFIED_OTHER;
2219			vm_page_unlock_queues();
2220		}
2221		/*
2222		 * Referenced by us
2223		 */
2224		if (pmap_accessed(pte))
2225			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2226		else {
2227			/*
2228			 * Referenced by someone
2229			 */
2230			vm_page_lock_queues();
2231			if (pmap_ts_referenced(m)) {
2232				val |= MINCORE_REFERENCED_OTHER;
2233				vm_page_flag_set(m, PG_REFERENCED);
2234			}
2235			vm_page_unlock_queues();
2236		}
2237	}
2238	return val;
2239}
2240
2241void
2242pmap_activate(struct thread *td)
2243{
2244	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2245}
2246
2247pmap_t
2248pmap_switch(pmap_t pm)
2249{
2250	pmap_t prevpm;
2251	int i;
2252
2253	critical_enter();
2254	prevpm = PCPU_GET(current_pmap);
2255	if (prevpm == pm)
2256		goto out;
2257	if (prevpm != NULL)
2258		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
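	/*
	 * Reload the region registers for user regions 0-4.  Each value
	 * encodes the region ID in bits 8 and up, the preferred page size
	 * (PAGE_SHIFT) in the ps field, and sets the ve bit so that the
	 * VHPT walker is enabled.  A NULL pmap installs fixed per-region
	 * RIDs (the region number itself).
	 */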
2259	if (pm == NULL) {
2260		for (i = 0; i < 5; i++) {
2261			ia64_set_rr(IA64_RR_BASE(i),
2262			    (i << 8)|(PAGE_SHIFT << 2)|1);
2263		}
2264	} else {
2265		for (i = 0; i < 5; i++) {
2266			ia64_set_rr(IA64_RR_BASE(i),
2267			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2268		}
2269		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
2270	}
2271	PCPU_SET(current_pmap, pm);
2272	ia64_srlz_d();
2273
2274out:
2275	critical_exit();
2276	return (prevpm);
2277}
2278
2279/*
2280 *	Increase the starting virtual address of the given mapping if a
2281 *	different alignment might result in more superpage mappings.
2282 */
2283void
2284pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2285    vm_offset_t *addr, vm_size_t size)
2286{
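	/*
	 * No superpage promotion is implemented by this pmap, so there is
	 * nothing to be gained from realigning the mapping.
	 */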
2287}
2288
2289#include "opt_ddb.h"
2290
2291#ifdef DDB
2292
2293#include <ddb/ddb.h>
2294
2295static const char*	psnames[] = {
2296	"1B",	"2B",	"4B",	"8B",
2297	"16B",	"32B",	"64B",	"128B",
2298	"256B",	"512B",	"1K",	"2K",
2299	"4K",	"8K",	"16K",	"32K",
2300	"64K",	"128K",	"256K",	"512K",
2301	"1M",	"2M",	"4M",	"8M",
2302	"16M",	"32M",	"64M",	"128M",
2303	"256M",	"512M",	"1G",	"2G"
2304};
2305
2306static void
2307print_trs(int type)
2308{
2309	struct ia64_pal_result res;
2310	int i, maxtr;
2311	struct {
2312		pt_entry_t	pte;
2313		uint64_t	itir;
2314		uint64_t	ifa;
2315		struct ia64_rr	rr;
2316	} buf;
2317	static const char *manames[] = {
2318		"WB",	"bad",	"bad",	"bad",
2319		"UC",	"UCE",	"WC",	"NaT",
2320	};
2321
2322	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2323	if (res.pal_status != 0) {
2324		db_printf("Can't get VM summary\n");
2325		return;
2326	}
2327
2328	if (type == 0)
2329		maxtr = (res.pal_result[0] >> 40) & 0xff;
2330	else
2331		maxtr = (res.pal_result[0] >> 32) & 0xff;
2332
2333	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2334	for (i = 0; i <= maxtr; i++) {
2335		bzero(&buf, sizeof(buf));
2336		res = ia64_call_pal_stacked_physical
2337			(PAL_VM_TR_READ, i, type, ia64_tpa((uint64_t) &buf));
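		/*
		 * The first return word carries validity bits; clear the
		 * fields that PAL did not report as valid before they are
		 * formatted.
		 */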
2338		if (!(res.pal_result[0] & 1))
2339			buf.pte &= ~PTE_AR_MASK;
2340		if (!(res.pal_result[0] & 2))
2341			buf.pte &= ~PTE_PL_MASK;
2342		if (!(res.pal_result[0] & 4))
2343			pmap_clear_dirty(&buf);
2344		if (!(res.pal_result[0] & 8))
2345			buf.pte &= ~PTE_MA_MASK;
2346		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2347		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2348		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2349		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2350		    (buf.pte & PTE_ED) ? 1 : 0,
2351		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2352		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2353		    (pmap_dirty(&buf)) ? 1 : 0,
2354		    (pmap_accessed(&buf)) ? 1 : 0,
2355		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2356		    (pmap_present(&buf)) ? 1 : 0,
2357		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2358	}
2359}
2360
2361DB_COMMAND(itr, db_itr)
2362{
2363	print_trs(0);
2364}
2365
2366DB_COMMAND(dtr, db_dtr)
2367{
2368	print_trs(1);
2369}
2370
2371DB_COMMAND(rr, db_rr)
2372{
2373	int i;
2374	uint64_t t;
2375	struct ia64_rr rr;
2376
2377	printf("RR RID    PgSz VE\n");
2378	for (i = 0; i < 8; i++) {
2379		__asm __volatile ("mov %0=rr[%1]"
2380				  : "=r"(t)
2381				  : "r"(IA64_RR_BASE(i)));
2382		*(uint64_t *) &rr = t;
2383		printf("%d  %06x %4s %d\n",
2384		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2385	}
2386}
2387
2388DB_COMMAND(thash, db_thash)
2389{
2390	if (!have_addr)
2391		return;
2392
2393	db_printf("%p\n", (void *) ia64_thash(addr));
2394}
2395
2396DB_COMMAND(ttag, db_ttag)
2397{
2398	if (!have_addr)
2399		return;
2400
2401	db_printf("0x%lx\n", ia64_ttag(addr));
2402}
2403
2404DB_COMMAND(kpte, db_kpte)
2405{
2406	struct ia64_lpte *pte;
2407
2408	if (!have_addr) {
2409		db_printf("usage: kpte <kva>\n");
2410		return;
2411	}
2412	if (addr < VM_MIN_KERNEL_ADDRESS) {
2413		db_printf("kpte: error: invalid <kva>\n");
2414		return;
2415	}
2416	pte = pmap_find_kpte(addr);
2417	db_printf("kpte at %p:\n", pte);
2418	db_printf("  pte  =%016lx\n", pte->pte);
2419	db_printf("  itir =%016lx\n", pte->itir);
2420	db_printf("  tag  =%016lx\n", pte->tag);
2421	db_printf("  chain=%016lx\n", pte->chain);
2422}
2423
2424#endif
2425