pmap.c revision 208504
1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 208504 2010-05-24 14:26:57Z alc $");
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/lock.h>
54#include <sys/mman.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/smp.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60
61#include <vm/vm.h>
62#include <vm/vm_page.h>
63#include <vm/vm_map.h>
64#include <vm/vm_object.h>
65#include <vm/vm_pageout.h>
66#include <vm/uma.h>
67
68#include <machine/md_var.h>
69#include <machine/pal.h>
70
71/*
72 *	Manages physical address maps.
73 *
74 *	In addition to hardware address maps, this
75 *	module is called upon to provide software-use-only
76 *	maps which may or may not be stored in the same
77 *	form as hardware maps.  These pseudo-maps are
78 *	used to store intermediate results from copy
79 *	operations to and from address spaces.
80 *
81 *	Since the information managed by this module is
82 *	also stored by the logical address mapping module,
83 *	this module may throw away valid virtual-to-physical
84 *	mappings at almost any time.  However, invalidations
85 *	of virtual-to-physical mappings must be done as
86 *	requested.
87 *
88 *	In order to cope with hardware architectures which
89 *	make virtual-to-physical map invalidates expensive,
90 * this module may delay invalidation or protection-reduction
91 *	operations until such time as they are actually
92 *	necessary.  This module is given full information as
93 *	to which processors are currently using which maps,
94 *	and to when physical maps must be made correct.
95 */
96
97/*
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
101 *
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
104 *
105 * Regions 0..4
106 *	User virtually mapped
107 *
108 * Region 5
109 *	Kernel virtually mapped
110 *
111 * Region 6
112 *	Kernel physically mapped uncacheable
113 *
114 * Region 7
115 *	Kernel physically mapped cacheable
116 */
117
118/* XXX move to a header. */
119extern uint64_t ia64_gateway_page[];
120
121#ifndef PMAP_SHPGPERPROC
122#define PMAP_SHPGPERPROC 200
123#endif
124
125#if !defined(DIAGNOSTIC)
126#define PMAP_INLINE __inline
127#else
128#define PMAP_INLINE
129#endif
130
131#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
132#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
133#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
134#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
135#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
136#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
137#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
138#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
139
140#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
141#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
142#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
143#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
144
145#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
146
147/*
148 * The VHPT bucket head structure.
149 */
150struct ia64_bucket {
151	uint64_t	chain;
152	struct mtx	mutex;
153	u_int		length;
154};
155
156/*
157 * Statically allocated kernel pmap
158 */
159struct pmap kernel_pmap_store;
160
161vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
162vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
163
164/*
165 * Kernel virtual memory management.
166 */
167static int nkpt;
168struct ia64_lpte ***ia64_kptdir;
169#define KPTE_DIR0_INDEX(va) \
170	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
171#define KPTE_DIR1_INDEX(va) \
172	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
173#define KPTE_PTE_INDEX(va) \
174	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
175#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
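/*
 * A worked example of the three-level layout above -- a sketch only,
 * assuming the usual 8KB page size (PAGE_SHIFT == 13) and a 32-byte
 * struct ia64_lpte:
 *
 *	NKPTEPG             = 8192 / 32   = 256 PTEs per leaf page
 *	leaf page coverage  = 256 * 8KB   = 2MB  (KPTE_PTE_INDEX, va >> 13)
 *	dir1 page coverage  = 1024 * 2MB  = 2GB  (KPTE_DIR1_INDEX, va >> 21)
 *	ia64_kptdir (dir0)  = 1024 * 2GB  = 2TB  (KPTE_DIR0_INDEX, va >> 31)
 */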
176
177vm_offset_t kernel_vm_end;
178
179/* Values for ptc.e. XXX values for SKI. */
180static uint64_t pmap_ptc_e_base = 0x100000000;
181static uint64_t pmap_ptc_e_count1 = 3;
182static uint64_t pmap_ptc_e_count2 = 2;
183static uint64_t pmap_ptc_e_stride1 = 0x2000;
184static uint64_t pmap_ptc_e_stride2 = 0x100000000;
185struct mtx pmap_ptcmutex;
186
187/*
188 * Data for the RID allocator
189 */
190static int pmap_ridcount;
191static int pmap_rididx;
192static int pmap_ridmapsz;
193static int pmap_ridmax;
194static uint64_t *pmap_ridmap;
195struct mtx pmap_ridmutex;
196
197/*
198 * Data for the pv entry allocation mechanism
199 */
200static uma_zone_t pvzone;
201static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
202
203/*
204 * Data for allocating PTEs for user processes.
205 */
206static uma_zone_t ptezone;
207
208/*
209 * Virtual Hash Page Table (VHPT) data.
210 */
211/* SYSCTL_DECL(_machdep); */
212SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
213
214struct ia64_bucket *pmap_vhpt_bucket;
215
216int pmap_vhpt_nbuckets;
217SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
218    &pmap_vhpt_nbuckets, 0, "");
219
220int pmap_vhpt_log2size = 0;
221TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
222SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
223    &pmap_vhpt_log2size, 0, "");
224
225static int pmap_vhpt_inserts;
226SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
227    &pmap_vhpt_inserts, 0, "");
228
229static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
230SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
231    NULL, 0, pmap_vhpt_population, "I", "");
232
233static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
234
235static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
236static pv_entry_t get_pv_entry(pmap_t locked_pmap);
237
238static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
239		    vm_page_t m, vm_prot_t prot);
240static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
241static void	pmap_invalidate_all(void);
242static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
243		    vm_offset_t va, pv_entry_t pv, int freepte);
244static int	pmap_remove_vhpt(vm_offset_t va);
245static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
246		    vm_page_t m);
247
248vm_offset_t
249pmap_steal_memory(vm_size_t size)
250{
251	vm_size_t bank_size;
252	vm_offset_t pa, va;
253
254	size = round_page(size);
255
256	bank_size = phys_avail[1] - phys_avail[0];
257	while (size > bank_size) {
258		int i;
259		for (i = 0; phys_avail[i+2]; i+= 2) {
260			phys_avail[i] = phys_avail[i+2];
261			phys_avail[i+1] = phys_avail[i+3];
262		}
263		phys_avail[i] = 0;
264		phys_avail[i+1] = 0;
265		if (!phys_avail[0])
266			panic("pmap_steal_memory: out of memory");
267		bank_size = phys_avail[1] - phys_avail[0];
268	}
269
270	pa = phys_avail[0];
271	phys_avail[0] += size;
272
273	va = IA64_PHYS_TO_RR7(pa);
274	bzero((caddr_t) va, size);
275	return va;
276}
277
278static void
279pmap_initialize_vhpt(vm_offset_t vhpt)
280{
281	struct ia64_lpte *pte;
282	u_int i;
283
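	/*
	 * Each VHPT slot is a long-format ia64_lpte whose tag has bit 63
	 * set, which the VHPT walker treats as invalid.  Note the
	 * asymmetric use of 'chain': the slot itself points at its
	 * ia64_bucket head, while the entries hanging off a bucket link
	 * to each other by physical address (see pmap_enter_vhpt() and
	 * pmap_remove_vhpt()).
	 */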
284	pte = (struct ia64_lpte *)vhpt;
285	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
286		pte[i].pte = 0;
287		pte[i].itir = 0;
288		pte[i].tag = 1UL << 63; /* Invalid tag */
289		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
290	}
291}
292
293#ifdef SMP
294MALLOC_DECLARE(M_SMP);
295
296vm_offset_t
297pmap_alloc_vhpt(void)
298{
299	vm_offset_t vhpt;
300	vm_size_t size;
301
302	size = 1UL << pmap_vhpt_log2size;
303	vhpt = (uintptr_t)contigmalloc(size, M_SMP, 0, 0UL, ~0UL, size, 0UL);
304	if (vhpt != 0) {
305		vhpt = IA64_PHYS_TO_RR7(ia64_tpa(vhpt));
306		pmap_initialize_vhpt(vhpt);
307	}
308	return (vhpt);
309}
310#endif
311
312/*
313 *	Bootstrap the system enough to run with virtual memory.
314 */
315void
316pmap_bootstrap(void)
317{
318	struct ia64_pal_result res;
319	vm_offset_t base;
320	size_t size;
321	int i, j, count, ridbits;
322
323	/*
324	 * Query the PAL Code to find the loop parameters for the
325	 * ptc.e instruction.
326	 */
327	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
328	if (res.pal_status != 0)
329		panic("Can't configure ptc.e parameters");
330	pmap_ptc_e_base = res.pal_result[0];
331	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
332	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
333	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
334	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
335	if (bootverbose)
336		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
337		       "stride1=0x%lx, stride2=0x%lx\n",
338		       pmap_ptc_e_base,
339		       pmap_ptc_e_count1,
340		       pmap_ptc_e_count2,
341		       pmap_ptc_e_stride1,
342		       pmap_ptc_e_stride2);
343	mtx_init(&pmap_ptcmutex, "Global PTC lock", NULL, MTX_SPIN);
344
345	/*
346	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
347	 *
348	 * We currently need at least 19 bits in the RID because PID_MAX
349	 * can only be encoded in 17 bits and we need RIDs for 5 regions
350	 * per process. With PID_MAX equalling 99999 this means that we
351	 * need to be able to encode 499995 (=5*PID_MAX).
352	 * The Itanium processor only has 18 bits and the architected
353	 * minimum is exactly that. So, we cannot use a PID based scheme
354	 * in those cases. Enter pmap_ridmap...
355	 * We should avoid the map when running on a processor that has
356	 * implemented enough bits. This means that we should pass the
357	 * process/thread ID to pmap. This we currently don't do, so we
358	 * use the map anyway. However, we don't want to allocate a map
359	 * that is large enough to cover the range dictated by the number
360	 * of bits in the RID, because that may result in a RID map of
361	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
362	 * The bottom line: we create a 32KB map when the processor only
363	 * implements 18 bits (or when we can't figure it out). Otherwise
364	 * we create a 64KB map.
365	 */
366	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
367	if (res.pal_status != 0) {
368		if (bootverbose)
369			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
370		ridbits = 18; /* guaranteed minimum */
371	} else {
372		ridbits = (res.pal_result[1] >> 8) & 0xff;
373		if (bootverbose)
374			printf("Processor supports %d Region ID bits\n",
375			    ridbits);
376	}
377	if (ridbits > 19)
378		ridbits = 19;
379
380	pmap_ridmax = (1 << ridbits);
381	pmap_ridmapsz = pmap_ridmax / 64;
382	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
383	pmap_ridmap[0] |= 0xff;
384	pmap_rididx = 0;
385	pmap_ridcount = 8;
386	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
387
388	/*
389	 * Allocate some memory for initial kernel 'page tables'.
390	 */
391	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
392	nkpt = 0;
393	kernel_vm_end = VM_MIN_KERNEL_ADDRESS - VM_GATEWAY_SIZE;
394
395	for (i = 0; phys_avail[i+2]; i+= 2)
396		;
397	count = i+2;
398
399	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
400	if (pmap_vhpt_log2size == 0)
401		pmap_vhpt_log2size = 20;
402	else if (pmap_vhpt_log2size < 15)
403		pmap_vhpt_log2size = 15;
404	else if (pmap_vhpt_log2size > 61)
405		pmap_vhpt_log2size = 61;
406
407	base = 0;
408	size = 1UL << pmap_vhpt_log2size;
409	for (i = 0; i < count; i += 2) {
410		base = (phys_avail[i] + size - 1) & ~(size - 1);
411		if (base + size <= phys_avail[i+1])
412			break;
413	}
414	if (!phys_avail[i])
415		panic("Unable to allocate VHPT");
416
417	if (base != phys_avail[i]) {
418		/* Split this region. */
419		for (j = count; j > i; j -= 2) {
420			phys_avail[j] = phys_avail[j-2];
421			phys_avail[j+1] = phys_avail[j-2+1];
422		}
423		phys_avail[i+1] = base;
424		phys_avail[i+2] = base + size;
425	} else
426		phys_avail[i] = base + size;
427
428	base = IA64_PHYS_TO_RR7(base);
429	PCPU_SET(md.vhpt, base);
430	if (bootverbose)
431		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
432
433	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
434	pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
435	    sizeof(struct ia64_bucket));
436	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
437		/* Stolen memory is zeroed. */
438		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
439		    MTX_NOWITNESS | MTX_SPIN);
440	}
441
442	pmap_initialize_vhpt(base);
443	map_vhpt(base);
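	/*
	 * Program the PTA register.  As a rough guide to the encoding
	 * used below (a reading of the architected layout, not authority):
	 * bit 0 enables the VHPT walker, bits 7:2 hold the table size as
	 * log2(bytes), bit 8 selects the long format, and the upper bits
	 * hold the region 7 address of the table computed above.
	 */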
444	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
445	ia64_srlz_i();
446
447	virtual_avail = VM_MIN_KERNEL_ADDRESS;
448	virtual_end = VM_MAX_KERNEL_ADDRESS;
449
450	/*
451	 * Initialize the kernel pmap (which is statically allocated).
452	 */
453	PMAP_LOCK_INIT(kernel_pmap);
454	for (i = 0; i < 5; i++)
455		kernel_pmap->pm_rid[i] = 0;
456	TAILQ_INIT(&kernel_pmap->pm_pvlist);
457	PCPU_SET(md.current_pmap, kernel_pmap);
458
459	/*
460	 * Region 5 is mapped via the vhpt.
461	 */
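	/*
	 * Region register layout, roughly: bit 0 enables the VHPT walker
	 * for the region, bits 7:2 give the preferred page size as
	 * log2(bytes), and the RID starts at bit 8.  So region 5 gets
	 * RID 5 (kernel RID group 0 with the region number encoded in the
	 * low bits), the base page size, and the walker enabled.  Regions
	 * 6 and 7 below leave bit 0 clear; they are handled by the Alt
	 * {I,D}TLB fault handlers rather than the walker.
	 */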
462	ia64_set_rr(IA64_RR_BASE(5),
463		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
464
465	/*
466	 * Region 6 is direct mapped UC and region 7 is direct mapped
467	 * WB (cacheable). The details of this are controlled by the Alt {I,D}TLB
468	 * handlers. Here we just make sure that they have the largest
469	 * possible page size to minimise TLB usage.
470	 */
471	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
472	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));
473	ia64_srlz_d();
474
475	/*
476	 * Clear out any random TLB entries left over from booting.
477	 */
478	pmap_invalidate_all();
479
480	map_gateway_page();
481}
482
483static int
484pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
485{
486	int count, error, i;
487
488	count = 0;
489	for (i = 0; i < pmap_vhpt_nbuckets; i++)
490		count += pmap_vhpt_bucket[i].length;
491
492	error = SYSCTL_OUT(req, &count, sizeof(count));
493	return (error);
494}
495
496/*
497 *	Initialize a vm_page's machine-dependent fields.
498 */
499void
500pmap_page_init(vm_page_t m)
501{
502
503	TAILQ_INIT(&m->md.pv_list);
504	m->md.pv_list_count = 0;
505}
506
507/*
508 *	Initialize the pmap module.
509 *	Called by vm_init, to initialize any structures that the pmap
510 *	system needs to map virtual memory.
511 */
512void
513pmap_init(void)
514{
515	int shpgperproc = PMAP_SHPGPERPROC;
516
517	/*
518	 * Initialize the address space (zone) for the pv entries.  Set a
519	 * high water mark so that the system can recover from excessive
520	 * numbers of pv entries.
521	 */
522	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
523	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
524	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
525	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
526	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
527	pv_entry_high_water = 9 * (pv_entry_max / 10);
528
529	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
530	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
531}
532
533
534/***************************************************
535 * Manipulate TLBs for a pmap
536 ***************************************************/
537
538static void
539pmap_invalidate_page(vm_offset_t va)
540{
541	struct ia64_lpte *pte;
542	struct pcpu *pc;
543	uint64_t tag;
544	u_int vhpt_ofs;
545
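	/*
	 * Two-step invalidation: first poison the matching tag in every
	 * CPU's VHPT so the walker can no longer use it, then issue a
	 * single broadcast ptc.ga.  The global mutex serializes ptc.ga,
	 * which (as we understand the architecture) must not have more
	 * than one purge in flight at a time.
	 */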
546	critical_enter();
547	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
548	tag = ia64_ttag(va);
549	SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
550		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
551		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
552	}
553	critical_exit();
554	mtx_lock_spin(&pmap_ptcmutex);
555	ia64_ptc_ga(va, PAGE_SHIFT << 2);
556	mtx_unlock_spin(&pmap_ptcmutex);
557}
558
559static void
560pmap_invalidate_all_1(void *arg)
561{
562	uint64_t addr;
563	int i, j;
564
565	critical_enter();
566	addr = pmap_ptc_e_base;
567	for (i = 0; i < pmap_ptc_e_count1; i++) {
568		for (j = 0; j < pmap_ptc_e_count2; j++) {
569			ia64_ptc_e(addr);
570			addr += pmap_ptc_e_stride2;
571		}
572		addr += pmap_ptc_e_stride1;
573	}
574	critical_exit();
575}
576
577static void
578pmap_invalidate_all(void)
579{
580
581#ifdef SMP
582	if (mp_ncpus > 1) {
583		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
584		return;
585	}
586#endif
587	pmap_invalidate_all_1(NULL);
588}
589
590static uint32_t
591pmap_allocate_rid(void)
592{
593	uint64_t bit, bits;
594	int rid;
595
596	mtx_lock(&pmap_ridmutex);
597	if (pmap_ridcount == pmap_ridmax)
598		panic("pmap_allocate_rid: All Region IDs used");
599
600	/* Find an index with a free bit. */
601	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
602		pmap_rididx++;
603		if (pmap_rididx == pmap_ridmapsz)
604			pmap_rididx = 0;
605	}
606	rid = pmap_rididx * 64;
607
608	/* Find a free bit. */
609	bit = 1UL;
610	while (bits & bit) {
611		rid++;
612		bit <<= 1;
613	}
614
615	pmap_ridmap[pmap_rididx] |= bit;
616	pmap_ridcount++;
617	mtx_unlock(&pmap_ridmutex);
618
619	return rid;
620}
621
622static void
623pmap_free_rid(uint32_t rid)
624{
625	uint64_t bit;
626	int idx;
627
628	idx = rid / 64;
629	bit = ~(1UL << (rid & 63));
630
631	mtx_lock(&pmap_ridmutex);
632	pmap_ridmap[idx] &= bit;
633	pmap_ridcount--;
634	mtx_unlock(&pmap_ridmutex);
635}
636
637/***************************************************
638 * Page table page management routines.....
639 ***************************************************/
640
641void
642pmap_pinit0(struct pmap *pmap)
643{
644	/* kernel_pmap is the same as any other pmap. */
645	pmap_pinit(pmap);
646}
647
648/*
649 * Initialize a preallocated and zeroed pmap structure,
650 * such as one in a vmspace structure.
651 */
652int
653pmap_pinit(struct pmap *pmap)
654{
655	int i;
656
657	PMAP_LOCK_INIT(pmap);
658	for (i = 0; i < 5; i++)
659		pmap->pm_rid[i] = pmap_allocate_rid();
660	TAILQ_INIT(&pmap->pm_pvlist);
661	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
662	return (1);
663}
664
665/***************************************************
666 * Pmap allocation/deallocation routines.
667 ***************************************************/
668
669/*
670 * Release any resources held by the given physical map.
671 * Called when a pmap initialized by pmap_pinit is being released.
672 * Should only be called if the map contains no valid mappings.
673 */
674void
675pmap_release(pmap_t pmap)
676{
677	int i;
678
679	for (i = 0; i < 5; i++)
680		if (pmap->pm_rid[i])
681			pmap_free_rid(pmap->pm_rid[i]);
682	PMAP_LOCK_DESTROY(pmap);
683}
684
685/*
686 * grow the number of kernel page table entries, if needed
687 */
688void
689pmap_growkernel(vm_offset_t addr)
690{
691	struct ia64_lpte **dir1;
692	struct ia64_lpte *leaf;
693	vm_page_t nkpg;
694
695	while (kernel_vm_end <= addr) {
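		/*
		 * ia64_kptdir holds PAGE_SIZE/8 dir1 pointers and each
		 * dir1 page holds PAGE_SIZE/8 leaf pointers, so the total
		 * number of page table pages is bounded by
		 * PAGE_SIZE/8 + (PAGE_SIZE/8)^2, which is the limit
		 * checked below.
		 */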
696		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
697			panic("%s: out of kernel address space", __func__);
698
699		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
700		if (dir1 == NULL) {
701			nkpg = vm_page_alloc(NULL, nkpt++,
702			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
703			if (!nkpg)
704				panic("%s: cannot add dir. page", __func__);
705
706			dir1 = (struct ia64_lpte **)
707			    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
708			bzero(dir1, PAGE_SIZE);
709			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
710		}
711
712		nkpg = vm_page_alloc(NULL, nkpt++,
713		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
714		if (!nkpg)
715			panic("%s: cannot add PTE page", __func__);
716
717		leaf = (struct ia64_lpte *)
718		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
719		bzero(leaf, PAGE_SIZE);
720		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
721
722		kernel_vm_end += PAGE_SIZE * NKPTEPG;
723	}
724}
725
726/***************************************************
727 * page management routines.
728 ***************************************************/
729
730/*
731 * free the pv_entry back to the free list
732 */
733static PMAP_INLINE void
734free_pv_entry(pv_entry_t pv)
735{
736	pv_entry_count--;
737	uma_zfree(pvzone, pv);
738}
739
740/*
741 * get a new pv_entry, allocating a block from the system
742 * when needed.
743 */
744static pv_entry_t
745get_pv_entry(pmap_t locked_pmap)
746{
747	static const struct timeval printinterval = { 60, 0 };
748	static struct timeval lastprint;
749	struct vpgqueues *vpq;
750	struct ia64_lpte *pte;
751	pmap_t oldpmap, pmap;
752	pv_entry_t allocated_pv, next_pv, pv;
753	vm_offset_t va;
754	vm_page_t m;
755
756	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
757	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
758	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
759	if (allocated_pv != NULL) {
760		pv_entry_count++;
761		if (pv_entry_count > pv_entry_high_water)
762			pagedaemon_wakeup();
763		else
764			return (allocated_pv);
765	}
766
767	/*
768	 * Reclaim pv entries: At first, destroy mappings to inactive
769	 * pages.  After that, if a pv entry is still needed, destroy
770	 * mappings to active pages.
771	 */
772	if (ratecheck(&lastprint, &printinterval))
773		printf("Approaching the limit on PV entries, "
774		    "increase the vm.pmap.shpgperproc tunable.\n");
775	vpq = &vm_page_queues[PQ_INACTIVE];
776retry:
777	TAILQ_FOREACH(m, &vpq->pl, pageq) {
778		if (m->hold_count || m->busy)
779			continue;
780		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
781			va = pv->pv_va;
782			pmap = pv->pv_pmap;
783			/* Avoid deadlock and lock recursion. */
784			if (pmap > locked_pmap)
785				PMAP_LOCK(pmap);
786			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
787				continue;
788			pmap->pm_stats.resident_count--;
789			oldpmap = pmap_switch(pmap);
790			pte = pmap_find_vhpt(va);
791			KASSERT(pte != NULL, ("pte"));
792			pmap_remove_vhpt(va);
793			pmap_invalidate_page(va);
794			pmap_switch(oldpmap);
795			if (pmap_accessed(pte))
796				vm_page_flag_set(m, PG_REFERENCED);
797			if (pmap_dirty(pte))
798				vm_page_dirty(m);
799			pmap_free_pte(pte, va);
800			TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
801			m->md.pv_list_count--;
802			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
803			if (TAILQ_EMPTY(&m->md.pv_list))
804				vm_page_flag_clear(m, PG_WRITEABLE);
805			if (pmap != locked_pmap)
806				PMAP_UNLOCK(pmap);
807			if (allocated_pv == NULL)
808				allocated_pv = pv;
809			else
810				free_pv_entry(pv);
811		}
812	}
813	if (allocated_pv == NULL) {
814		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
815			vpq = &vm_page_queues[PQ_ACTIVE];
816			goto retry;
817		}
818		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
819	}
820	return (allocated_pv);
821}
822
823/*
824 * Conditionally create a pv entry.
825 */
826static boolean_t
827pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
828{
829	pv_entry_t pv;
830
831	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
832	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
833	if (pv_entry_count < pv_entry_high_water &&
834	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
835		pv_entry_count++;
836		pv->pv_va = va;
837		pv->pv_pmap = pmap;
838		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
839		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
840		m->md.pv_list_count++;
841		return (TRUE);
842	} else
843		return (FALSE);
844}
845
846/*
847 * Add an ia64_lpte to the VHPT.
848 */
849static void
850pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
851{
852	struct ia64_bucket *bckt;
853	struct ia64_lpte *vhpte;
854	uint64_t pte_pa;
855
856	/* Can fault, so get it out of the way. */
857	pte_pa = ia64_tpa((vm_offset_t)pte);
858
859	vhpte = (struct ia64_lpte *)ia64_thash(va);
860	bckt = (struct ia64_bucket *)vhpte->chain;
861
862	mtx_lock_spin(&bckt->mutex);
863	pte->chain = bckt->chain;
864	ia64_mf();
865	bckt->chain = pte_pa;
866
867	pmap_vhpt_inserts++;
868	bckt->length++;
869	mtx_unlock_spin(&bckt->mutex);
870}
871
872/*
873 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
874 * worked or an appropriate error code otherwise.
875 */
876static int
877pmap_remove_vhpt(vm_offset_t va)
878{
879	struct ia64_bucket *bckt;
880	struct ia64_lpte *pte;
881	struct ia64_lpte *lpte;
882	struct ia64_lpte *vhpte;
883	uint64_t chain, tag;
884
885	tag = ia64_ttag(va);
886	vhpte = (struct ia64_lpte *)ia64_thash(va);
887	bckt = (struct ia64_bucket *)vhpte->chain;
888
889	lpte = NULL;
890	mtx_lock_spin(&bckt->mutex);
891	chain = bckt->chain;
892	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
893	while (chain != 0 && pte->tag != tag) {
894		lpte = pte;
895		chain = pte->chain;
896		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
897	}
898	if (chain == 0) {
899		mtx_unlock_spin(&bckt->mutex);
900		return (ENOENT);
901	}
902
903	/* Snip this pte out of the collision chain. */
904	if (lpte == NULL)
905		bckt->chain = pte->chain;
906	else
907		lpte->chain = pte->chain;
908	ia64_mf();
909
910	bckt->length--;
911	mtx_unlock_spin(&bckt->mutex);
912	return (0);
913}
914
915/*
916 * Find the ia64_lpte for the given va, if any.
917 */
918static struct ia64_lpte *
919pmap_find_vhpt(vm_offset_t va)
920{
921	struct ia64_bucket *bckt;
922	struct ia64_lpte *pte;
923	uint64_t chain, tag;
924
925	tag = ia64_ttag(va);
926	pte = (struct ia64_lpte *)ia64_thash(va);
927	bckt = (struct ia64_bucket *)pte->chain;
928
929	mtx_lock_spin(&bckt->mutex);
930	chain = bckt->chain;
931	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
932	while (chain != 0 && pte->tag != tag) {
933		chain = pte->chain;
934		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
935	}
936	mtx_unlock_spin(&bckt->mutex);
937	return ((chain != 0) ? pte : NULL);
938}
939
940/*
941 * Remove an entry from the list of managed mappings.
942 */
943static int
944pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
945{
946	if (!pv) {
947		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
948			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
949				if (pmap == pv->pv_pmap && va == pv->pv_va)
950					break;
951			}
952		} else {
953			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
954				if (va == pv->pv_va)
955					break;
956			}
957		}
958	}
959
960	if (pv) {
961		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
962		m->md.pv_list_count--;
963		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
964			vm_page_flag_clear(m, PG_WRITEABLE);
965
966		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
967		free_pv_entry(pv);
968		return 0;
969	} else {
970		return ENOENT;
971	}
972}
973
974/*
975 * Create a pv entry for page at pa for
976 * (pmap, va).
977 */
978static void
979pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
980{
981	pv_entry_t pv;
982
983	pv = get_pv_entry(pmap);
984	pv->pv_pmap = pmap;
985	pv->pv_va = va;
986
987	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
988	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
989	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
990	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
991	m->md.pv_list_count++;
992}
993
994/*
995 *	Routine:	pmap_extract
996 *	Function:
997 *		Extract the physical page address associated
998 *		with the given map/virtual_address pair.
999 */
1000vm_paddr_t
1001pmap_extract(pmap_t pmap, vm_offset_t va)
1002{
1003	struct ia64_lpte *pte;
1004	pmap_t oldpmap;
1005	vm_paddr_t pa;
1006
1007	pa = 0;
1008	PMAP_LOCK(pmap);
1009	oldpmap = pmap_switch(pmap);
1010	pte = pmap_find_vhpt(va);
1011	if (pte != NULL && pmap_present(pte))
1012		pa = pmap_ppn(pte);
1013	pmap_switch(oldpmap);
1014	PMAP_UNLOCK(pmap);
1015	return (pa);
1016}
1017
1018/*
1019 *	Routine:	pmap_extract_and_hold
1020 *	Function:
1021 *		Atomically extract and hold the physical page
1022 *		with the given pmap and virtual address pair
1023 *		if that mapping permits the given protection.
1024 */
1025vm_page_t
1026pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1027{
1028	struct ia64_lpte *pte;
1029	pmap_t oldpmap;
1030	vm_page_t m;
1031	vm_paddr_t pa;
1032
1033	pa = 0;
1034	m = NULL;
1035	PMAP_LOCK(pmap);
1036	oldpmap = pmap_switch(pmap);
1037retry:
1038	pte = pmap_find_vhpt(va);
1039	if (pte != NULL && pmap_present(pte) &&
1040	    (pmap_prot(pte) & prot) == prot) {
1041		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1042		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1043			goto retry;
1044		vm_page_hold(m);
1045	}
1046	PA_UNLOCK_COND(pa);
1047	pmap_switch(oldpmap);
1048	PMAP_UNLOCK(pmap);
1049	return (m);
1050}
1051
1052/***************************************************
1053 * Low level mapping routines.....
1054 ***************************************************/
1055
1056/*
1057 * Find the kernel lpte for mapping the given virtual address, which
1058 * must be in the part of region 5 which we can cover with our kernel
1059 * 'page tables'.
1060 */
1061static struct ia64_lpte *
1062pmap_find_kpte(vm_offset_t va)
1063{
1064	struct ia64_lpte **dir1;
1065	struct ia64_lpte *leaf;
1066
1067	KASSERT((va >> 61) == 5,
1068		("kernel mapping 0x%lx not in region 5", va));
1069	KASSERT(va < kernel_vm_end,
1070		("kernel mapping 0x%lx out of range", va));
1071
1072	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1073	leaf = dir1[KPTE_DIR1_INDEX(va)];
1074	return (&leaf[KPTE_PTE_INDEX(va)]);
1075}
1076
1077/*
1078 * Find a pte suitable for mapping a user-space address. If one exists
1079 * in the VHPT, that one will be returned, otherwise a new pte is
1080 * allocated.
1081 */
1082static struct ia64_lpte *
1083pmap_find_pte(vm_offset_t va)
1084{
1085	struct ia64_lpte *pte;
1086
1087	if (va >= VM_MAXUSER_ADDRESS)
1088		return pmap_find_kpte(va);
1089
1090	pte = pmap_find_vhpt(va);
1091	if (pte == NULL) {
1092		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1093		pte->tag = 1UL << 63;
1094	}
1095	return (pte);
1096}
1097
1098/*
1099 * Free a pte which is now unused. This simply returns it to the zone
1100 * allocator if it is a user mapping. For kernel mappings, clear the
1101 * valid bit to make it clear that the mapping is not currently used.
1102 */
1103static void
1104pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1105{
1106	if (va < VM_MAXUSER_ADDRESS)
1107		uma_zfree(ptezone, pte);
1108	else
1109		pmap_clear_present(pte);
1110}
1111
1112static PMAP_INLINE void
1113pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1114{
1115	static long prot2ar[4] = {
1116		PTE_AR_R,		/* VM_PROT_NONE */
1117		PTE_AR_RW,		/* VM_PROT_WRITE */
1118		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1119		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1120	};
1121
1122	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1123	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1124	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1125	    ? PTE_PL_KERN : PTE_PL_USER;
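	/*
	 * VM_PROT_WRITE (0x2) and VM_PROT_EXECUTE (0x4), shifted right by
	 * one, form the 0..3 index into prot2ar[] above; read permission
	 * is implied by all four entries.
	 */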
1126	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1127}
1128
1129/*
1130 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1131 * the pte was originally valid, then it is assumed to already be in the
1132 * VHPT.
1133 * This function does not set the protection bits.  It's expected
1134 * that those have been set correctly prior to calling this function.
1135 */
1136static void
1137pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1138    boolean_t wired, boolean_t managed)
1139{
1140
1141	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED;
1142	pte->pte |= PTE_PRESENT | PTE_MA_WB;
1143	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1144	pte->pte |= (wired) ? PTE_WIRED : 0;
1145	pte->pte |= pa & PTE_PPN_MASK;
1146
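	/*
	 * The page size goes in the itir 'ps' field (bits 7:2, as we read
	 * the format); only the base page size is used here.
	 */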
1147	pte->itir = PAGE_SHIFT << 2;
1148
1149	pte->tag = ia64_ttag(va);
1150}
1151
1152/*
1153 * Remove the (possibly managed) mapping represented by pte from the
1154 * given pmap.
1155 */
1156static int
1157pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1158		pv_entry_t pv, int freepte)
1159{
1160	int error;
1161	vm_page_t m;
1162
1163	/*
1164	 * First remove from the VHPT.
1165	 */
1166	error = pmap_remove_vhpt(va);
1167	if (error)
1168		return (error);
1169
1170	pmap_invalidate_page(va);
1171
1172	if (pmap_wired(pte))
1173		pmap->pm_stats.wired_count -= 1;
1174
1175	pmap->pm_stats.resident_count -= 1;
1176	if (pmap_managed(pte)) {
1177		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1178		if (pmap_dirty(pte))
1179			vm_page_dirty(m);
1180		if (pmap_accessed(pte))
1181			vm_page_flag_set(m, PG_REFERENCED);
1182
1183		error = pmap_remove_entry(pmap, m, va, pv);
1184	}
1185	if (freepte)
1186		pmap_free_pte(pte, va);
1187
1188	return (error);
1189}
1190
1191/*
1192 * Extract the physical page address associated with a kernel
1193 * virtual address.
1194 */
1195vm_paddr_t
1196pmap_kextract(vm_offset_t va)
1197{
1198	struct ia64_lpte *pte;
1199	vm_offset_t gwpage;
1200
1201	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1202
1203	/* Regions 6 and 7 are direct mapped. */
1204	if (va >= IA64_RR_BASE(6))
1205		return (IA64_RR_MASK(va));
1206
1207	/* EPC gateway page? */
1208	gwpage = (vm_offset_t)ia64_get_k5();
1209	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1210		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1211
1212	/* Bail out if the virtual address is beyond our limits. */
1213	if (va >= kernel_vm_end)
1214		return (0);
1215
1216	pte = pmap_find_kpte(va);
1217	if (!pmap_present(pte))
1218		return (0);
1219	return (pmap_ppn(pte) | (va & PAGE_MASK));
1220}
1221
1222/*
1223 * Add a list of wired pages to the kva.  This routine is only used for
1224 * temporary kernel mappings that do not need to have page modification
1225 * or references recorded.  Note that old mappings are simply written
1226 * over.  The page is effectively wired, but it's customary to not have
1227 * the PTE reflect that, nor update statistics.
1228 */
1229void
1230pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1231{
1232	struct ia64_lpte *pte;
1233	int i;
1234
1235	for (i = 0; i < count; i++) {
1236		pte = pmap_find_kpte(va);
1237		if (pmap_present(pte))
1238			pmap_invalidate_page(va);
1239		else
1240			pmap_enter_vhpt(pte, va);
1241		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1242		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1243		va += PAGE_SIZE;
1244	}
1245}
1246
1247/*
1248 * this routine jerks page mappings from the
1249 * kernel -- it is meant only for temporary mappings.
1250 */
1251void
1252pmap_qremove(vm_offset_t va, int count)
1253{
1254	struct ia64_lpte *pte;
1255	int i;
1256
1257	for (i = 0; i < count; i++) {
1258		pte = pmap_find_kpte(va);
1259		if (pmap_present(pte)) {
1260			pmap_remove_vhpt(va);
1261			pmap_invalidate_page(va);
1262			pmap_clear_present(pte);
1263		}
1264		va += PAGE_SIZE;
1265	}
1266}
1267
1268/*
1269 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1270 * to not have the PTE reflect that, nor update statistics.
1271 */
1272void
1273pmap_kenter(vm_offset_t va, vm_offset_t pa)
1274{
1275	struct ia64_lpte *pte;
1276
1277	pte = pmap_find_kpte(va);
1278	if (pmap_present(pte))
1279		pmap_invalidate_page(va);
1280	else
1281		pmap_enter_vhpt(pte, va);
1282	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1283	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1284}
1285
1286/*
1287 * Remove a page from the kva
1288 */
1289void
1290pmap_kremove(vm_offset_t va)
1291{
1292	struct ia64_lpte *pte;
1293
1294	pte = pmap_find_kpte(va);
1295	if (pmap_present(pte)) {
1296		pmap_remove_vhpt(va);
1297		pmap_invalidate_page(va);
1298		pmap_clear_present(pte);
1299	}
1300}
1301
1302/*
1303 *	Used to map a range of physical addresses into kernel
1304 *	virtual address space.
1305 *
1306 *	The value passed in '*virt' is a suggested virtual address for
1307 *	the mapping. Architectures which can support a direct-mapped
1308 *	physical to virtual region can return the appropriate address
1309 *	within that region, leaving '*virt' unchanged. Other
1310 *	architectures should map the pages starting at '*virt' and
1311 *	update '*virt' with the first usable address after the mapped
1312 *	region.
1313 */
1314vm_offset_t
1315pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1316{
1317	return IA64_PHYS_TO_RR7(start);
1318}
1319
1320/*
1321 *	Remove the given range of addresses from the specified map.
1322 *
1323 *	It is assumed that the start and end are properly
1324 *	rounded to the page size.
1325 */
1326void
1327pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1328{
1329	pmap_t oldpmap;
1330	vm_offset_t va;
1331	pv_entry_t npv, pv;
1332	struct ia64_lpte *pte;
1333
1334	if (pmap->pm_stats.resident_count == 0)
1335		return;
1336
1337	vm_page_lock_queues();
1338	PMAP_LOCK(pmap);
1339	oldpmap = pmap_switch(pmap);
1340
1341	/*
1342	 * Special handling for removing a single page: a very
1343	 * common operation for which we can short circuit some
1344	 * code.
1345	 */
1346	if (sva + PAGE_SIZE == eva) {
1347		pte = pmap_find_vhpt(sva);
1348		if (pte != NULL)
1349			pmap_remove_pte(pmap, pte, sva, 0, 1);
1350		goto out;
1351	}
1352
1353	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1354		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
1355			va = pv->pv_va;
1356			if (va >= sva && va < eva) {
1357				pte = pmap_find_vhpt(va);
1358				KASSERT(pte != NULL, ("pte"));
1359				pmap_remove_pte(pmap, pte, va, pv, 1);
1360			}
1361		}
1362	} else {
1363		for (va = sva; va < eva; va += PAGE_SIZE) {
1364			pte = pmap_find_vhpt(va);
1365			if (pte != NULL)
1366				pmap_remove_pte(pmap, pte, va, 0, 1);
1367		}
1368	}
1369
1370out:
1371	vm_page_unlock_queues();
1372	pmap_switch(oldpmap);
1373	PMAP_UNLOCK(pmap);
1374}
1375
1376/*
1377 *	Routine:	pmap_remove_all
1378 *	Function:
1379 *		Removes this physical page from
1380 *		all physical maps in which it resides.
1381 *		Reflects back modify bits to the pager.
1382 *
1383 *	Notes:
1384 *		Original versions of this routine were very
1385 *		inefficient because they iteratively called
1386 *		pmap_remove (slow...)
1387 */
1388
1389void
1390pmap_remove_all(vm_page_t m)
1391{
1392	pmap_t oldpmap;
1393	pv_entry_t pv;
1394
1395	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1396	    ("pmap_remove_all: page %p is fictitious", m));
1397	vm_page_lock_queues();
1398	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1399		struct ia64_lpte *pte;
1400		pmap_t pmap = pv->pv_pmap;
1401		vm_offset_t va = pv->pv_va;
1402
1403		PMAP_LOCK(pmap);
1404		oldpmap = pmap_switch(pmap);
1405		pte = pmap_find_vhpt(va);
1406		KASSERT(pte != NULL, ("pte"));
1407		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1408			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1409		pmap_remove_pte(pmap, pte, va, pv, 1);
1410		pmap_switch(oldpmap);
1411		PMAP_UNLOCK(pmap);
1412	}
1413	vm_page_flag_clear(m, PG_WRITEABLE);
1414	vm_page_unlock_queues();
1415}
1416
1417/*
1418 *	Set the physical protection on the
1419 *	specified range of this map as requested.
1420 */
1421void
1422pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1423{
1424	pmap_t oldpmap;
1425	struct ia64_lpte *pte;
1426
1427	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1428		pmap_remove(pmap, sva, eva);
1429		return;
1430	}
1431
1432	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1433	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1434		return;
1435
1436	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1437		panic("pmap_protect: unaligned addresses");
1438
1439	vm_page_lock_queues();
1440	PMAP_LOCK(pmap);
1441	oldpmap = pmap_switch(pmap);
1442	for ( ; sva < eva; sva += PAGE_SIZE) {
1443		/* If page is invalid, skip this page */
1444		pte = pmap_find_vhpt(sva);
1445		if (pte == NULL)
1446			continue;
1447
1448		/* If there's no change, skip it too */
1449		if (pmap_prot(pte) == prot)
1450			continue;
1451
1452		if ((prot & VM_PROT_WRITE) == 0 &&
1453		    pmap_managed(pte) && pmap_dirty(pte)) {
1454			vm_paddr_t pa = pmap_ppn(pte);
1455			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1456
1457			vm_page_dirty(m);
1458			pmap_clear_dirty(pte);
1459		}
1460
1461		if (prot & VM_PROT_EXECUTE)
1462			ia64_sync_icache(sva, PAGE_SIZE);
1463
1464		pmap_pte_prot(pmap, pte, prot);
1465		pmap_invalidate_page(sva);
1466	}
1467	vm_page_unlock_queues();
1468	pmap_switch(oldpmap);
1469	PMAP_UNLOCK(pmap);
1470}
1471
1472/*
1473 *	Insert the given physical page (p) at
1474 *	the specified virtual address (v) in the
1475 *	target physical map with the protection requested.
1476 *
1477 *	If specified, the page will be wired down, meaning
1478 *	that the related pte can not be reclaimed.
1479 *
1480 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1481 *	or lose information.  That is, this routine must actually
1482 *	insert this page into the given map NOW.
1483 */
1484void
1485pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1486    vm_prot_t prot, boolean_t wired)
1487{
1488	pmap_t oldpmap;
1489	vm_offset_t pa;
1490	vm_offset_t opa;
1491	struct ia64_lpte origpte;
1492	struct ia64_lpte *pte;
1493	boolean_t icache_inval, managed;
1494
1495	vm_page_lock_queues();
1496	PMAP_LOCK(pmap);
1497	oldpmap = pmap_switch(pmap);
1498
1499	va &= ~PAGE_MASK;
1500 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1501	KASSERT((m->oflags & VPO_BUSY) != 0,
1502	    ("pmap_enter: page %p is not busy", m));
1503
1504	/*
1505	 * Find (or create) a pte for the given mapping.
1506	 */
1507	while ((pte = pmap_find_pte(va)) == NULL) {
1508		pmap_switch(oldpmap);
1509		PMAP_UNLOCK(pmap);
1510		vm_page_unlock_queues();
1511		VM_WAIT;
1512		vm_page_lock_queues();
1513		PMAP_LOCK(pmap);
1514		oldpmap = pmap_switch(pmap);
1515	}
1516	origpte = *pte;
1517	if (!pmap_present(pte)) {
1518		opa = ~0UL;
1519		pmap_enter_vhpt(pte, va);
1520	} else
1521		opa = pmap_ppn(pte);
1522	managed = FALSE;
1523	pa = VM_PAGE_TO_PHYS(m);
1524
1525	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1526
1527	/*
1528	 * Mapping has not changed, must be protection or wiring change.
1529	 */
1530	if (opa == pa) {
1531		/*
1532		 * Wiring change, just update stats. We don't worry about
1533		 * wiring PT pages as they remain resident as long as there
1534		 * are valid mappings in them. Hence, if a user page is wired,
1535		 * the PT page will be also.
1536		 */
1537		if (wired && !pmap_wired(&origpte))
1538			pmap->pm_stats.wired_count++;
1539		else if (!wired && pmap_wired(&origpte))
1540			pmap->pm_stats.wired_count--;
1541
1542		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1543
1544		/*
1545		 * We might be turning off write access to the page,
1546		 * so we go ahead and sense modify status. Otherwise,
1547		 * we can avoid I-cache invalidation if the page
1548		 * already allowed execution.
1549		 */
1550		if (managed && pmap_dirty(&origpte))
1551			vm_page_dirty(m);
1552		else if (pmap_exec(&origpte))
1553			icache_inval = FALSE;
1554
1555		pmap_invalidate_page(va);
1556		goto validate;
1557	}
1558
1559	/*
1560	 * Mapping has changed, invalidate old range and fall
1561	 * through to handle validating new mapping.
1562	 */
1563	if (opa != ~0UL) {
1564		pmap_remove_pte(pmap, pte, va, 0, 0);
1565		pmap_enter_vhpt(pte, va);
1566	}
1567
1568	/*
1569	 * Enter on the PV list if part of our managed memory.
1570	 */
1571	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1572		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1573		    ("pmap_enter: managed mapping within the clean submap"));
1574		pmap_insert_entry(pmap, va, m);
1575		managed = TRUE;
1576	}
1577
1578	/*
1579	 * Increment counters
1580	 */
1581	pmap->pm_stats.resident_count++;
1582	if (wired)
1583		pmap->pm_stats.wired_count++;
1584
1585validate:
1586
1587	/*
1588	 * Now validate mapping with desired protection/wiring. This
1589	 * adds the pte to the VHPT if necessary.
1590	 */
1591	pmap_pte_prot(pmap, pte, prot);
1592	pmap_set_pte(pte, va, pa, wired, managed);
1593
1594	/* Invalidate the I-cache when needed. */
1595	if (icache_inval)
1596		ia64_sync_icache(va, PAGE_SIZE);
1597
1598	if ((prot & VM_PROT_WRITE) != 0)
1599		vm_page_flag_set(m, PG_WRITEABLE);
1600	vm_page_unlock_queues();
1601	pmap_switch(oldpmap);
1602	PMAP_UNLOCK(pmap);
1603}
1604
1605/*
1606 * Maps a sequence of resident pages belonging to the same object.
1607 * The sequence begins with the given page m_start.  This page is
1608 * mapped at the given virtual address start.  Each subsequent page is
1609 * mapped at a virtual address that is offset from start by the same
1610 * amount as the page is offset from m_start within the object.  The
1611 * last page in the sequence is the page with the largest offset from
1612 * m_start that can be mapped at a virtual address less than the given
1613 * virtual address end.  Not every virtual page between start and end
1614 * is mapped; only those for which a resident page exists with the
1615 * corresponding offset from m_start are mapped.
1616 */
1617void
1618pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1619    vm_page_t m_start, vm_prot_t prot)
1620{
1621	pmap_t oldpmap;
1622	vm_page_t m;
1623	vm_pindex_t diff, psize;
1624
1625	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
1626	psize = atop(end - start);
1627	m = m_start;
1628	PMAP_LOCK(pmap);
1629	oldpmap = pmap_switch(pmap);
1630	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1631		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1632		m = TAILQ_NEXT(m, listq);
1633	}
1634	pmap_switch(oldpmap);
1635 	PMAP_UNLOCK(pmap);
1636}
1637
1638/*
1639 * this code makes some *MAJOR* assumptions:
1640 * 1. Current pmap & pmap exists.
1641 * 2. Not wired.
1642 * 3. Read access.
1643 * 4. No page table pages.
1644 * but is *MUCH* faster than pmap_enter...
1645 */
1646
1647void
1648pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1649{
1650	pmap_t oldpmap;
1651
1652	vm_page_lock_queues();
1653	PMAP_LOCK(pmap);
1654	oldpmap = pmap_switch(pmap);
1655	pmap_enter_quick_locked(pmap, va, m, prot);
1656	vm_page_unlock_queues();
1657	pmap_switch(oldpmap);
1658	PMAP_UNLOCK(pmap);
1659}
1660
1661static void
1662pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1663    vm_prot_t prot)
1664{
1665	struct ia64_lpte *pte;
1666	boolean_t managed;
1667
1668	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1669	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
1670	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1671	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1672	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1673
1674	if ((pte = pmap_find_pte(va)) == NULL)
1675		return;
1676
1677	if (!pmap_present(pte)) {
1678		/* Enter on the PV list if the page is managed. */
1679		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1680			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1681				pmap_free_pte(pte, va);
1682				return;
1683			}
1684			managed = TRUE;
1685		} else
1686			managed = FALSE;
1687
1688		/* Increment counters. */
1689		pmap->pm_stats.resident_count++;
1690
1691		/* Initialise with R/O protection and enter into VHPT. */
1692		pmap_enter_vhpt(pte, va);
1693		pmap_pte_prot(pmap, pte,
1694		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1695		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1696
1697		if (prot & VM_PROT_EXECUTE)
1698			ia64_sync_icache(va, PAGE_SIZE);
1699	}
1700}
1701
1702/*
1703 * pmap_object_init_pt preloads the ptes for a given object
1704 * into the specified pmap.  This eliminates the blast of soft
1705 * faults on process startup and immediately after an mmap.
1706 */
1707void
1708pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1709		    vm_object_t object, vm_pindex_t pindex,
1710		    vm_size_t size)
1711{
1712
1713	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1714	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1715	    ("pmap_object_init_pt: non-device object"));
1716}
1717
1718/*
1719 *	Routine:	pmap_change_wiring
1720 *	Function:	Change the wiring attribute for a map/virtual-address
1721 *			pair.
1722 *	In/out conditions:
1723 *			The mapping must already exist in the pmap.
1724 */
1725void
1726pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1730{
1731	pmap_t oldpmap;
1732	struct ia64_lpte *pte;
1733
1734	PMAP_LOCK(pmap);
1735	oldpmap = pmap_switch(pmap);
1736
1737	pte = pmap_find_vhpt(va);
1738	KASSERT(pte != NULL, ("pte"));
1739	if (wired && !pmap_wired(pte)) {
1740		pmap->pm_stats.wired_count++;
1741		pmap_set_wired(pte);
1742	} else if (!wired && pmap_wired(pte)) {
1743		pmap->pm_stats.wired_count--;
1744		pmap_clear_wired(pte);
1745	}
1746
1747	pmap_switch(oldpmap);
1748	PMAP_UNLOCK(pmap);
1749}
1750
1751
1752
1753/*
1754 *	Copy the range specified by src_addr/len
1755 *	from the source map to the range dst_addr/len
1756 *	in the destination map.
1757 *
1758 *	This routine is only advisory and need not do anything.
1759 */
1760
1761void
1762pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1763	  vm_offset_t src_addr)
1764{
1765}
1766
1767
1768/*
1769 *	pmap_zero_page zeros the specified hardware page by
1770 *	mapping it into virtual memory and using bzero to clear
1771 *	its contents.
1772 */
1773
1774void
1775pmap_zero_page(vm_page_t m)
1776{
1777	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1778	bzero((caddr_t) va, PAGE_SIZE);
1779}
1780
1781
1782/*
1783 *	pmap_zero_page_area zeros the specified hardware page by
1784 *	mapping it into virtual memory and using bzero to clear
1785 *	its contents.
1786 *
1787 *	off and size must reside within a single page.
1788 */
1789
1790void
1791pmap_zero_page_area(vm_page_t m, int off, int size)
1792{
1793	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1794	bzero((char *)(caddr_t)va + off, size);
1795}
1796
1797
1798/*
1799 *	pmap_zero_page_idle zeros the specified hardware page by
1800 *	mapping it into virtual memory and using bzero to clear
1801 *	its contents.  This is for the vm_idlezero process.
1802 */
1803
1804void
1805pmap_zero_page_idle(vm_page_t m)
1806{
1807	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1808	bzero((caddr_t) va, PAGE_SIZE);
1809}
1810
1811
1812/*
1813 *	pmap_copy_page copies the specified (machine independent)
1814 *	page by mapping the page into virtual memory and using
1815 *	bcopy to copy the page, one machine dependent page at a
1816 *	time.
1817 */
1818void
1819pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1820{
1821	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
1822	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
1823	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
1824}
1825
1826/*
1827 * Returns true if the pmap's pv is one of the first
1828 * 16 pvs linked to from this page.  This count may
1829 * be changed upwards or downwards in the future; it
1830 * is only necessary that true be returned for a small
1831 * subset of pmaps for proper page aging.
1832 */
1833boolean_t
1834pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1835{
1836	pv_entry_t pv;
1837	int loops = 0;
1838
1839	if (m->flags & PG_FICTITIOUS)
1840		return FALSE;
1841
1842	/*
1843	 * Check the page's current mappings; return once one is found in this pmap.
1844	 */
1845	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1846	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1847		if (pv->pv_pmap == pmap) {
1848			return TRUE;
1849		}
1850		loops++;
1851		if (loops >= 16)
1852			break;
1853	}
1854	return (FALSE);
1855}
1856
1857/*
1858 *	pmap_page_wired_mappings:
1859 *
1860 *	Return the number of managed mappings to the given physical page
1861 *	that are wired.
1862 */
1863int
1864pmap_page_wired_mappings(vm_page_t m)
1865{
1866	struct ia64_lpte *pte;
1867	pmap_t oldpmap, pmap;
1868	pv_entry_t pv;
1869	int count;
1870
1871	count = 0;
1872	if ((m->flags & PG_FICTITIOUS) != 0)
1873		return (count);
1874	vm_page_lock_queues();
1875	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1876		pmap = pv->pv_pmap;
1877		PMAP_LOCK(pmap);
1878		oldpmap = pmap_switch(pmap);
1879		pte = pmap_find_vhpt(pv->pv_va);
1880		KASSERT(pte != NULL, ("pte"));
1881		if (pmap_wired(pte))
1882			count++;
1883		pmap_switch(oldpmap);
1884		PMAP_UNLOCK(pmap);
1885	}
1886	vm_page_unlock_queues();
1887	return (count);
1888}
1889
1890/*
1891 * Remove all pages from the specified address space;
1892 * this aids process exit speeds.  Also, this code
1893 * is special cased for current process only, but
1894 * can have the more generic (and slightly slower)
1895 * mode enabled.  This is much faster than pmap_remove
1896 * in the case of running down an entire address space.
1897 */
1898void
1899pmap_remove_pages(pmap_t pmap)
1900{
1901	pmap_t oldpmap;
1902	pv_entry_t pv, npv;
1903
1904	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
1905		printf("warning: %s called with non-current pmap\n",
1906		    __func__);
1907		return;
1908	}
1909
1910	vm_page_lock_queues();
1911	PMAP_LOCK(pmap);
1912	oldpmap = pmap_switch(pmap);
1913
1914	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
1915		struct ia64_lpte *pte;
1916
1917		npv = TAILQ_NEXT(pv, pv_plist);
1918
1919		pte = pmap_find_vhpt(pv->pv_va);
1920		KASSERT(pte != NULL, ("pte"));
1921		if (!pmap_wired(pte))
1922			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
1923	}
1924
1925	pmap_switch(oldpmap);
1926	PMAP_UNLOCK(pmap);
1927	vm_page_unlock_queues();
1928}
1929
1930/*
1931 *	pmap_ts_referenced:
1932 *
1933 *	Return a count of reference bits for a page, clearing those bits.
1934 *	It is not necessary for every reference bit to be cleared, but it
1935 *	is necessary that 0 only be returned when there are truly no
1936 *	reference bits set.
1937 *
1938 *	XXX: The exact number of bits to check and clear is a matter that
1939 *	should be tested and standardized at some point in the future for
1940 *	optimal aging of shared pages.
1941 */
1942int
1943pmap_ts_referenced(vm_page_t m)
1944{
1945	struct ia64_lpte *pte;
1946	pmap_t oldpmap;
1947	pv_entry_t pv;
1948	int count = 0;
1949
1950	if (m->flags & PG_FICTITIOUS)
1951		return 0;
1952
1953	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1954		PMAP_LOCK(pv->pv_pmap);
1955		oldpmap = pmap_switch(pv->pv_pmap);
1956		pte = pmap_find_vhpt(pv->pv_va);
1957		KASSERT(pte != NULL, ("pte"));
1958		if (pmap_accessed(pte)) {
1959			count++;
1960			pmap_clear_accessed(pte);
1961			pmap_invalidate_page(pv->pv_va);
1962		}
1963		pmap_switch(oldpmap);
1964		PMAP_UNLOCK(pv->pv_pmap);
1965	}
1966
1967	return count;
1968}
1969
1970/*
1971 *	pmap_is_modified:
1972 *
1973 *	Return whether or not the specified physical page was modified
1974 *	in any physical maps.
1975 */
1976boolean_t
1977pmap_is_modified(vm_page_t m)
1978{
1979	struct ia64_lpte *pte;
1980	pmap_t oldpmap;
1981	pv_entry_t pv;
1982	boolean_t rv;
1983
1984	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1985	    ("pmap_is_modified: page %p is not managed", m));
1986	rv = FALSE;
1987
1988	/*
1989	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be
1990	 * concurrently set while the object is locked.  Thus, if PG_WRITEABLE
1991	 * is clear, no PTEs can be dirty.
1992	 */
1993	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1994	if ((m->oflags & VPO_BUSY) == 0 &&
1995	    (m->flags & PG_WRITEABLE) == 0)
1996		return (rv);
1997	vm_page_lock_queues();
1998	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1999		PMAP_LOCK(pv->pv_pmap);
2000		oldpmap = pmap_switch(pv->pv_pmap);
2001		pte = pmap_find_vhpt(pv->pv_va);
2002		pmap_switch(oldpmap);
2003		KASSERT(pte != NULL, ("pte"));
2004		rv = pmap_dirty(pte) ? TRUE : FALSE;
2005		PMAP_UNLOCK(pv->pv_pmap);
2006		if (rv)
2007			break;
2008	}
2009	vm_page_unlock_queues();
2010	return (rv);
2011}
2012
2013/*
2014 *	pmap_is_prefaultable:
2015 *
2016 *	Return whether or not the specified virtual address is elgible
2017 *	Return whether or not the specified virtual address is eligible
2018 */
2019boolean_t
2020pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2021{
2022	struct ia64_lpte *pte;
2023
2024	pte = pmap_find_vhpt(addr);
2025	if (pte != NULL && pmap_present(pte))
2026		return (FALSE);
2027	return (TRUE);
2028}
2029
2030/*
2031 *	pmap_is_referenced:
2032 *
2033 *	Return whether or not the specified physical page was referenced
2034 *	in any physical maps.
2035 */
2036boolean_t
2037pmap_is_referenced(vm_page_t m)
2038{
2039	struct ia64_lpte *pte;
2040	pmap_t oldpmap;
2041	pv_entry_t pv;
2042	boolean_t rv;
2043
2044	rv = FALSE;
2045	if (m->flags & PG_FICTITIOUS)
2046		return (rv);
2047	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2048		PMAP_LOCK(pv->pv_pmap);
2049		oldpmap = pmap_switch(pv->pv_pmap);
2050		pte = pmap_find_vhpt(pv->pv_va);
2051		pmap_switch(oldpmap);
2052		KASSERT(pte != NULL, ("pte"));
2053		rv = pmap_accessed(pte) ? TRUE : FALSE;
2054		PMAP_UNLOCK(pv->pv_pmap);
2055		if (rv)
2056			break;
2057	}
2058	return (rv);
2059}
2060
2061/*
2062 *	Clear the modify bits on the specified physical page.
2063 */
2064void
2065pmap_clear_modify(vm_page_t m)
2066{
2067	struct ia64_lpte *pte;
2068	pmap_t oldpmap;
2069	pv_entry_t pv;
2070
2071	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2072	    ("pmap_clear_modify: page %p is not managed", m));
2073	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2074	KASSERT((m->oflags & VPO_BUSY) == 0,
2075	    ("pmap_clear_modify: page %p is busy", m));
2076
2077	/*
2078	 * If the page is not PG_WRITEABLE, then no PTEs can be modified.
2079	 * If the object containing the page is locked and the page is not
2080	 * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set.
2081	 */
2082	if ((m->flags & PG_WRITEABLE) == 0)
2083		return;
2084	vm_page_lock_queues();
2085	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2086		PMAP_LOCK(pv->pv_pmap);
2087		oldpmap = pmap_switch(pv->pv_pmap);
2088		pte = pmap_find_vhpt(pv->pv_va);
2089		KASSERT(pte != NULL, ("pte"));
2090		if (pmap_dirty(pte)) {
2091			pmap_clear_dirty(pte);
2092			pmap_invalidate_page(pv->pv_va);
2093		}
2094		pmap_switch(oldpmap);
2095		PMAP_UNLOCK(pv->pv_pmap);
2096	}
2097	vm_page_unlock_queues();
2098}
2099
2100/*
2101 *	pmap_clear_reference:
2102 *
2103 *	Clear the reference bit on the specified physical page.
2104 */
2105void
2106pmap_clear_reference(vm_page_t m)
2107{
2108	struct ia64_lpte *pte;
2109	pmap_t oldpmap;
2110	pv_entry_t pv;
2111
2112	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2113	    ("pmap_clear_reference: page %p is not managed", m));
2114	vm_page_lock_queues();
2115	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2116		PMAP_LOCK(pv->pv_pmap);
2117		oldpmap = pmap_switch(pv->pv_pmap);
2118		pte = pmap_find_vhpt(pv->pv_va);
2119		KASSERT(pte != NULL, ("pte"));
2120		if (pmap_accessed(pte)) {
2121			pmap_clear_accessed(pte);
2122			pmap_invalidate_page(pv->pv_va);
2123		}
2124		pmap_switch(oldpmap);
2125		PMAP_UNLOCK(pv->pv_pmap);
2126	}
2127	vm_page_unlock_queues();
2128}
2129
2130/*
2131 * Clear the write and modified bits in each of the given page's mappings.
2132 */
2133void
2134pmap_remove_write(vm_page_t m)
2135{
2136	struct ia64_lpte *pte;
2137	pmap_t oldpmap, pmap;
2138	pv_entry_t pv;
2139	vm_prot_t prot;
2140
2141	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2142	    ("pmap_remove_write: page %p is not managed", m));
2143
2144	/*
2145	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by
2146	 * another thread while the object is locked.  Thus, if PG_WRITEABLE
2147	 * is clear, no page table entries need updating.
2148	 */
2149	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2150	if ((m->oflags & VPO_BUSY) == 0 &&
2151	    (m->flags & PG_WRITEABLE) == 0)
2152		return;
2153	vm_page_lock_queues();
2154	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2155		pmap = pv->pv_pmap;
2156		PMAP_LOCK(pmap);
2157		oldpmap = pmap_switch(pmap);
2158		pte = pmap_find_vhpt(pv->pv_va);
2159		KASSERT(pte != NULL, ("pte"));
2160		prot = pmap_prot(pte);
2161		if ((prot & VM_PROT_WRITE) != 0) {
2162			if (pmap_dirty(pte)) {
2163				vm_page_dirty(m);
2164				pmap_clear_dirty(pte);
2165			}
2166			prot &= ~VM_PROT_WRITE;
2167			pmap_pte_prot(pmap, pte, prot);
2168			pmap_invalidate_page(pv->pv_va);
2169		}
2170		pmap_switch(oldpmap);
2171		PMAP_UNLOCK(pmap);
2172	}
2173	vm_page_flag_clear(m, PG_WRITEABLE);
2174	vm_page_unlock_queues();
2175}
2176
2177/*
2178 * Map a set of physical memory pages into the kernel virtual
2179 * address space. Return a pointer to where it is mapped. This
2180 * routine is intended to be used for mapping device memory,
2181 * NOT real memory.
2182 */
2183void *
2184pmap_mapdev(vm_paddr_t pa, vm_size_t size)
2185{
2186	vm_offset_t va;
2187
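	/*
	 * Region 6 is a direct-mapped, uncacheable window onto physical
	 * memory, so device memory is "mapped" simply by ORing the
	 * region bits into the physical address; no PTEs are created.
	 */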
2188	va = pa | IA64_RR_BASE(6);
2189	return ((void *)va);
2190}
2191
2192/*
2193 * 'Unmap' a range mapped by pmap_mapdev().
2194 */
2195void
2196pmap_unmapdev(vm_offset_t va, vm_size_t size)
2197{
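	/*
	 * Nothing to do: pmap_mapdev() returns region 6 direct-mapped
	 * addresses and never creates page table entries.
	 */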
2198}
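
/*
 * Illustrative sketch only (not part of the original interface): a
 * driver might use pmap_mapdev()/pmap_unmapdev() roughly as below.
 * The physical address is hypothetical.
 */
#if 0
static void
example_mapdev_usage(void)
{
	volatile uint32_t *regs;
	void *va;

	va = pmap_mapdev(0xfed00000UL, PAGE_SIZE);	/* hypothetical pa */
	regs = (volatile uint32_t *)va;
	(void)regs[0];					/* e.g., read a register */
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
}
#endif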
2199
2200/*
2201 * perform the pmap work for mincore
2202 */
2203int
2204pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2205{
2206	pmap_t oldpmap;
2207	struct ia64_lpte *pte, tpte;
2208	vm_paddr_t pa;
2209	int val;
2210
2211	PMAP_LOCK(pmap);
2212retry:
2213	oldpmap = pmap_switch(pmap);
2214	pte = pmap_find_vhpt(addr);
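	/*
	 * Copy the PTE locally so it can still be examined after
	 * switching back to the previous pmap.
	 */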
2215	if (pte != NULL) {
2216		tpte = *pte;
2217		pte = &tpte;
2218	}
2219	pmap_switch(oldpmap);
2220	if (pte == NULL || !pmap_present(pte)) {
2221		val = 0;
2222		goto out;
2223	}
2224	val = MINCORE_INCORE;
2225	if (pmap_dirty(pte))
2226		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2227	if (pmap_accessed(pte))
2228		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2229	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2230	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
2231	    pmap_managed(pte)) {
2232		pa = pmap_ppn(pte);
2233		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
2234		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2235			goto retry;
2236	} else
2237out:
2238		PA_UNLOCK_COND(*locked_pa);
2239	PMAP_UNLOCK(pmap);
2240	return (val);
2241}
2242
2243void
2244pmap_activate(struct thread *td)
2245{
2246	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2247}
2248
2249pmap_t
2250pmap_switch(pmap_t pm)
2251{
2252	pmap_t prevpm;
2253	int i;
2254
2255	critical_enter();
2256	prevpm = PCPU_GET(md.current_pmap);
2257	if (prevpm == pm)
2258		goto out;
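	/*
	 * Load region registers 0-4 with the pmap's RIDs.  Each value
	 * is VE (VHPT walker enable) in bit 0, log2 of the preferred
	 * page size in bits 2-7 and the region ID from bit 8 up; for a
	 * NULL pmap the region number itself serves as the RID.
	 */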
2259	if (pm == NULL) {
2260		for (i = 0; i < 5; i++) {
2261			ia64_set_rr(IA64_RR_BASE(i),
2262			    (i << 8)|(PAGE_SHIFT << 2)|1);
2263		}
2264	} else {
2265		for (i = 0; i < 5; i++) {
2266			ia64_set_rr(IA64_RR_BASE(i),
2267			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2268		}
2269	}
2270	PCPU_SET(md.current_pmap, pm);
2271	ia64_srlz_d();
2272
2273out:
2274	critical_exit();
2275	return (prevpm);
2276}
2277
2278void
2279pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2280{
2281	pmap_t oldpm;
2282	struct ia64_lpte *pte;
2283	vm_offset_t lim;
2284	vm_size_t len;
2285
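	/* Expand the range to cover whole 32-byte blocks. */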
2286	sz += va & 31;
2287	va &= ~31;
2288	sz = (sz + 31) & ~31;
2289
2290	PMAP_LOCK(pm);
2291	oldpm = pmap_switch(pm);
2292	while (sz > 0) {
2293		lim = round_page(va);
2294		len = MIN(lim - va, sz);
2295		pte = pmap_find_vhpt(va);
2296		if (pte != NULL && pmap_present(pte))
2297			ia64_sync_icache(va, len);
2298		va += len;
2299		sz -= len;
2300	}
2301	pmap_switch(oldpm);
2302	PMAP_UNLOCK(pm);
2303}
2304
2305/*
2306 *	Increase the starting virtual address of the given mapping if a
2307 *	different alignment might result in more superpage mappings.
2308 */
2309void
2310pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2311    vm_offset_t *addr, vm_size_t size)
2312{
2313}
2314
2315#include "opt_ddb.h"
2316
2317#ifdef DDB
2318
2319#include <ddb/ddb.h>
2320
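/*
 * Names for the ia64 page sizes; the index is the log2 of the page
 * size in bytes, as encoded in the region registers and the ITIR.
 */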
2321static const char *psnames[] = {
2322	"1B",	"2B",	"4B",	"8B",
2323	"16B",	"32B",	"64B",	"128B",
2324	"256B",	"512B",	"1K",	"2K",
2325	"4K",	"8K",	"16K",	"32K",
2326	"64K",	"128K",	"256K",	"512K",
2327	"1M",	"2M",	"4M",	"8M",
2328	"16M",	"32M",	"64M",	"128M",
2329	"256M",	"512M",	"1G",	"2G"
2330};
2331
2332static void
2333print_trs(int type)
2334{
2335	struct ia64_pal_result res;
2336	int i, maxtr;
2337	struct {
2338		pt_entry_t	pte;
2339		uint64_t	itir;
2340		uint64_t	ifa;
2341		struct ia64_rr	rr;
2342	} buf;
2343	static const char *manames[] = {
2344		"WB",	"bad",	"bad",	"bad",
2345		"UC",	"UCE",	"WC",	"NaT",
2346	};
2347
2348	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2349	if (res.pal_status != 0) {
2350		db_printf("Can't get VM summary\n");
2351		return;
2352	}
2353
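	/*
	 * Extract the index of the highest implemented translation
	 * register for the requested TR type (0 = instruction,
	 * 1 = data) from the PAL_VM_SUMMARY result.
	 */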
2354	if (type == 0)
2355		maxtr = (res.pal_result[0] >> 40) & 0xff;
2356	else
2357		maxtr = (res.pal_result[0] >> 32) & 0xff;
2358
2359	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2360	for (i = 0; i <= maxtr; i++) {
2361		bzero(&buf, sizeof(buf));
2362		res = ia64_call_pal_stacked_physical(PAL_VM_TR_READ, i,
2363		    type, ia64_tpa((uint64_t) &buf));
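		/*
		 * res.pal_result[0] flags which of the returned fields
		 * are valid; clear any field PAL reports as invalid.
		 */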
2364		if (!(res.pal_result[0] & 1))
2365			buf.pte &= ~PTE_AR_MASK;
2366		if (!(res.pal_result[0] & 2))
2367			buf.pte &= ~PTE_PL_MASK;
2368		if (!(res.pal_result[0] & 4))
2369			pmap_clear_dirty(&buf);
2370		if (!(res.pal_result[0] & 8))
2371			buf.pte &= ~PTE_MA_MASK;
2372		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2373		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2374		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2375		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2376		    (buf.pte & PTE_ED) ? 1 : 0,
2377		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2378		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2379		    (pmap_dirty(&buf)) ? 1 : 0,
2380		    (pmap_accessed(&buf)) ? 1 : 0,
2381		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2382		    (pmap_present(&buf)) ? 1 : 0,
2383		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2384	}
2385}
2386
2387DB_COMMAND(itr, db_itr)
2388{
2389	print_trs(0);
2390}
2391
2392DB_COMMAND(dtr, db_dtr)
2393{
2394	print_trs(1);
2395}
2396
2397DB_COMMAND(rr, db_rr)
2398{
2399	int i;
2400	uint64_t t;
2401	struct ia64_rr rr;
2402
2403	db_printf("RR RID    PgSz VE\n");
2404	for (i = 0; i < 8; i++) {
2405		__asm __volatile ("mov %0=rr[%1]"
2406				  : "=r"(t)
2407				  : "r"(IA64_RR_BASE(i)));
2408		*(uint64_t *) &rr = t;
2409		db_printf("%d  %06x %4s %d\n",
2410		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2411	}
2412}
2413
2414DB_COMMAND(thash, db_thash)
2415{
2416	if (!have_addr)
2417		return;
2418
2419	db_printf("%p\n", (void *) ia64_thash(addr));
2420}
2421
2422DB_COMMAND(ttag, db_ttag)
2423{
2424	if (!have_addr)
2425		return;
2426
2427	db_printf("0x%lx\n", ia64_ttag(addr));
2428}
2429
2430DB_COMMAND(kpte, db_kpte)
2431{
2432	struct ia64_lpte *pte;
2433
2434	if (!have_addr) {
2435		db_printf("usage: kpte <kva>\n");
2436		return;
2437	}
2438	if (addr < VM_MIN_KERNEL_ADDRESS) {
2439		db_printf("kpte: error: invalid <kva>\n");
2440		return;
2441	}
2442	pte = pmap_find_kpte(addr);
2443	db_printf("kpte at %p:\n", pte);
2444	db_printf("  pte  =%016lx\n", pte->pte);
2445	db_printf("  itir =%016lx\n", pte->itir);
2446	db_printf("  tag  =%016lx\n", pte->tag);
2447	db_printf("  chain=%016lx\n", pte->chain);
2448}
2449
2450#endif
2451