1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 223732 2011-07-02 23:42:04Z alc $");
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/lock.h>
54#include <sys/mman.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/smp.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60
61#include <vm/vm.h>
62#include <vm/vm_page.h>
63#include <vm/vm_map.h>
64#include <vm/vm_object.h>
65#include <vm/vm_pageout.h>
66#include <vm/uma.h>
67
68#include <machine/bootinfo.h>
69#include <machine/md_var.h>
70#include <machine/pal.h>
71
72/*
73 *	Manages physical address maps.
74 *
75 *	In addition to hardware address maps, this
76 *	module is called upon to provide software-use-only
77 *	maps which may or may not be stored in the same
78 *	form as hardware maps.  These pseudo-maps are
79 *	used to store intermediate results from copy
80 *	operations to and from address spaces.
81 *
82 *	Since the information managed by this module is
83 *	also stored by the logical address mapping module,
84 *	this module may throw away valid virtual-to-physical
85 *	mappings at almost any time.  However, invalidations
86 *	of virtual-to-physical mappings must be done as
87 *	requested.
88 *
89 *	In order to cope with hardware architectures which
90 *	make virtual-to-physical map invalidates expensive,
91 * this module may delay invalidation or reduced-protection
92 *	operations until such time as they are actually
93 *	necessary.  This module is given full information as
94 *	to which processors are currently using which maps,
95 *	and to when physical maps must be made correct.
96 */
97
98/*
99 * Following the Linux model, region IDs are allocated in groups of
100 * eight so that a single region ID can be used for as many RRs as we
101 * want by encoding the RR number into the low bits of the ID.
102 *
103 * We reserve region ID 0 for the kernel and allocate the remaining
104 * IDs for user pmaps.
105 *
106 * Region 0-3:	User virtually mapped
107 * Region 4:	PBVM and special mappings
108 * Region 5:	Kernel virtual memory
109 * Region 6:	Direct-mapped uncacheable
110 * Region 7:	Direct-mapped cacheable
111 */
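/*
 * Editorial note: a region register value packs the region ID above a
 * preferred page size and a VHPT-walker enable bit, i.e. roughly
 * "(rid << 8) | (page_shift << 2) | 1".  That is the encoding used when
 * pmap_bootstrap() programs region 5:
 *
 *	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
 */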
112
113/* XXX move to a header. */
114extern uint64_t ia64_gateway_page[];
115
116#ifndef PMAP_SHPGPERPROC
117#define PMAP_SHPGPERPROC 200
118#endif
119
120#if !defined(DIAGNOSTIC)
121#define PMAP_INLINE __inline
122#else
123#define PMAP_INLINE
124#endif
125
126#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
127#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
128#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
129#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
130#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
131#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
132#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
133#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
134
135#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
136#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
137#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
138#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
139
140#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
141
142/*
143 * The VHPT bucket head structure.
144 */
145struct ia64_bucket {
146	uint64_t	chain;
147	struct mtx	mutex;
148	u_int		length;
149};
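/*
 * Editorial note: "chain" is the physical address of the first ia64_lpte
 * on this bucket's collision chain (0 when empty) and "length" counts the
 * chained entries; the lengths are summed for the machdep.vhpt.population
 * sysctl.
 */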
150
151/*
152 * Statically allocated kernel pmap
153 */
154struct pmap kernel_pmap_store;
155
156vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
157vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
158
159/*
160 * Kernel virtual memory management.
161 */
162static int nkpt;
163extern struct ia64_lpte ***ia64_kptdir;
164
165#define KPTE_DIR0_INDEX(va) \
166	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
167#define KPTE_DIR1_INDEX(va) \
168	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
169#define KPTE_PTE_INDEX(va) \
170	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
171#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
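/*
 * Editorial sketch, derived from the macros above: a region 5 kernel VA is
 * resolved with a three-level lookup.  The low PAGE_SHIFT bits are the page
 * offset, the next PAGE_SHIFT-5 bits select the lpte within a leaf page
 * (NKPTEPG entries, 32 bytes each), and two further groups of PAGE_SHIFT-3
 * bits index the second- and first-level directory pages (PAGE_SIZE/8
 * pointers each).  In other words:
 *
 *	pte = &ia64_kptdir[KPTE_DIR0_INDEX(va)]
 *	    [KPTE_DIR1_INDEX(va)][KPTE_PTE_INDEX(va)];
 */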
172
173vm_offset_t kernel_vm_end;
174
175/* Values for ptc.e. XXX values for SKI. */
176static uint64_t pmap_ptc_e_base = 0x100000000;
177static uint64_t pmap_ptc_e_count1 = 3;
178static uint64_t pmap_ptc_e_count2 = 2;
179static uint64_t pmap_ptc_e_stride1 = 0x2000;
180static uint64_t pmap_ptc_e_stride2 = 0x100000000;
181
182struct mtx pmap_ptc_mutex;
183
184/*
185 * Data for the RID allocator
186 */
187static int pmap_ridcount;
188static int pmap_rididx;
189static int pmap_ridmapsz;
190static int pmap_ridmax;
191static uint64_t *pmap_ridmap;
192struct mtx pmap_ridmutex;
193
194/*
195 * Data for the pv entry allocation mechanism
196 */
197static uma_zone_t pvzone;
198static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
199
200/*
201 * Data for allocating PTEs for user processes.
202 */
203static uma_zone_t ptezone;
204
205/*
206 * Virtual Hash Page Table (VHPT) data.
207 */
208/* SYSCTL_DECL(_machdep); */
209SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
210
211struct ia64_bucket *pmap_vhpt_bucket;
212
213int pmap_vhpt_nbuckets;
214SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
215    &pmap_vhpt_nbuckets, 0, "");
216
217int pmap_vhpt_log2size = 0;
218TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
219SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
220    &pmap_vhpt_log2size, 0, "");
221
222static int pmap_vhpt_inserts;
223SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
224    &pmap_vhpt_inserts, 0, "");
225
226static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
227SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
228    NULL, 0, pmap_vhpt_population, "I", "");
229
230static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
231
232static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
233static pv_entry_t get_pv_entry(pmap_t locked_pmap);
234
235static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
236		    vm_page_t m, vm_prot_t prot);
237static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
238static void	pmap_invalidate_all(void);
239static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
240		    vm_offset_t va, pv_entry_t pv, int freepte);
241static int	pmap_remove_vhpt(vm_offset_t va);
242static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
243		    vm_page_t m);
244
245vm_offset_t
246pmap_steal_memory(vm_size_t size)
247{
248	vm_size_t bank_size;
249	vm_offset_t pa, va;
250
251	size = round_page(size);
252
253	bank_size = phys_avail[1] - phys_avail[0];
254	while (size > bank_size) {
255		int i;
256		for (i = 0; phys_avail[i+2]; i+= 2) {
257			phys_avail[i] = phys_avail[i+2];
258			phys_avail[i+1] = phys_avail[i+3];
259		}
260		phys_avail[i] = 0;
261		phys_avail[i+1] = 0;
262		if (!phys_avail[0])
263			panic("pmap_steal_memory: out of memory");
264		bank_size = phys_avail[1] - phys_avail[0];
265	}
266
267	pa = phys_avail[0];
268	phys_avail[0] += size;
269
270	va = IA64_PHYS_TO_RR7(pa);
271	bzero((caddr_t) va, size);
272	return va;
273}
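/*
 * Editorial note: pmap_steal_memory() is a boot-time bump allocator over
 * phys_avail[]; the memory comes out of the first sufficiently large bank,
 * is returned through the region 7 direct map and is zeroed.  A typical
 * use from pmap_bootstrap() below:
 *
 *	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
 */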
274
275static void
276pmap_initialize_vhpt(vm_offset_t vhpt)
277{
278	struct ia64_lpte *pte;
279	u_int i;
280
281	pte = (struct ia64_lpte *)vhpt;
282	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
283		pte[i].pte = 0;
284		pte[i].itir = 0;
285		pte[i].tag = 1UL << 63; /* Invalid tag */
286		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
287	}
288}
289
290#ifdef SMP
291MALLOC_DECLARE(M_SMP);
292
293vm_offset_t
294pmap_alloc_vhpt(void)
295{
296	vm_offset_t vhpt;
297	vm_size_t size;
298
299	size = 1UL << pmap_vhpt_log2size;
300	vhpt = (uintptr_t)contigmalloc(size, M_SMP, 0, 0UL, ~0UL, size, 0UL);
301	if (vhpt != 0) {
302		vhpt = IA64_PHYS_TO_RR7(ia64_tpa(vhpt));
303		pmap_initialize_vhpt(vhpt);
304	}
305	return (vhpt);
306}
307#endif
308
309/*
310 *	Bootstrap the system enough to run with virtual memory.
311 */
312void
313pmap_bootstrap(void)
314{
315	struct ia64_pal_result res;
316	vm_offset_t base;
317	size_t size;
318	int i, j, count, ridbits;
319
320	/*
321	 * Query the PAL Code to find the loop parameters for the
322	 * ptc.e instruction.
323	 */
324	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
325	if (res.pal_status != 0)
326		panic("Can't configure ptc.e parameters");
327	pmap_ptc_e_base = res.pal_result[0];
328	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
329	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
330	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
331	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
332	if (bootverbose)
333		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
334		       "stride1=0x%lx, stride2=0x%lx\n",
335		       pmap_ptc_e_base,
336		       pmap_ptc_e_count1,
337		       pmap_ptc_e_count2,
338		       pmap_ptc_e_stride1,
339		       pmap_ptc_e_stride2);
340
341	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
342
343	/*
344	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
345	 *
346	 * We currently need at least 19 bits in the RID because PID_MAX
347	 * can only be encoded in 17 bits and we need RIDs for 4 regions
348	 * per process. With PID_MAX equalling 99999 this means that we
349	 * need to be able to encode 399996 (=4*PID_MAX).
350	 * The Itanium processor only has 18 bits and the architected
351	 * minimum is exactly that. So, we cannot use a PID based scheme
352	 * in those cases. Enter pmap_ridmap...
353	 * We should avoid the map when running on a processor that has
354	 * implemented enough bits. This means that we should pass the
355	 * process/thread ID to pmap. This we currently don't do, so we
356	 * use the map anyway. However, we don't want to allocate a map
357	 * that is large enough to cover the range dictated by the number
358	 * of bits in the RID, because that may result in a RID map of
359	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
360 * The bottom line: we create a 32KB map when the processor only
361	 * implements 18 bits (or when we can't figure it out). Otherwise
362	 * we create a 64KB map.
363	 */
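	/*
	 * Editorial arithmetic for the above: the bitmap takes 2^ridbits / 8
	 * bytes, i.e. 32KB for 18 RID bits, 64KB for the 19 bits we clamp to
	 * below, and a full 24-bit RID space would need 2MB.
	 */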
364	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
365	if (res.pal_status != 0) {
366		if (bootverbose)
367			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
368		ridbits = 18; /* guaranteed minimum */
369	} else {
370		ridbits = (res.pal_result[1] >> 8) & 0xff;
371		if (bootverbose)
372			printf("Processor supports %d Region ID bits\n",
373			    ridbits);
374	}
375	if (ridbits > 19)
376		ridbits = 19;
377
378	pmap_ridmax = (1 << ridbits);
379	pmap_ridmapsz = pmap_ridmax / 64;
380	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
381	pmap_ridmap[0] |= 0xff;
382	pmap_rididx = 0;
383	pmap_ridcount = 8;
384	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
385
386	/*
387	 * Allocate some memory for initial kernel 'page tables'.
388	 */
389	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
390	nkpt = 0;
391	kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
392
393	for (i = 0; phys_avail[i+2]; i+= 2)
394		;
395	count = i+2;
396
397	/*
398	 * Determine a valid (mappable) VHPT size.
399	 */
400	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
401	if (pmap_vhpt_log2size == 0)
402		pmap_vhpt_log2size = 20;
403	else if (pmap_vhpt_log2size < 16)
404		pmap_vhpt_log2size = 16;
405	else if (pmap_vhpt_log2size > 28)
406		pmap_vhpt_log2size = 28;
407	if (pmap_vhpt_log2size & 1)
408		pmap_vhpt_log2size--;
409
410	base = 0;
411	size = 1UL << pmap_vhpt_log2size;
412	for (i = 0; i < count; i += 2) {
413		base = (phys_avail[i] + size - 1) & ~(size - 1);
414		if (base + size <= phys_avail[i+1])
415			break;
416	}
417	if (!phys_avail[i])
418		panic("Unable to allocate VHPT");
419
420	if (base != phys_avail[i]) {
421		/* Split this region. */
422		for (j = count; j > i; j -= 2) {
423			phys_avail[j] = phys_avail[j-2];
424			phys_avail[j+1] = phys_avail[j-2+1];
425		}
426		phys_avail[i+1] = base;
427		phys_avail[i+2] = base + size;
428	} else
429		phys_avail[i] = base + size;
430
431	base = IA64_PHYS_TO_RR7(base);
432	PCPU_SET(md.vhpt, base);
433	if (bootverbose)
434		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
435
436	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
437	pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
438	    sizeof(struct ia64_bucket));
439	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
440		/* Stolen memory is zeroed. */
441		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
442		    MTX_NOWITNESS | MTX_SPIN);
443	}
444
445	pmap_initialize_vhpt(base);
446	map_vhpt(base);
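	/*
	 * Editorial note: the PTA value below packs, from the low bits up,
	 * the walker enable bit, the VHPT size (pmap_vhpt_log2size in bits
	 * 2-7) and the long-format flag (bit 8) on top of the region 7 base
	 * address of the table.
	 */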
447	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
448	ia64_srlz_i();
449
450	virtual_avail = VM_MIN_KERNEL_ADDRESS;
451	virtual_end = VM_MAX_KERNEL_ADDRESS;
452
453	/*
454	 * Initialize the kernel pmap (which is statically allocated).
455	 */
456	PMAP_LOCK_INIT(kernel_pmap);
457	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
458		kernel_pmap->pm_rid[i] = 0;
459	TAILQ_INIT(&kernel_pmap->pm_pvlist);
460	PCPU_SET(md.current_pmap, kernel_pmap);
461
462	/* Region 5 is mapped via the VHPT. */
463	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
464
465	/*
466	 * Clear out any random TLB entries left over from booting.
467	 */
468	pmap_invalidate_all();
469
470	map_gateway_page();
471}
472
473static int
474pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
475{
476	int count, error, i;
477
478	count = 0;
479	for (i = 0; i < pmap_vhpt_nbuckets; i++)
480		count += pmap_vhpt_bucket[i].length;
481
482	error = SYSCTL_OUT(req, &count, sizeof(count));
483	return (error);
484}
485
486/*
487 *	Initialize a vm_page's machine-dependent fields.
488 */
489void
490pmap_page_init(vm_page_t m)
491{
492
493	TAILQ_INIT(&m->md.pv_list);
494	m->md.pv_list_count = 0;
495}
496
497/*
498 *	Initialize the pmap module.
499 *	Called by vm_init, to initialize any structures that the pmap
500 *	system needs to map virtual memory.
501 */
502void
503pmap_init(void)
504{
505	int shpgperproc = PMAP_SHPGPERPROC;
506
507	/*
508	 * Initialize the address space (zone) for the pv entries.  Set a
509	 * high water mark so that the system can recover from excessive
510	 * numbers of pv entries.
511	 */
512	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
513	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
514	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
515	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
516	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
517	pv_entry_high_water = 9 * (pv_entry_max / 10);
518
519	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
520	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
521}
522
523
524/***************************************************
525 * Manipulate TLBs for a pmap
526 ***************************************************/
527
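/*
 * Editorial summary: invalidate the translation for "va" on every CPU by
 * poisoning the matching tag in each processor's VHPT and then issuing a
 * global purge (ptc.ga) under the PTC mutex.
 */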
528static void
529pmap_invalidate_page(vm_offset_t va)
530{
531	struct ia64_lpte *pte;
532	struct pcpu *pc;
533	uint64_t tag;
534	u_int vhpt_ofs;
535
536	critical_enter();
537
538	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
539	tag = ia64_ttag(va);
540	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
541		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
542		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
543	}
544
545	mtx_lock_spin(&pmap_ptc_mutex);
546
547	ia64_ptc_ga(va, PAGE_SHIFT << 2);
548	ia64_mf();
549	ia64_srlz_i();
550
551	mtx_unlock_spin(&pmap_ptc_mutex);
552
553	ia64_invala();
554
555	critical_exit();
556}
557
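/*
 * Editorial summary: flush the entire TLB of the local CPU by walking the
 * ptc.e loop parameters (base, counts and strides) that pmap_bootstrap()
 * obtained from PAL_PTCE_INFO.
 */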
558static void
559pmap_invalidate_all_1(void *arg)
560{
561	uint64_t addr;
562	int i, j;
563
564	critical_enter();
565	addr = pmap_ptc_e_base;
566	for (i = 0; i < pmap_ptc_e_count1; i++) {
567		for (j = 0; j < pmap_ptc_e_count2; j++) {
568			ia64_ptc_e(addr);
569			addr += pmap_ptc_e_stride2;
570		}
571		addr += pmap_ptc_e_stride1;
572	}
573	critical_exit();
574}
575
576static void
577pmap_invalidate_all(void)
578{
579
580#ifdef SMP
581	if (mp_ncpus > 1) {
582		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
583		return;
584	}
585#endif
586	pmap_invalidate_all_1(NULL);
587}
588
589static uint32_t
590pmap_allocate_rid(void)
591{
592	uint64_t bit, bits;
593	int rid;
594
595	mtx_lock(&pmap_ridmutex);
596	if (pmap_ridcount == pmap_ridmax)
597		panic("pmap_allocate_rid: All Region IDs used");
598
599	/* Find an index with a free bit. */
600	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
601		pmap_rididx++;
602		if (pmap_rididx == pmap_ridmapsz)
603			pmap_rididx = 0;
604	}
605	rid = pmap_rididx * 64;
606
607	/* Find a free bit. */
608	bit = 1UL;
609	while (bits & bit) {
610		rid++;
611		bit <<= 1;
612	}
613
614	pmap_ridmap[pmap_rididx] |= bit;
615	pmap_ridcount++;
616	mtx_unlock(&pmap_ridmutex);
617
618	return rid;
619}
620
621static void
622pmap_free_rid(uint32_t rid)
623{
624	uint64_t bit;
625	int idx;
626
627	idx = rid / 64;
628	bit = ~(1UL << (rid & 63));
629
630	mtx_lock(&pmap_ridmutex);
631	pmap_ridmap[idx] &= bit;
632	pmap_ridcount--;
633	mtx_unlock(&pmap_ridmutex);
634}
635
636/***************************************************
637 * Page table page management routines.....
638 ***************************************************/
639
640void
641pmap_pinit0(struct pmap *pmap)
642{
643	/* kernel_pmap is the same as any other pmap. */
644	pmap_pinit(pmap);
645}
646
647/*
648 * Initialize a preallocated and zeroed pmap structure,
649 * such as one in a vmspace structure.
650 */
651int
652pmap_pinit(struct pmap *pmap)
653{
654	int i;
655
656	PMAP_LOCK_INIT(pmap);
657	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
658		pmap->pm_rid[i] = pmap_allocate_rid();
659	TAILQ_INIT(&pmap->pm_pvlist);
660	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
661	return (1);
662}
663
664/***************************************************
665 * Pmap allocation/deallocation routines.
666 ***************************************************/
667
668/*
669 * Release any resources held by the given physical map.
670 * Called when a pmap initialized by pmap_pinit is being released.
671 * Should only be called if the map contains no valid mappings.
672 */
673void
674pmap_release(pmap_t pmap)
675{
676	int i;
677
678	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
679		if (pmap->pm_rid[i])
680			pmap_free_rid(pmap->pm_rid[i]);
681	PMAP_LOCK_DESTROY(pmap);
682}
683
684/*
685 * grow the number of kernel page table entries, if needed
686 */
687void
688pmap_growkernel(vm_offset_t addr)
689{
690	struct ia64_lpte **dir1;
691	struct ia64_lpte *leaf;
692	vm_page_t nkpg;
693
694	while (kernel_vm_end <= addr) {
695		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
696			panic("%s: out of kernel address space", __func__);
697
698		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
699		if (dir1 == NULL) {
700			nkpg = vm_page_alloc(NULL, nkpt++,
701			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
702			if (!nkpg)
703				panic("%s: cannot add dir. page", __func__);
704
705			dir1 = (struct ia64_lpte **)
706			    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
707			bzero(dir1, PAGE_SIZE);
708			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
709		}
710
711		nkpg = vm_page_alloc(NULL, nkpt++,
712		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
713		if (!nkpg)
714			panic("%s: cannot add PTE page", __func__);
715
716		leaf = (struct ia64_lpte *)
717		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
718		bzero(leaf, PAGE_SIZE);
719		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
720
721		kernel_vm_end += PAGE_SIZE * NKPTEPG;
722	}
723}
724
725/***************************************************
726 * page management routines.
727 ***************************************************/
728
729/*
730 * free the pv_entry back to the free list
731 */
732static PMAP_INLINE void
733free_pv_entry(pv_entry_t pv)
734{
735	pv_entry_count--;
736	uma_zfree(pvzone, pv);
737}
738
739/*
740 * get a new pv_entry, allocating a block from the system
741 * when needed.
742 */
743static pv_entry_t
744get_pv_entry(pmap_t locked_pmap)
745{
746	static const struct timeval printinterval = { 60, 0 };
747	static struct timeval lastprint;
748	struct vpgqueues *vpq;
749	struct ia64_lpte *pte;
750	pmap_t oldpmap, pmap;
751	pv_entry_t allocated_pv, next_pv, pv;
752	vm_offset_t va;
753	vm_page_t m;
754
755	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
756	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
757	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
758	if (allocated_pv != NULL) {
759		pv_entry_count++;
760		if (pv_entry_count > pv_entry_high_water)
761			pagedaemon_wakeup();
762		else
763			return (allocated_pv);
764	}
765
766	/*
767	 * Reclaim pv entries: At first, destroy mappings to inactive
768	 * pages.  After that, if a pv entry is still needed, destroy
769	 * mappings to active pages.
770	 */
771	if (ratecheck(&lastprint, &printinterval))
772		printf("Approaching the limit on PV entries, "
773		    "increase the vm.pmap.shpgperproc tunable.\n");
774	vpq = &vm_page_queues[PQ_INACTIVE];
775retry:
776	TAILQ_FOREACH(m, &vpq->pl, pageq) {
777		if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
778			continue;
779		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
780			va = pv->pv_va;
781			pmap = pv->pv_pmap;
782			/* Avoid deadlock and lock recursion. */
783			if (pmap > locked_pmap)
784				PMAP_LOCK(pmap);
785			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
786				continue;
787			pmap->pm_stats.resident_count--;
788			oldpmap = pmap_switch(pmap);
789			pte = pmap_find_vhpt(va);
790			KASSERT(pte != NULL, ("pte"));
791			pmap_remove_vhpt(va);
792			pmap_invalidate_page(va);
793			pmap_switch(oldpmap);
794			if (pmap_accessed(pte))
795				vm_page_flag_set(m, PG_REFERENCED);
796			if (pmap_dirty(pte))
797				vm_page_dirty(m);
798			pmap_free_pte(pte, va);
799			TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
800			m->md.pv_list_count--;
801			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
802			if (pmap != locked_pmap)
803				PMAP_UNLOCK(pmap);
804			if (allocated_pv == NULL)
805				allocated_pv = pv;
806			else
807				free_pv_entry(pv);
808		}
809		if (TAILQ_EMPTY(&m->md.pv_list))
810			vm_page_flag_clear(m, PG_WRITEABLE);
811	}
812	if (allocated_pv == NULL) {
813		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
814			vpq = &vm_page_queues[PQ_ACTIVE];
815			goto retry;
816		}
817		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
818	}
819	return (allocated_pv);
820}
821
822/*
823 * Conditionally create a pv entry.
824 */
825static boolean_t
826pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
827{
828	pv_entry_t pv;
829
830	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
831	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
832	if (pv_entry_count < pv_entry_high_water &&
833	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
834		pv_entry_count++;
835		pv->pv_va = va;
836		pv->pv_pmap = pmap;
837		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
838		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
839		m->md.pv_list_count++;
840		return (TRUE);
841	} else
842		return (FALSE);
843}
844
845/*
846 * Add an ia64_lpte to the VHPT.
847 */
848static void
849pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
850{
851	struct ia64_bucket *bckt;
852	struct ia64_lpte *vhpte;
853	uint64_t pte_pa;
854
855	/* Can fault, so get it out of the way. */
856	pte_pa = ia64_tpa((vm_offset_t)pte);
857
858	vhpte = (struct ia64_lpte *)ia64_thash(va);
859	bckt = (struct ia64_bucket *)vhpte->chain;
860
861	mtx_lock_spin(&bckt->mutex);
862	pte->chain = bckt->chain;
863	ia64_mf();
864	bckt->chain = pte_pa;
865
866	pmap_vhpt_inserts++;
867	bckt->length++;
868	mtx_unlock_spin(&bckt->mutex);
869}
870
871/*
872 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
873 * worked or an appropriate error code otherwise.
874 */
875static int
876pmap_remove_vhpt(vm_offset_t va)
877{
878	struct ia64_bucket *bckt;
879	struct ia64_lpte *pte;
880	struct ia64_lpte *lpte;
881	struct ia64_lpte *vhpte;
882	uint64_t chain, tag;
883
884	tag = ia64_ttag(va);
885	vhpte = (struct ia64_lpte *)ia64_thash(va);
886	bckt = (struct ia64_bucket *)vhpte->chain;
887
888	lpte = NULL;
889	mtx_lock_spin(&bckt->mutex);
890	chain = bckt->chain;
891	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
892	while (chain != 0 && pte->tag != tag) {
893		lpte = pte;
894		chain = pte->chain;
895		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
896	}
897	if (chain == 0) {
898		mtx_unlock_spin(&bckt->mutex);
899		return (ENOENT);
900	}
901
902	/* Snip this pv_entry out of the collision chain. */
903	if (lpte == NULL)
904		bckt->chain = pte->chain;
905	else
906		lpte->chain = pte->chain;
907	ia64_mf();
908
909	bckt->length--;
910	mtx_unlock_spin(&bckt->mutex);
911	return (0);
912}
913
914/*
915 * Find the ia64_lpte for the given va, if any.
916 */
917static struct ia64_lpte *
918pmap_find_vhpt(vm_offset_t va)
919{
920	struct ia64_bucket *bckt;
921	struct ia64_lpte *pte;
922	uint64_t chain, tag;
923
924	tag = ia64_ttag(va);
925	pte = (struct ia64_lpte *)ia64_thash(va);
926	bckt = (struct ia64_bucket *)pte->chain;
927
928	mtx_lock_spin(&bckt->mutex);
929	chain = bckt->chain;
930	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
931	while (chain != 0 && pte->tag != tag) {
932		chain = pte->chain;
933		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
934	}
935	mtx_unlock_spin(&bckt->mutex);
936	return ((chain != 0) ? pte : NULL);
937}
938
939/*
940 * Remove an entry from the list of managed mappings.
941 */
942static int
943pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
944{
945	if (!pv) {
946		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
947			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
948				if (pmap == pv->pv_pmap && va == pv->pv_va)
949					break;
950			}
951		} else {
952			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
953				if (va == pv->pv_va)
954					break;
955			}
956		}
957	}
958
959	if (pv) {
960		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
961		m->md.pv_list_count--;
962		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
963			vm_page_flag_clear(m, PG_WRITEABLE);
964
965		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
966		free_pv_entry(pv);
967		return 0;
968	} else {
969		return ENOENT;
970	}
971}
972
973/*
974 * Create a pv entry for page at pa for
975 * (pmap, va).
976 */
977static void
978pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
979{
980	pv_entry_t pv;
981
982	pv = get_pv_entry(pmap);
983	pv->pv_pmap = pmap;
984	pv->pv_va = va;
985
986	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
987	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
988	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
989	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
990	m->md.pv_list_count++;
991}
992
993/*
994 *	Routine:	pmap_extract
995 *	Function:
996 *		Extract the physical page address associated
997 *		with the given map/virtual_address pair.
998 */
999vm_paddr_t
1000pmap_extract(pmap_t pmap, vm_offset_t va)
1001{
1002	struct ia64_lpte *pte;
1003	pmap_t oldpmap;
1004	vm_paddr_t pa;
1005
1006	pa = 0;
1007	PMAP_LOCK(pmap);
1008	oldpmap = pmap_switch(pmap);
1009	pte = pmap_find_vhpt(va);
1010	if (pte != NULL && pmap_present(pte))
1011		pa = pmap_ppn(pte);
1012	pmap_switch(oldpmap);
1013	PMAP_UNLOCK(pmap);
1014	return (pa);
1015}
1016
1017/*
1018 *	Routine:	pmap_extract_and_hold
1019 *	Function:
1020 *		Atomically extract and hold the physical page
1021 *		with the given pmap and virtual address pair
1022 *		if that mapping permits the given protection.
1023 */
1024vm_page_t
1025pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1026{
1027	struct ia64_lpte *pte;
1028	pmap_t oldpmap;
1029	vm_page_t m;
1030	vm_paddr_t pa;
1031
1032	pa = 0;
1033	m = NULL;
1034	PMAP_LOCK(pmap);
1035	oldpmap = pmap_switch(pmap);
1036retry:
1037	pte = pmap_find_vhpt(va);
1038	if (pte != NULL && pmap_present(pte) &&
1039	    (pmap_prot(pte) & prot) == prot) {
1040		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1041		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1042			goto retry;
1043		vm_page_hold(m);
1044	}
1045	PA_UNLOCK_COND(pa);
1046	pmap_switch(oldpmap);
1047	PMAP_UNLOCK(pmap);
1048	return (m);
1049}
1050
1051/***************************************************
1052 * Low level mapping routines.....
1053 ***************************************************/
1054
1055/*
1056 * Find the kernel lpte for mapping the given virtual address, which
1057 * must be in the part of region 5 which we can cover with our kernel
1058 * 'page tables'.
1059 */
1060static struct ia64_lpte *
1061pmap_find_kpte(vm_offset_t va)
1062{
1063	struct ia64_lpte **dir1;
1064	struct ia64_lpte *leaf;
1065
1066	KASSERT((va >> 61) == 5,
1067		("kernel mapping 0x%lx not in region 5", va));
1068	KASSERT(va < kernel_vm_end,
1069		("kernel mapping 0x%lx out of range", va));
1070
1071	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1072	leaf = dir1[KPTE_DIR1_INDEX(va)];
1073	return (&leaf[KPTE_PTE_INDEX(va)]);
1074}
1075
1076/*
1077 * Find a pte suitable for mapping a user-space address. If one exists
1078 * in the VHPT, that one will be returned; otherwise a new pte is
1079 * allocated.
1080 */
1081static struct ia64_lpte *
1082pmap_find_pte(vm_offset_t va)
1083{
1084	struct ia64_lpte *pte;
1085
1086	if (va >= VM_MAXUSER_ADDRESS)
1087		return pmap_find_kpte(va);
1088
1089	pte = pmap_find_vhpt(va);
1090	if (pte == NULL) {
1091		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1092		if (pte != NULL)	/* Callers cope with a NULL return. */
			pte->tag = 1UL << 63;
1093	}
1094	return (pte);
1095}
1096
1097/*
1098 * Free a pte which is now unused. This simply returns it to the zone
1099 * allocator if it is a user mapping. For kernel mappings, clear the
1100 * valid bit to make it clear that the mapping is not currently used.
1101 */
1102static void
1103pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1104{
1105	if (va < VM_MAXUSER_ADDRESS)
1106		uma_zfree(ptezone, pte);
1107	else
1108		pmap_clear_present(pte);
1109}
1110
1111static PMAP_INLINE void
1112pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1113{
1114	static long prot2ar[4] = {
1115		PTE_AR_R,		/* VM_PROT_NONE */
1116		PTE_AR_RW,		/* VM_PROT_WRITE */
1117		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1118		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1119	};
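	/*
	 * Editorial note: the table is indexed by (prot & VM_PROT_ALL) >> 1,
	 * which drops VM_PROT_READ; read-only therefore shares an entry with
	 * VM_PROT_NONE, and read/write with write-only, matching the labels
	 * above.
	 */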
1120
1121	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1122	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1123	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1124	    ? PTE_PL_KERN : PTE_PL_USER;
1125	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1126}
1127
1128/*
1129 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1130 * the pte was originally valid, then it's assumed to already be in the
1131 * VHPT.
1132 * This function does not set the protection bits.  It's expected
1133 * that those have been set correctly prior to calling this function.
1134 */
1135static void
1136pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1137    boolean_t wired, boolean_t managed)
1138{
1139
1140	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED;
1141	pte->pte |= PTE_PRESENT | PTE_MA_WB;
1142	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1143	pte->pte |= (wired) ? PTE_WIRED : 0;
1144	pte->pte |= pa & PTE_PPN_MASK;
1145
1146	pte->itir = PAGE_SHIFT << 2;
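	/* The translation's page size (PAGE_SHIFT) goes into the itir "ps" field. */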
1147
1148	pte->tag = ia64_ttag(va);
1149}
1150
1151/*
1152 * Remove the (possibly managed) mapping represented by pte from the
1153 * given pmap.
1154 */
1155static int
1156pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1157		pv_entry_t pv, int freepte)
1158{
1159	int error;
1160	vm_page_t m;
1161
1162	/*
1163	 * First remove from the VHPT.
1164	 */
1165	error = pmap_remove_vhpt(va);
1166	if (error)
1167		return (error);
1168
1169	pmap_invalidate_page(va);
1170
1171	if (pmap_wired(pte))
1172		pmap->pm_stats.wired_count -= 1;
1173
1174	pmap->pm_stats.resident_count -= 1;
1175	if (pmap_managed(pte)) {
1176		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1177		if (pmap_dirty(pte))
1178			vm_page_dirty(m);
1179		if (pmap_accessed(pte))
1180			vm_page_flag_set(m, PG_REFERENCED);
1181
1182		error = pmap_remove_entry(pmap, m, va, pv);
1183	}
1184	if (freepte)
1185		pmap_free_pte(pte, va);
1186
1187	return (error);
1188}
1189
1190/*
1191 * Extract the physical page address associated with a kernel
1192 * virtual address.
1193 */
1194vm_paddr_t
1195pmap_kextract(vm_offset_t va)
1196{
1197	struct ia64_lpte *pte;
1198	uint64_t *pbvm_pgtbl;
1199	u_int idx;
1200
1201	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));
1202
1203	/* Regions 6 and 7 are direct mapped. */
1204	if (va >= IA64_RR_BASE(6))
1205		return (IA64_RR_MASK(va));
1206
1207	/* Bail out if the virtual address is beyond our limits. */
1208	if (va >= kernel_vm_end)
1209		return (0);
1210
1211	if (va >= VM_MIN_KERNEL_ADDRESS) {
1212		pte = pmap_find_kpte(va);
1213		return (pmap_present(pte) ? pmap_ppn(pte)|(va&PAGE_MASK) : 0);
1214	}
1215
1216	/* PBVM page table. */
1217	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
1218		return (0);
1219	if (va >= IA64_PBVM_PGTBL)
1220		return ((va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl);
1221
1222	/* PBVM. */
1223	if (va >= IA64_PBVM_BASE) {
1224		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
1225		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
1226		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
1227			return (0);
1228		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
1229			return (0);
1230		return ((pbvm_pgtbl[idx] & PTE_PPN_MASK) +
1231		    (va & IA64_PBVM_PAGE_MASK));
1232	}
1233
1234	printf("XXX: %s: va=%#lx\n", __func__, va);
1235	return (0);
1236}
1237
1238/*
1239 * Add a list of wired pages to the kva.  This routine is only used for
1240 * temporary kernel mappings that do not need to have page modification
1241 * or references recorded.  Note that old mappings are simply written
1242 * over.  The page is effectively wired, but it's customary to not have
1243 * the PTE reflect that, nor update statistics.
1244 */
1245void
1246pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1247{
1248	struct ia64_lpte *pte;
1249	int i;
1250
1251	for (i = 0; i < count; i++) {
1252		pte = pmap_find_kpte(va);
1253		if (pmap_present(pte))
1254			pmap_invalidate_page(va);
1255		else
1256			pmap_enter_vhpt(pte, va);
1257		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1258		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1259		va += PAGE_SIZE;
1260	}
1261}
1262
1263/*
1264 * This routine jerks page mappings from the kernel -- it is meant only
1265 * kernel -- it is meant only for temporary mappings.
1266 */
1267void
1268pmap_qremove(vm_offset_t va, int count)
1269{
1270	struct ia64_lpte *pte;
1271	int i;
1272
1273	for (i = 0; i < count; i++) {
1274		pte = pmap_find_kpte(va);
1275		if (pmap_present(pte)) {
1276			pmap_remove_vhpt(va);
1277			pmap_invalidate_page(va);
1278			pmap_clear_present(pte);
1279		}
1280		va += PAGE_SIZE;
1281	}
1282}
1283
1284/*
1285 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1286 * to not have the PTE reflect that, nor update statistics.
1287 */
1288void
1289pmap_kenter(vm_offset_t va, vm_offset_t pa)
1290{
1291	struct ia64_lpte *pte;
1292
1293	pte = pmap_find_kpte(va);
1294	if (pmap_present(pte))
1295		pmap_invalidate_page(va);
1296	else
1297		pmap_enter_vhpt(pte, va);
1298	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1299	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1300}
1301
1302/*
1303 * Remove a page from the kva
1304 */
1305void
1306pmap_kremove(vm_offset_t va)
1307{
1308	struct ia64_lpte *pte;
1309
1310	pte = pmap_find_kpte(va);
1311	if (pmap_present(pte)) {
1312		pmap_remove_vhpt(va);
1313		pmap_invalidate_page(va);
1314		pmap_clear_present(pte);
1315	}
1316}
1317
1318/*
1319 *	Used to map a range of physical addresses into kernel
1320 *	virtual address space.
1321 *
1322 *	The value passed in '*virt' is a suggested virtual address for
1323 *	the mapping. Architectures which can support a direct-mapped
1324 *	physical to virtual region can return the appropriate address
1325 *	within that region, leaving '*virt' unchanged. Other
1326 *	architectures should map the pages starting at '*virt' and
1327 *	update '*virt' with the first usable address after the mapped
1328 *	region.
1329 */
1330vm_offset_t
1331pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1332{
1333	return IA64_PHYS_TO_RR7(start);
1334}
1335
1336/*
1337 *	Remove the given range of addresses from the specified map.
1338 *
1339 *	It is assumed that the start and end are properly
1340 *	rounded to the page size.
1341 */
1342void
1343pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1344{
1345	pmap_t oldpmap;
1346	vm_offset_t va;
1347	pv_entry_t npv, pv;
1348	struct ia64_lpte *pte;
1349
1350	if (pmap->pm_stats.resident_count == 0)
1351		return;
1352
1353	vm_page_lock_queues();
1354	PMAP_LOCK(pmap);
1355	oldpmap = pmap_switch(pmap);
1356
1357	/*
1358	 * Special-case the removal of a single page: a very
1359	 * common operation and one for which it is easy to
1360	 * short-circuit some code.
1361	 */
1362	if (sva + PAGE_SIZE == eva) {
1363		pte = pmap_find_vhpt(sva);
1364		if (pte != NULL)
1365			pmap_remove_pte(pmap, pte, sva, 0, 1);
1366		goto out;
1367	}
1368
1369	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1370		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
1371			va = pv->pv_va;
1372			if (va >= sva && va < eva) {
1373				pte = pmap_find_vhpt(va);
1374				KASSERT(pte != NULL, ("pte"));
1375				pmap_remove_pte(pmap, pte, va, pv, 1);
1376			}
1377		}
1378	} else {
1379		for (va = sva; va < eva; va += PAGE_SIZE) {
1380			pte = pmap_find_vhpt(va);
1381			if (pte != NULL)
1382				pmap_remove_pte(pmap, pte, va, 0, 1);
1383		}
1384	}
1385
1386out:
1387	vm_page_unlock_queues();
1388	pmap_switch(oldpmap);
1389	PMAP_UNLOCK(pmap);
1390}
1391
1392/*
1393 *	Routine:	pmap_remove_all
1394 *	Function:
1395 *		Removes this physical page from
1396 *		all physical maps in which it resides.
1397 *		Reflects back modify bits to the pager.
1398 *
1399 *	Notes:
1400 *		Original versions of this routine were very
1401 *		inefficient because they iteratively called
1402 *		pmap_remove (slow...)
1403 */
1404
1405void
1406pmap_remove_all(vm_page_t m)
1407{
1408	pmap_t oldpmap;
1409	pv_entry_t pv;
1410
1411	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1412	    ("pmap_remove_all: page %p is not managed", m));
1413	vm_page_lock_queues();
1414	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1415		struct ia64_lpte *pte;
1416		pmap_t pmap = pv->pv_pmap;
1417		vm_offset_t va = pv->pv_va;
1418
1419		PMAP_LOCK(pmap);
1420		oldpmap = pmap_switch(pmap);
1421		pte = pmap_find_vhpt(va);
1422		KASSERT(pte != NULL, ("pte"));
1423		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1424			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1425		pmap_remove_pte(pmap, pte, va, pv, 1);
1426		pmap_switch(oldpmap);
1427		PMAP_UNLOCK(pmap);
1428	}
1429	vm_page_flag_clear(m, PG_WRITEABLE);
1430	vm_page_unlock_queues();
1431}
1432
1433/*
1434 *	Set the physical protection on the
1435 *	specified range of this map as requested.
1436 */
1437void
1438pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1439{
1440	pmap_t oldpmap;
1441	struct ia64_lpte *pte;
1442
1443	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1444		pmap_remove(pmap, sva, eva);
1445		return;
1446	}
1447
1448	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1449	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1450		return;
1451
1452	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1453		panic("pmap_protect: unaligned addresses");
1454
1455	vm_page_lock_queues();
1456	PMAP_LOCK(pmap);
1457	oldpmap = pmap_switch(pmap);
1458	for ( ; sva < eva; sva += PAGE_SIZE) {
1459		/* If page is invalid, skip this page */
1460		pte = pmap_find_vhpt(sva);
1461		if (pte == NULL)
1462			continue;
1463
1464		/* If there's no change, skip it too */
1465		if (pmap_prot(pte) == prot)
1466			continue;
1467
1468		if ((prot & VM_PROT_WRITE) == 0 &&
1469		    pmap_managed(pte) && pmap_dirty(pte)) {
1470			vm_paddr_t pa = pmap_ppn(pte);
1471			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1472
1473			vm_page_dirty(m);
1474			pmap_clear_dirty(pte);
1475		}
1476
1477		if (prot & VM_PROT_EXECUTE)
1478			ia64_sync_icache(sva, PAGE_SIZE);
1479
1480		pmap_pte_prot(pmap, pte, prot);
1481		pmap_invalidate_page(sva);
1482	}
1483	vm_page_unlock_queues();
1484	pmap_switch(oldpmap);
1485	PMAP_UNLOCK(pmap);
1486}
1487
1488/*
1489 *	Insert the given physical page (p) at
1490 *	the specified virtual address (v) in the
1491 *	target physical map with the protection requested.
1492 *
1493 *	If specified, the page will be wired down, meaning
1494 *	that the related pte can not be reclaimed.
1495 *
1496 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1497 *	or lose information.  That is, this routine must actually
1498 *	insert this page into the given map NOW.
1499 */
1500void
1501pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1502    vm_prot_t prot, boolean_t wired)
1503{
1504	pmap_t oldpmap;
1505	vm_offset_t pa;
1506	vm_offset_t opa;
1507	struct ia64_lpte origpte;
1508	struct ia64_lpte *pte;
1509	boolean_t icache_inval, managed;
1510
1511	vm_page_lock_queues();
1512	PMAP_LOCK(pmap);
1513	oldpmap = pmap_switch(pmap);
1514
1515	va &= ~PAGE_MASK;
1516 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1517	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
1518	    (m->oflags & VPO_BUSY) != 0,
1519	    ("pmap_enter: page %p is not busy", m));
1520
1521	/*
1522	 * Find (or create) a pte for the given mapping.
1523	 */
1524	while ((pte = pmap_find_pte(va)) == NULL) {
1525		pmap_switch(oldpmap);
1526		PMAP_UNLOCK(pmap);
1527		vm_page_unlock_queues();
1528		VM_WAIT;
1529		vm_page_lock_queues();
1530		PMAP_LOCK(pmap);
1531		oldpmap = pmap_switch(pmap);
1532	}
1533	origpte = *pte;
1534	if (!pmap_present(pte)) {
1535		opa = ~0UL;
1536		pmap_enter_vhpt(pte, va);
1537	} else
1538		opa = pmap_ppn(pte);
1539	managed = FALSE;
1540	pa = VM_PAGE_TO_PHYS(m);
1541
1542	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1543
1544	/*
1545	 * Mapping has not changed, must be protection or wiring change.
1546	 */
1547	if (opa == pa) {
1548		/*
1549		 * Wiring change, just update stats. We don't worry about
1550		 * wiring PT pages as they remain resident as long as there
1551		 * are valid mappings in them. Hence, if a user page is wired,
1552		 * the PT page will be also.
1553		 */
1554		if (wired && !pmap_wired(&origpte))
1555			pmap->pm_stats.wired_count++;
1556		else if (!wired && pmap_wired(&origpte))
1557			pmap->pm_stats.wired_count--;
1558
1559		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1560
1561		/*
1562		 * We might be turning off write access to the page,
1563		 * so we go ahead and sense modify status. Otherwise,
1564		 * we can avoid I-cache invalidation if the page
1565		 * already allowed execution.
1566		 */
1567		if (managed && pmap_dirty(&origpte))
1568			vm_page_dirty(m);
1569		else if (pmap_exec(&origpte))
1570			icache_inval = FALSE;
1571
1572		pmap_invalidate_page(va);
1573		goto validate;
1574	}
1575
1576	/*
1577	 * Mapping has changed, invalidate old range and fall
1578	 * through to handle validating new mapping.
1579	 */
1580	if (opa != ~0UL) {
1581		pmap_remove_pte(pmap, pte, va, 0, 0);
1582		pmap_enter_vhpt(pte, va);
1583	}
1584
1585	/*
1586	 * Enter on the PV list if part of our managed memory.
1587	 */
1588	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1589		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1590		    ("pmap_enter: managed mapping within the clean submap"));
1591		pmap_insert_entry(pmap, va, m);
1592		managed = TRUE;
1593	}
1594
1595	/*
1596	 * Increment counters
1597	 */
1598	pmap->pm_stats.resident_count++;
1599	if (wired)
1600		pmap->pm_stats.wired_count++;
1601
1602validate:
1603
1604	/*
1605	 * Now validate mapping with desired protection/wiring. This
1606	 * adds the pte to the VHPT if necessary.
1607	 */
1608	pmap_pte_prot(pmap, pte, prot);
1609	pmap_set_pte(pte, va, pa, wired, managed);
1610
1611	/* Invalidate the I-cache when needed. */
1612	if (icache_inval)
1613		ia64_sync_icache(va, PAGE_SIZE);
1614
1615	if ((prot & VM_PROT_WRITE) != 0 && managed)
1616		vm_page_flag_set(m, PG_WRITEABLE);
1617	vm_page_unlock_queues();
1618	pmap_switch(oldpmap);
1619	PMAP_UNLOCK(pmap);
1620}
1621
1622/*
1623 * Maps a sequence of resident pages belonging to the same object.
1624 * The sequence begins with the given page m_start.  This page is
1625 * mapped at the given virtual address start.  Each subsequent page is
1626 * mapped at a virtual address that is offset from start by the same
1627 * amount as the page is offset from m_start within the object.  The
1628 * last page in the sequence is the page with the largest offset from
1629 * m_start that can be mapped at a virtual address less than the given
1630 * virtual address end.  Not every virtual page between start and end
1631 * is mapped; only those for which a resident page exists with the
1632 * corresponding offset from m_start are mapped.
1633 */
1634void
1635pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1636    vm_page_t m_start, vm_prot_t prot)
1637{
1638	pmap_t oldpmap;
1639	vm_page_t m;
1640	vm_pindex_t diff, psize;
1641
1642	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
1643	psize = atop(end - start);
1644	m = m_start;
1645	vm_page_lock_queues();
1646	PMAP_LOCK(pmap);
1647	oldpmap = pmap_switch(pmap);
1648	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1649		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1650		m = TAILQ_NEXT(m, listq);
1651	}
1652	vm_page_unlock_queues();
1653	pmap_switch(oldpmap);
1654 	PMAP_UNLOCK(pmap);
1655}
1656
1657/*
1658 * this code makes some *MAJOR* assumptions:
1659 * 1. Current pmap & pmap exists.
1660 * 2. Not wired.
1661 * 3. Read access.
1662 * 4. No page table pages.
1663 * but is *MUCH* faster than pmap_enter...
1664 */
1665
1666void
1667pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1668{
1669	pmap_t oldpmap;
1670
1671	vm_page_lock_queues();
1672	PMAP_LOCK(pmap);
1673	oldpmap = pmap_switch(pmap);
1674	pmap_enter_quick_locked(pmap, va, m, prot);
1675	vm_page_unlock_queues();
1676	pmap_switch(oldpmap);
1677	PMAP_UNLOCK(pmap);
1678}
1679
1680static void
1681pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1682    vm_prot_t prot)
1683{
1684	struct ia64_lpte *pte;
1685	boolean_t managed;
1686
1687	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1688	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
1689	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1690	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1691	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1692
1693	if ((pte = pmap_find_pte(va)) == NULL)
1694		return;
1695
1696	if (!pmap_present(pte)) {
1697		/* Enter on the PV list if the page is managed. */
1698		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1699			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1700				pmap_free_pte(pte, va);
1701				return;
1702			}
1703			managed = TRUE;
1704		} else
1705			managed = FALSE;
1706
1707		/* Increment counters. */
1708		pmap->pm_stats.resident_count++;
1709
1710		/* Initialise with R/O protection and enter into VHPT. */
1711		pmap_enter_vhpt(pte, va);
1712		pmap_pte_prot(pmap, pte,
1713		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1714		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1715
1716		if (prot & VM_PROT_EXECUTE)
1717			ia64_sync_icache(va, PAGE_SIZE);
1718	}
1719}
1720
1721/*
1722 * pmap_object_init_pt preloads the ptes for a given object
1723 * into the specified pmap.  This eliminates the blast of soft
1724 * faults on process startup and immediately after an mmap.
1725 */
1726void
1727pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1728		    vm_object_t object, vm_pindex_t pindex,
1729		    vm_size_t size)
1730{
1731
1732	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1733	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1734	    ("pmap_object_init_pt: non-device object"));
1735}
1736
1737/*
1738 *	Routine:	pmap_change_wiring
1739 *	Function:	Change the wiring attribute for a map/virtual-address
1740 *			pair.
1741 *	In/out conditions:
1742 *			The mapping must already exist in the pmap.
1743 */
1744void
1745pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1749{
1750	pmap_t oldpmap;
1751	struct ia64_lpte *pte;
1752
1753	PMAP_LOCK(pmap);
1754	oldpmap = pmap_switch(pmap);
1755
1756	pte = pmap_find_vhpt(va);
1757	KASSERT(pte != NULL, ("pte"));
1758	if (wired && !pmap_wired(pte)) {
1759		pmap->pm_stats.wired_count++;
1760		pmap_set_wired(pte);
1761	} else if (!wired && pmap_wired(pte)) {
1762		pmap->pm_stats.wired_count--;
1763		pmap_clear_wired(pte);
1764	}
1765
1766	pmap_switch(oldpmap);
1767	PMAP_UNLOCK(pmap);
1768}
1769
1770
1771
1772/*
1773 *	Copy the range specified by src_addr/len
1774 *	from the source map to the range dst_addr/len
1775 *	in the destination map.
1776 *
1777 *	This routine is only advisory and need not do anything.
1778 */
1779
1780void
1781pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1782	  vm_offset_t src_addr)
1783{
1784}
1785
1786
1787/*
1788 *	pmap_zero_page zeros the specified hardware page by
1789 *	mapping it into virtual memory and using bzero to clear
1790 *	its contents.
1791 */
1792
1793void
1794pmap_zero_page(vm_page_t m)
1795{
1796	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1797	bzero((caddr_t) va, PAGE_SIZE);
1798}
1799
1800
1801/*
1802 *	pmap_zero_page_area zeros the specified hardware page by
1803 *	mapping it into virtual memory and using bzero to clear
1804 *	its contents.
1805 *
1806 *	off and size must reside within a single page.
1807 */
1808
1809void
1810pmap_zero_page_area(vm_page_t m, int off, int size)
1811{
1812	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1813	bzero((char *)(caddr_t)va + off, size);
1814}
1815
1816
1817/*
1818 *	pmap_zero_page_idle zeros the specified hardware page by
1819 *	mapping it into virtual memory and using bzero to clear
1820 *	its contents.  This is for the vm_idlezero process.
1821 */
1822
1823void
1824pmap_zero_page_idle(vm_page_t m)
1825{
1826	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1827	bzero((caddr_t) va, PAGE_SIZE);
1828}
1829
1830
1831/*
1832 *	pmap_copy_page copies the specified (machine independent)
1833 *	page by mapping the page into virtual memory and using
1834 *	bcopy to copy the page, one machine dependent page at a
1835 *	time.
1836 */
1837void
1838pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1839{
1840	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
1841	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
1842	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
1843}
1844
1845/*
1846 * Returns true if the pmap's pv is one of the first
1847 * 16 pvs linked to from this page.  This count may
1848 * be changed upwards or downwards in the future; it
1849 * is only necessary that true be returned for a small
1850 * subset of pmaps for proper page aging.
1851 */
1852boolean_t
1853pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1854{
1855	pv_entry_t pv;
1856	int loops = 0;
1857	boolean_t rv;
1858
1859	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1860	    ("pmap_page_exists_quick: page %p is not managed", m));
1861	rv = FALSE;
1862	vm_page_lock_queues();
1863	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1864		if (pv->pv_pmap == pmap) {
1865			rv = TRUE;
1866			break;
1867		}
1868		loops++;
1869		if (loops >= 16)
1870			break;
1871	}
1872	vm_page_unlock_queues();
1873	return (rv);
1874}
1875
1876/*
1877 *	pmap_page_wired_mappings:
1878 *
1879 *	Return the number of managed mappings to the given physical page
1880 *	that are wired.
1881 */
1882int
1883pmap_page_wired_mappings(vm_page_t m)
1884{
1885	struct ia64_lpte *pte;
1886	pmap_t oldpmap, pmap;
1887	pv_entry_t pv;
1888	int count;
1889
1890	count = 0;
1891	if ((m->flags & PG_FICTITIOUS) != 0)
1892		return (count);
1893	vm_page_lock_queues();
1894	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1895		pmap = pv->pv_pmap;
1896		PMAP_LOCK(pmap);
1897		oldpmap = pmap_switch(pmap);
1898		pte = pmap_find_vhpt(pv->pv_va);
1899		KASSERT(pte != NULL, ("pte"));
1900		if (pmap_wired(pte))
1901			count++;
1902		pmap_switch(oldpmap);
1903		PMAP_UNLOCK(pmap);
1904	}
1905	vm_page_unlock_queues();
1906	return (count);
1907}
1908
1909/*
1910 * Remove all pages from the specified address space;
1911 * this aids process exit speeds.  Also, this code
1912 * is special cased for current process only, but
1913 * can have the more generic (and slightly slower)
1914 * mode enabled.  This is much faster than pmap_remove
1915 * in the case of running down an entire address space.
1916 */
1917void
1918pmap_remove_pages(pmap_t pmap)
1919{
1920	pmap_t oldpmap;
1921	pv_entry_t pv, npv;
1922
1923	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
1924		printf("warning: %s called with non-current pmap\n",
1925		    __func__);
1926		return;
1927	}
1928
1929	vm_page_lock_queues();
1930	PMAP_LOCK(pmap);
1931	oldpmap = pmap_switch(pmap);
1932
1933	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
1934		struct ia64_lpte *pte;
1935
1936		npv = TAILQ_NEXT(pv, pv_plist);
1937
1938		pte = pmap_find_vhpt(pv->pv_va);
1939		KASSERT(pte != NULL, ("pte"));
1940		if (!pmap_wired(pte))
1941			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
1942	}
1943
1944	pmap_switch(oldpmap);
1945	PMAP_UNLOCK(pmap);
1946	vm_page_unlock_queues();
1947}
1948
1949/*
1950 *	pmap_ts_referenced:
1951 *
1952 *	Return a count of reference bits for a page, clearing those bits.
1953 *	It is not necessary for every reference bit to be cleared, but it
1954 *	is necessary that 0 only be returned when there are truly no
1955 *	reference bits set.
1956 *
1957 *	XXX: The exact number of bits to check and clear is a matter that
1958 *	should be tested and standardized at some point in the future for
1959 *	optimal aging of shared pages.
1960 */
1961int
1962pmap_ts_referenced(vm_page_t m)
1963{
1964	struct ia64_lpte *pte;
1965	pmap_t oldpmap;
1966	pv_entry_t pv;
1967	int count = 0;
1968
1969	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1970	    ("pmap_ts_referenced: page %p is not managed", m));
1971	vm_page_lock_queues();
1972	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1973		PMAP_LOCK(pv->pv_pmap);
1974		oldpmap = pmap_switch(pv->pv_pmap);
1975		pte = pmap_find_vhpt(pv->pv_va);
1976		KASSERT(pte != NULL, ("pte"));
1977		if (pmap_accessed(pte)) {
1978			count++;
1979			pmap_clear_accessed(pte);
1980			pmap_invalidate_page(pv->pv_va);
1981		}
1982		pmap_switch(oldpmap);
1983		PMAP_UNLOCK(pv->pv_pmap);
1984	}
1985	vm_page_unlock_queues();
1986	return (count);
1987}
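
/*
 * Usage sketch (an assumption about the caller, not taken from this
 * file): the page daemon can fold the returned count into its page
 * aging, roughly
 *
 *	act = pmap_ts_referenced(m);
 *	if (act != 0)
 *		...advance the page's activity count by 'act'...
 */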
1988
1989/*
1990 *	pmap_is_modified:
1991 *
1992 *	Return whether or not the specified physical page was modified
1993 *	in any physical maps.
1994 */
1995boolean_t
1996pmap_is_modified(vm_page_t m)
1997{
1998	struct ia64_lpte *pte;
1999	pmap_t oldpmap;
2000	pv_entry_t pv;
2001	boolean_t rv;
2002
2003	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2004	    ("pmap_is_modified: page %p is not managed", m));
2005	rv = FALSE;
2006
2007	/*
2008	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be
2009	 * concurrently set while the object is locked.  Thus, if PG_WRITEABLE
2010	 * is clear, no PTEs can be dirty.
2011	 */
2012	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2013	if ((m->oflags & VPO_BUSY) == 0 &&
2014	    (m->flags & PG_WRITEABLE) == 0)
2015		return (rv);
2016	vm_page_lock_queues();
2017	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2018		PMAP_LOCK(pv->pv_pmap);
2019		oldpmap = pmap_switch(pv->pv_pmap);
2020		pte = pmap_find_vhpt(pv->pv_va);
2021		pmap_switch(oldpmap);
2022		KASSERT(pte != NULL, ("pte"));
2023		rv = pmap_dirty(pte) ? TRUE : FALSE;
2024		PMAP_UNLOCK(pv->pv_pmap);
2025		if (rv)
2026			break;
2027	}
2028	vm_page_unlock_queues();
2029	return (rv);
2030}
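
/*
 * Illustrative use (a sketch, assuming a machine-independent caller):
 * before laundering a page whose dirty field may be incomplete, the
 * caller can consult the pmap layer:
 *
 *	if (pmap_is_modified(m))
 *		vm_page_dirty(m);
 */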
2031
2032/*
2033 *	pmap_is_prefaultable:
2034 *
 *	Return whether or not the specified virtual address is eligible
2036 *	for prefault.
2037 */
2038boolean_t
2039pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2040{
2041	struct ia64_lpte *pte;
2042
2043	pte = pmap_find_vhpt(addr);
2044	if (pte != NULL && pmap_present(pte))
2045		return (FALSE);
2046	return (TRUE);
2047}
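
/*
 * Usage sketch (an assumption about the fault-ahead caller): a mapping
 * is only created speculatively when nothing is already present at the
 * address, e.g.
 *
 *	if (pmap_is_prefaultable(pmap, va))
 *		...install a speculative mapping at 'va'...
 */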
2048
2049/*
2050 *	pmap_is_referenced:
2051 *
2052 *	Return whether or not the specified physical page was referenced
2053 *	in any physical maps.
2054 */
2055boolean_t
2056pmap_is_referenced(vm_page_t m)
2057{
2058	struct ia64_lpte *pte;
2059	pmap_t oldpmap;
2060	pv_entry_t pv;
2061	boolean_t rv;
2062
2063	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2064	    ("pmap_is_referenced: page %p is not managed", m));
2065	rv = FALSE;
2066	vm_page_lock_queues();
2067	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2068		PMAP_LOCK(pv->pv_pmap);
2069		oldpmap = pmap_switch(pv->pv_pmap);
2070		pte = pmap_find_vhpt(pv->pv_va);
2071		pmap_switch(oldpmap);
2072		KASSERT(pte != NULL, ("pte"));
2073		rv = pmap_accessed(pte) ? TRUE : FALSE;
2074		PMAP_UNLOCK(pv->pv_pmap);
2075		if (rv)
2076			break;
2077	}
2078	vm_page_unlock_queues();
2079	return (rv);
2080}
2081
2082/*
2083 *	Clear the modify bits on the specified physical page.
2084 */
2085void
2086pmap_clear_modify(vm_page_t m)
2087{
2088	struct ia64_lpte *pte;
2089	pmap_t oldpmap;
2090	pv_entry_t pv;
2091
2092	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2093	    ("pmap_clear_modify: page %p is not managed", m));
2094	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2095	KASSERT((m->oflags & VPO_BUSY) == 0,
2096	    ("pmap_clear_modify: page %p is busy", m));
2097
2098	/*
	 * If PG_WRITEABLE is not set, then no PTEs can be dirty.
2100	 * If the object containing the page is locked and the page is not
2101	 * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set.
2102	 */
2103	if ((m->flags & PG_WRITEABLE) == 0)
2104		return;
2105	vm_page_lock_queues();
2106	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2107		PMAP_LOCK(pv->pv_pmap);
2108		oldpmap = pmap_switch(pv->pv_pmap);
2109		pte = pmap_find_vhpt(pv->pv_va);
2110		KASSERT(pte != NULL, ("pte"));
2111		if (pmap_dirty(pte)) {
2112			pmap_clear_dirty(pte);
2113			pmap_invalidate_page(pv->pv_va);
2114		}
2115		pmap_switch(oldpmap);
2116		PMAP_UNLOCK(pv->pv_pmap);
2117	}
2118	vm_page_unlock_queues();
2119}
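
/*
 * Illustrative use (a sketch, not taken from this file): after a page
 * has been cleaned, both the machine-independent and the pmap-level
 * dirty state are cleared so that future writes are noticed again:
 *
 *	vm_page_undirty(m);
 *	pmap_clear_modify(m);
 */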
2120
2121/*
2122 *	pmap_clear_reference:
2123 *
2124 *	Clear the reference bit on the specified physical page.
2125 */
2126void
2127pmap_clear_reference(vm_page_t m)
2128{
2129	struct ia64_lpte *pte;
2130	pmap_t oldpmap;
2131	pv_entry_t pv;
2132
2133	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2134	    ("pmap_clear_reference: page %p is not managed", m));
2135	vm_page_lock_queues();
2136	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2137		PMAP_LOCK(pv->pv_pmap);
2138		oldpmap = pmap_switch(pv->pv_pmap);
2139		pte = pmap_find_vhpt(pv->pv_va);
2140		KASSERT(pte != NULL, ("pte"));
2141		if (pmap_accessed(pte)) {
2142			pmap_clear_accessed(pte);
2143			pmap_invalidate_page(pv->pv_va);
2144		}
2145		pmap_switch(oldpmap);
2146		PMAP_UNLOCK(pv->pv_pmap);
2147	}
2148	vm_page_unlock_queues();
2149}
2150
2151/*
2152 * Clear the write and modified bits in each of the given page's mappings.
2153 */
2154void
2155pmap_remove_write(vm_page_t m)
2156{
2157	struct ia64_lpte *pte;
2158	pmap_t oldpmap, pmap;
2159	pv_entry_t pv;
2160	vm_prot_t prot;
2161
2162	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2163	    ("pmap_remove_write: page %p is not managed", m));
2164
2165	/*
2166	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by
2167	 * another thread while the object is locked.  Thus, if PG_WRITEABLE
2168	 * is clear, no page table entries need updating.
2169	 */
2170	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2171	if ((m->oflags & VPO_BUSY) == 0 &&
2172	    (m->flags & PG_WRITEABLE) == 0)
2173		return;
2174	vm_page_lock_queues();
2175	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2176		pmap = pv->pv_pmap;
2177		PMAP_LOCK(pmap);
2178		oldpmap = pmap_switch(pmap);
2179		pte = pmap_find_vhpt(pv->pv_va);
2180		KASSERT(pte != NULL, ("pte"));
2181		prot = pmap_prot(pte);
2182		if ((prot & VM_PROT_WRITE) != 0) {
2183			if (pmap_dirty(pte)) {
2184				vm_page_dirty(m);
2185				pmap_clear_dirty(pte);
2186			}
2187			prot &= ~VM_PROT_WRITE;
2188			pmap_pte_prot(pmap, pte, prot);
2189			pmap_invalidate_page(pv->pv_va);
2190		}
2191		pmap_switch(oldpmap);
2192		PMAP_UNLOCK(pmap);
2193	}
2194	vm_page_flag_clear(m, PG_WRITEABLE);
2195	vm_page_unlock_queues();
2196}
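
/*
 * Usage sketch (an assumption about the caller): write access is
 * revoked before a page is cleaned or cached so that a later write
 * faults and re-dirties the page, e.g.
 *
 *	pmap_remove_write(m);
 *	...start the write-back of 'm'...
 */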
2197
2198/*
2199 * Map a set of physical memory pages into the kernel virtual
2200 * address space. Return a pointer to where it is mapped. This
2201 * routine is intended to be used for mapping device memory,
2202 * NOT real memory.
2203 */
2204void *
2205pmap_mapdev(vm_paddr_t pa, vm_size_t size)
2206{
2207	vm_offset_t va;
2208
2209	va = pa | IA64_RR_BASE(6);
2210	return ((void *)va);
2211}
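
/*
 * No page tables are allocated above: region 6 is the uncacheable
 * direct-mapped region, so a physical address is simply made visible at
 * IA64_RR_BASE(6) | pa.  For example, assuming the usual definition
 * IA64_RR_BASE(n) == ((n) << 61), pa 0xfed00000 maps to the virtual
 * address 0xc0000000fed00000.
 */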
2212
2213/*
2214 * 'Unmap' a range mapped by pmap_mapdev().
2215 */
2216void
2217pmap_unmapdev(vm_offset_t va, vm_size_t size)
2218{
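	/*
	 * Nothing to undo: pmap_mapdev() returns addresses in the
	 * direct-mapped region and allocates no resources.
	 */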
2219}
2220
2221/*
 * Perform the pmap-level work for mincore(2): report the MINCORE_*
 * status of the mapping at the given address.  For a managed page whose
 * referenced/modified state cannot be determined from this pmap alone,
 * the page's physical address is returned locked via 'locked_pa' so the
 * caller can also examine the vm_page.
2223 */
2224int
2225pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2226{
2227	pmap_t oldpmap;
2228	struct ia64_lpte *pte, tpte;
2229	vm_paddr_t pa;
2230	int val;
2231
2232	PMAP_LOCK(pmap);
2233retry:
2234	oldpmap = pmap_switch(pmap);
2235	pte = pmap_find_vhpt(addr);
2236	if (pte != NULL) {
2237		tpte = *pte;
2238		pte = &tpte;
2239	}
2240	pmap_switch(oldpmap);
2241	if (pte == NULL || !pmap_present(pte)) {
2242		val = 0;
2243		goto out;
2244	}
2245	val = MINCORE_INCORE;
2246	if (pmap_dirty(pte))
2247		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2248	if (pmap_accessed(pte))
2249		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2250	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2251	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
2252	    pmap_managed(pte)) {
2253		pa = pmap_ppn(pte);
2254		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
2255		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2256			goto retry;
2257	} else
2258out:
2259		PA_UNLOCK_COND(*locked_pa);
2260	PMAP_UNLOCK(pmap);
2261	return (val);
2262}
2263
2264void
2265pmap_activate(struct thread *td)
2266{
2267	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2268}
2269
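/*
 * Switch the CPU to the given address space: load the pmap's region IDs
 * into region registers 0 .. IA64_VM_MINKERN_REGION - 1 (a NULL pmap
 * installs the fixed RID 'i' for region 'i'), record the pmap as the
 * per-CPU current pmap, and return the previously active pmap.  The
 * usual pattern, used throughout this file, is
 *
 *	oldpmap = pmap_switch(pmap);
 *	...operate on 'pmap' through the VHPT...
 *	pmap_switch(oldpmap);
 *
 * The switch runs in a critical section so the region registers and the
 * per-CPU pointer cannot become inconsistent through preemption.
 */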
2270pmap_t
2271pmap_switch(pmap_t pm)
2272{
2273	pmap_t prevpm;
2274	int i;
2275
2276	critical_enter();
2277	prevpm = PCPU_GET(md.current_pmap);
2278	if (prevpm == pm)
2279		goto out;
2280	if (pm == NULL) {
2281		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2282			ia64_set_rr(IA64_RR_BASE(i),
2283			    (i << 8)|(PAGE_SHIFT << 2)|1);
2284		}
2285	} else {
2286		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2287			ia64_set_rr(IA64_RR_BASE(i),
2288			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2289		}
2290	}
2291	PCPU_SET(md.current_pmap, pm);
2292	ia64_srlz_d();
2293
2294out:
2295	critical_exit();
2296	return (prevpm);
2297}
2298
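/*
 * Make the instruction cache coherent with recent stores in the given
 * range.  The range is first rounded out to 32-byte boundaries (e.g.
 * va 0x2005, sz 8 becomes va 0x2000, sz 32) and is then synced at most
 * one page at a time, skipping pieces that are not currently mapped.
 */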
2299void
2300pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2301{
2302	pmap_t oldpm;
2303	struct ia64_lpte *pte;
2304	vm_offset_t lim;
2305	vm_size_t len;
2306
2307	sz += va & 31;
2308	va &= ~31;
2309	sz = (sz + 31) & ~31;
2310
2311	PMAP_LOCK(pm);
2312	oldpm = pmap_switch(pm);
2313	while (sz > 0) {
2314		lim = round_page(va);
2315		len = MIN(lim - va, sz);
2316		pte = pmap_find_vhpt(va);
2317		if (pte != NULL && pmap_present(pte))
2318			ia64_sync_icache(va, len);
2319		va += len;
2320		sz -= len;
2321	}
2322	pmap_switch(oldpm);
2323	PMAP_UNLOCK(pm);
2324}
2325
2326/*
2327 *	Increase the starting virtual address of the given mapping if a
2328 *	different alignment might result in more superpage mappings.
2329 */
2330void
2331pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2332    vm_offset_t *addr, vm_size_t size)
2333{
2334}
2335
2336#include "opt_ddb.h"
2337
2338#ifdef DDB
2339
2340#include <ddb/ddb.h>
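
/*
 * The ddb(4) commands defined below inspect translation state from the
 * debugger prompt:
 *
 *	itr		list the instruction translation registers
 *	dtr		list the data translation registers
 *	rr		dump region registers 0-7
 *	thash <addr>	print the VHPT entry address for <addr>
 *	ttag <addr>	print the VHPT tag for <addr>
 *	kpte <kva>	dump the kernel pte that maps <kva>
 */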
2341
2342static const char*	psnames[] = {
2343	"1B",	"2B",	"4B",	"8B",
2344	"16B",	"32B",	"64B",	"128B",
2345	"256B",	"512B",	"1K",	"2K",
2346	"4K",	"8K",	"16K",	"32K",
2347	"64K",	"128K",	"256K",	"512K",
2348	"1M",	"2M",	"4M",	"8M",
2349	"16M",	"32M",	"64M",	"128M",
2350	"256M",	"512M",	"1G",	"2G"
2351};
2352
2353static void
2354print_trs(int type)
2355{
2356	struct ia64_pal_result res;
2357	int i, maxtr;
2358	struct {
2359		pt_entry_t	pte;
2360		uint64_t	itir;
2361		uint64_t	ifa;
2362		struct ia64_rr	rr;
2363	} buf;
2364	static const char *manames[] = {
2365		"WB",	"bad",	"bad",	"bad",
2366		"UC",	"UCE",	"WC",	"NaT",
2367	};
2368
2369	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2370	if (res.pal_status != 0) {
2371		db_printf("Can't get VM summary\n");
2372		return;
2373	}
2374
2375	if (type == 0)
2376		maxtr = (res.pal_result[0] >> 40) & 0xff;
2377	else
2378		maxtr = (res.pal_result[0] >> 32) & 0xff;
2379
2380	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2381	for (i = 0; i <= maxtr; i++) {
2382		bzero(&buf, sizeof(buf));
2383		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
2384		    ia64_tpa((uint64_t)&buf));
2385		if (!(res.pal_result[0] & 1))
2386			buf.pte &= ~PTE_AR_MASK;
2387		if (!(res.pal_result[0] & 2))
2388			buf.pte &= ~PTE_PL_MASK;
2389		if (!(res.pal_result[0] & 4))
2390			pmap_clear_dirty(&buf);
2391		if (!(res.pal_result[0] & 8))
2392			buf.pte &= ~PTE_MA_MASK;
2393		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2394		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2395		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2396		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2397		    (buf.pte & PTE_ED) ? 1 : 0,
2398		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2399		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2400		    (pmap_dirty(&buf)) ? 1 : 0,
2401		    (pmap_accessed(&buf)) ? 1 : 0,
2402		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2403		    (pmap_present(&buf)) ? 1 : 0,
2404		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2405	}
2406}
2407
2408DB_COMMAND(itr, db_itr)
2409{
2410	print_trs(0);
2411}
2412
2413DB_COMMAND(dtr, db_dtr)
2414{
2415	print_trs(1);
2416}
2417
2418DB_COMMAND(rr, db_rr)
2419{
2420	int i;
2421	uint64_t t;
2422	struct ia64_rr rr;
2423
2424	printf("RR RID    PgSz VE\n");
2425	for (i = 0; i < 8; i++) {
2426		__asm __volatile ("mov %0=rr[%1]"
2427				  : "=r"(t)
2428				  : "r"(IA64_RR_BASE(i)));
2429		*(uint64_t *) &rr = t;
2430		printf("%d  %06x %4s %d\n",
2431		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2432	}
2433}
2434
2435DB_COMMAND(thash, db_thash)
2436{
2437	if (!have_addr)
2438		return;
2439
2440	db_printf("%p\n", (void *) ia64_thash(addr));
2441}
2442
2443DB_COMMAND(ttag, db_ttag)
2444{
2445	if (!have_addr)
2446		return;
2447
2448	db_printf("0x%lx\n", ia64_ttag(addr));
2449}
2450
2451DB_COMMAND(kpte, db_kpte)
2452{
2453	struct ia64_lpte *pte;
2454
2455	if (!have_addr) {
2456		db_printf("usage: kpte <kva>\n");
2457		return;
2458	}
2459	if (addr < VM_MIN_KERNEL_ADDRESS) {
2460		db_printf("kpte: error: invalid <kva>\n");
2461		return;
2462	}
2463	pte = pmap_find_kpte(addr);
2464	db_printf("kpte at %p:\n", pte);
2465	db_printf("  pte  =%016lx\n", pte->pte);
2466	db_printf("  itir =%016lx\n", pte->itir);
2467	db_printf("  tag  =%016lx\n", pte->tag);
2468	db_printf("  chain=%016lx\n", pte->chain);
2469}
2470
2471#endif
2472