pmap.c revision 223170
1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 223170 2011-06-17 04:26:03Z marcel $");
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/lock.h>
54#include <sys/mman.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/smp.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60
61#include <vm/vm.h>
62#include <vm/vm_page.h>
63#include <vm/vm_map.h>
64#include <vm/vm_object.h>
65#include <vm/vm_pageout.h>
66#include <vm/uma.h>
67
68#include <machine/bootinfo.h>
69#include <machine/md_var.h>
70#include <machine/pal.h>
71
72/*
73 *	Manages physical address maps.
74 *
75 *	In addition to hardware address maps, this
76 *	module is called upon to provide software-use-only
77 *	maps which may or may not be stored in the same
78 *	form as hardware maps.  These pseudo-maps are
79 *	used to store intermediate results from copy
80 *	operations to and from address spaces.
81 *
82 *	Since the information managed by this module is
83 *	also stored by the logical address mapping module,
84 *	this module may throw away valid virtual-to-physical
85 *	mappings at almost any time.  However, invalidations
86 *	of virtual-to-physical mappings must be done as
87 *	requested.
88 *
89 *	In order to cope with hardware architectures which
90 *	make virtual-to-physical map invalidates expensive,
91 * this module may delay invalidation or protection-reduction
92 *	operations until such time as they are actually
93 *	necessary.  This module is given full information as
94 *	to which processors are currently using which maps,
95 *	and to when physical maps must be made correct.
96 */
97
98/*
99 * Following the Linux model, region IDs are allocated in groups of
100 * eight so that a single region ID can be used for as many RRs as we
101 * want by encoding the RR number into the low bits of the ID.
102 *
103 * We reserve region ID 0 for the kernel and allocate the remaining
104 * IDs for user pmaps.
105 *
106 * Region 0-3:	User virtually mapped
107 * Region 4:	PBVM and special mappings
108 * Region 5:	Kernel virtual memory
109 * Region 6:	Direct-mapped uncacheable
110 * Region 7:	Direct-mapped cacheable
111 */
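/*
 * With this grouping, a user pmap keeps one base RID per region in
 * pm_rid[] and the context switch code installs it in the matching
 * region register, much like the kernel's region-5 setup in
 * pmap_bootstrap().  A sketch of what that looks like (pmap_switch()
 * is not defined in this file, so the exact form is an assumption):
 *
 *	ia64_set_rr(IA64_RR_BASE(rr),
 *	    (pmap->pm_rid[rr] << 8) | (PAGE_SHIFT << 2) | 1);
 */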
112
113/* XXX move to a header. */
114extern uint64_t ia64_gateway_page[];
115
116#ifndef PMAP_SHPGPERPROC
117#define PMAP_SHPGPERPROC 200
118#endif
119
120#if !defined(DIAGNOSTIC)
121#define PMAP_INLINE __inline
122#else
123#define PMAP_INLINE
124#endif
125
126#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
127#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
128#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
129#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
130#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
131#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
132#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
133#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
134
135#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
136#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
137#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
138#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
139
140#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
141
142/*
143 * The VHPT bucket head structure.
144 */
145struct ia64_bucket {
146	uint64_t	chain;
147	struct mtx	mutex;
148	u_int		length;
149};
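/*
 * The bucket's chain field holds the physical address of the first
 * ia64_lpte on the collision chain; the lptes link onward through their
 * own chain fields (see pmap_enter_vhpt() and pmap_remove_vhpt()).  Each
 * VHPT entry's chain field points back at its bucket, which is how an
 * ia64_thash() result is mapped to a bucket.
 */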
150
151/*
152 * Statically allocated kernel pmap
153 */
154struct pmap kernel_pmap_store;
155
156vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
157vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
158
159/*
160 * Kernel virtual memory management.
161 */
162static int nkpt;
163extern struct ia64_lpte ***ia64_kptdir;
164
165#define KPTE_DIR0_INDEX(va) \
166	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
167#define KPTE_DIR1_INDEX(va) \
168	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
169#define KPTE_PTE_INDEX(va) \
170	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
171#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
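/*
 * Worked example, assuming the default 16KB page size (PAGE_SHIFT == 14):
 * a region-5 kernel VA is split as
 *
 *	KPTE_DIR0_INDEX: va >> 34, 11 bits -> 2048 first-level slots
 *	KPTE_DIR1_INDEX: va >> 23, 11 bits -> 2048 second-level slots
 *	KPTE_PTE_INDEX:  va >> 14,  9 bits ->  512 lptes per leaf page
 *
 * so each leaf page maps 512 * 16KB = 8MB of KVA and each first-level
 * slot covers 16GB.  Other page sizes shift these numbers accordingly.
 */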
172
173vm_offset_t kernel_vm_end;
174
175/* Values for ptc.e. XXX values for SKI. */
176static uint64_t pmap_ptc_e_base = 0x100000000;
177static uint64_t pmap_ptc_e_count1 = 3;
178static uint64_t pmap_ptc_e_count2 = 2;
179static uint64_t pmap_ptc_e_stride1 = 0x2000;
180static uint64_t pmap_ptc_e_stride2 = 0x100000000;
181
182extern volatile u_long pmap_ptc_g_sem;
183
184/*
185 * Data for the RID allocator
186 */
187static int pmap_ridcount;
188static int pmap_rididx;
189static int pmap_ridmapsz;
190static int pmap_ridmax;
191static uint64_t *pmap_ridmap;
192struct mtx pmap_ridmutex;
193
194/*
195 * Data for the pv entry allocation mechanism
196 */
197static uma_zone_t pvzone;
198static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
199
200/*
201 * Data for allocating PTEs for user processes.
202 */
203static uma_zone_t ptezone;
204
205/*
206 * Virtual Hash Page Table (VHPT) data.
207 */
208/* SYSCTL_DECL(_machdep); */
209SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
210
211struct ia64_bucket *pmap_vhpt_bucket;
212
213int pmap_vhpt_nbuckets;
214SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
215    &pmap_vhpt_nbuckets, 0, "");
216
217int pmap_vhpt_log2size = 0;
218TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
219SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
220    &pmap_vhpt_log2size, 0, "");
221
222static int pmap_vhpt_inserts;
223SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
224    &pmap_vhpt_inserts, 0, "");
225
226static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
227SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
228    NULL, 0, pmap_vhpt_population, "I", "");
229
230static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
231
232static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
233static pv_entry_t get_pv_entry(pmap_t locked_pmap);
234
235static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
236		    vm_page_t m, vm_prot_t prot);
237static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
238static void	pmap_invalidate_all(void);
239static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
240		    vm_offset_t va, pv_entry_t pv, int freepte);
241static int	pmap_remove_vhpt(vm_offset_t va);
242static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
243		    vm_page_t m);
244
245vm_offset_t
246pmap_steal_memory(vm_size_t size)
247{
248	vm_size_t bank_size;
249	vm_offset_t pa, va;
250
251	size = round_page(size);
252
253	bank_size = phys_avail[1] - phys_avail[0];
254	while (size > bank_size) {
255		int i;
256		for (i = 0; phys_avail[i+2]; i+= 2) {
257			phys_avail[i] = phys_avail[i+2];
258			phys_avail[i+1] = phys_avail[i+3];
259		}
260		phys_avail[i] = 0;
261		phys_avail[i+1] = 0;
262		if (!phys_avail[0])
263			panic("pmap_steal_memory: out of memory");
264		bank_size = phys_avail[1] - phys_avail[0];
265	}
266
267	pa = phys_avail[0];
268	phys_avail[0] += size;
269
270	va = IA64_PHYS_TO_RR7(pa);
271	bzero((caddr_t) va, size);
272	return va;
273}
274
275static void
276pmap_initialize_vhpt(vm_offset_t vhpt)
277{
278	struct ia64_lpte *pte;
279	u_int i;
280
281	pte = (struct ia64_lpte *)vhpt;
282	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
283		pte[i].pte = 0;
284		pte[i].itir = 0;
285		pte[i].tag = 1UL << 63; /* Invalid tag */
286		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
287	}
288}
289
290#ifdef SMP
291MALLOC_DECLARE(M_SMP);
292
293vm_offset_t
294pmap_alloc_vhpt(void)
295{
296	vm_offset_t vhpt;
297	vm_size_t size;
298
299	size = 1UL << pmap_vhpt_log2size;
300	vhpt = (uintptr_t)contigmalloc(size, M_SMP, 0, 0UL, ~0UL, size, 0UL);
301	if (vhpt != 0) {
302		vhpt = IA64_PHYS_TO_RR7(ia64_tpa(vhpt));
303		pmap_initialize_vhpt(vhpt);
304	}
305	return (vhpt);
306}
307#endif
308
309/*
310 *	Bootstrap the system enough to run with virtual memory.
311 */
312void
313pmap_bootstrap()
314{
315	struct ia64_pal_result res;
316	vm_offset_t base;
317	size_t size;
318	int i, j, count, ridbits;
319
320	/*
321	 * Query the PAL Code to find the loop parameters for the
322	 * ptc.e instruction.
323	 */
324	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
325	if (res.pal_status != 0)
326		panic("Can't configure ptc.e parameters");
327	pmap_ptc_e_base = res.pal_result[0];
328	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
329	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
330	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
331	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
332	if (bootverbose)
333		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
334		       "stride1=0x%lx, stride2=0x%lx\n",
335		       pmap_ptc_e_base,
336		       pmap_ptc_e_count1,
337		       pmap_ptc_e_count2,
338		       pmap_ptc_e_stride1,
339		       pmap_ptc_e_stride2);
340
341	/*
342	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
343	 *
344	 * We currently need at least 19 bits in the RID because PID_MAX
345	 * can only be encoded in 17 bits and we need RIDs for 4 regions
346	 * per process. With PID_MAX equalling 99999 this means that we
347	 * need to be able to encode 399996 (=4*PID_MAX).
348	 * The Itanium processor only has 18 bits and the architected
349	 * minimum is exactly that. So, we cannot use a PID based scheme
350	 * in those cases. Enter pmap_ridmap...
351	 * We should avoid the map when running on a processor that has
352	 * implemented enough bits. This means that we should pass the
353	 * process/thread ID to pmap. This we currently don't do, so we
354	 * use the map anyway. However, we don't want to allocate a map
355	 * that is large enough to cover the range dictated by the number
356	 * of bits in the RID, because that may result in a RID map of
357	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
358 * The bottom line: we create a 32KB map when the processor only
359	 * implements 18 bits (or when we can't figure it out). Otherwise
360	 * we create a 64KB map.
361	 */
362	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
363	if (res.pal_status != 0) {
364		if (bootverbose)
365			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
366		ridbits = 18; /* guaranteed minimum */
367	} else {
368		ridbits = (res.pal_result[1] >> 8) & 0xff;
369		if (bootverbose)
370			printf("Processor supports %d Region ID bits\n",
371			    ridbits);
372	}
373	if (ridbits > 19)
374		ridbits = 19;
375
376	pmap_ridmax = (1 << ridbits);
377	pmap_ridmapsz = pmap_ridmax / 64;
378	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
379	pmap_ridmap[0] |= 0xff;
380	pmap_rididx = 0;
381	pmap_ridcount = 8;
382	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
383
384	/*
385	 * Allocate some memory for initial kernel 'page tables'.
386	 */
387	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
388	nkpt = 0;
389	kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
390
391	for (i = 0; phys_avail[i+2]; i+= 2)
392		;
393	count = i+2;
394
395	/*
396	 * Determine a valid (mappable) VHPT size.
397	 */
398	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
399	if (pmap_vhpt_log2size == 0)
400		pmap_vhpt_log2size = 20;
401	else if (pmap_vhpt_log2size < 16)
402		pmap_vhpt_log2size = 16;
403	else if (pmap_vhpt_log2size > 28)
404		pmap_vhpt_log2size = 28;
405	if (pmap_vhpt_log2size & 1)
406		pmap_vhpt_log2size--;
407
408	base = 0;
409	size = 1UL << pmap_vhpt_log2size;
410	for (i = 0; i < count; i += 2) {
411		base = (phys_avail[i] + size - 1) & ~(size - 1);
412		if (base + size <= phys_avail[i+1])
413			break;
414	}
415	if (!phys_avail[i])
416		panic("Unable to allocate VHPT");
417
418	if (base != phys_avail[i]) {
419		/* Split this region. */
420		for (j = count; j > i; j -= 2) {
421			phys_avail[j] = phys_avail[j-2];
422			phys_avail[j+1] = phys_avail[j-2+1];
423		}
424		phys_avail[i+1] = base;
425		phys_avail[i+2] = base + size;
426	} else
427		phys_avail[i] = base + size;
428
429	base = IA64_PHYS_TO_RR7(base);
430	PCPU_SET(md.vhpt, base);
431	if (bootverbose)
432		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
433
434	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
435	pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
436	    sizeof(struct ia64_bucket));
437	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
438		/* Stolen memory is zeroed. */
439		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
440		    MTX_NOWITNESS | MTX_SPIN);
441	}
442
443	pmap_initialize_vhpt(base);
444	map_vhpt(base);
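	/*
	 * The PTA value below packs, going by the architected layout:
	 * the VHPT virtual base in the upper bits, the long-format bit
	 * (1 << 8), the table size in PTA.size (pmap_vhpt_log2size << 2)
	 * and the enable bit (+ 1).
	 */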
445	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
446	ia64_srlz_i();
447
448	virtual_avail = VM_MIN_KERNEL_ADDRESS;
449	virtual_end = VM_MAX_KERNEL_ADDRESS;
450
451	/*
452	 * Initialize the kernel pmap (which is statically allocated).
453	 */
454	PMAP_LOCK_INIT(kernel_pmap);
455	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
456		kernel_pmap->pm_rid[i] = 0;
457	TAILQ_INIT(&kernel_pmap->pm_pvlist);
458	PCPU_SET(md.current_pmap, kernel_pmap);
459
460	/* Region 5 is mapped via the VHPT. */
461	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
462
463	/*
464	 * Clear out any random TLB entries left over from booting.
465	 */
466	pmap_invalidate_all();
467
468	map_gateway_page();
469}
470
471static int
472pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
473{
474	int count, error, i;
475
476	count = 0;
477	for (i = 0; i < pmap_vhpt_nbuckets; i++)
478		count += pmap_vhpt_bucket[i].length;
479
480	error = SYSCTL_OUT(req, &count, sizeof(count));
481	return (error);
482}
483
484/*
485 *	Initialize a vm_page's machine-dependent fields.
486 */
487void
488pmap_page_init(vm_page_t m)
489{
490
491	TAILQ_INIT(&m->md.pv_list);
492	m->md.pv_list_count = 0;
493}
494
495/*
496 *	Initialize the pmap module.
497 *	Called by vm_init, to initialize any structures that the pmap
498 *	system needs to map virtual memory.
499 */
500void
501pmap_init(void)
502{
503	int shpgperproc = PMAP_SHPGPERPROC;
504
505	/*
506	 * Initialize the address space (zone) for the pv entries.  Set a
507	 * high water mark so that the system can recover from excessive
508	 * numbers of pv entries.
509	 */
510	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
511	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
512	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
513	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
514	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
515	pv_entry_high_water = 9 * (pv_entry_max / 10);
516
517	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
518	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
519}
520
521
522/***************************************************
523 * Manipulate TLBs for a pmap
524 ***************************************************/
525
526static void
527pmap_invalidate_page(vm_offset_t va)
528{
529	struct ia64_lpte *pte;
530	struct pcpu *pc;
531	uint64_t tag, sem;
532	register_t is;
533	u_int vhpt_ofs;
534
535	critical_enter();
536	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
537	tag = ia64_ttag(va);
538	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
539		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
540		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
541	}
542
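	/*
	 * Only one global purge (ptc.g/ptc.ga) may be in flight in the
	 * system at a time, so issuers serialize on pmap_ptc_g_sem.  Bit 63
	 * is the purger flag; the low bits appear to be references taken by
	 * code outside this file, which is why the sequence below also
	 * waits for them to drain.
	 */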
543	/* PTC.G enter exclusive */
544	is = intr_disable();
545
546	/* Atomically assert writer after all writers have gone. */
547	do {
548		/* Wait until there's no more writer. */
549		do {
550			sem = atomic_load_acq_long(&pmap_ptc_g_sem);
551			tag = sem | (1ul << 63);
552		} while (sem == tag);
553	} while (!atomic_cmpset_rel_long(&pmap_ptc_g_sem, sem, tag));
554
555	/* Wait until all readers are gone. */
556	tag = (1ul << 63);
557	do {
558		sem = atomic_load_acq_long(&pmap_ptc_g_sem);
559	} while (sem != tag);
560
561	ia64_ptc_ga(va, PAGE_SHIFT << 2);
562	ia64_mf();
563	ia64_srlz_i();
564
565	/* PTC.G leave exclusive */
566	atomic_store_rel_long(&pmap_ptc_g_sem, 0);
567
568	ia64_invala();
569
570	intr_restore(is);
571	critical_exit();
572}
573
574static void
575pmap_invalidate_all_1(void *arg)
576{
577	uint64_t addr;
578	int i, j;
579
580	critical_enter();
581	addr = pmap_ptc_e_base;
582	for (i = 0; i < pmap_ptc_e_count1; i++) {
583		for (j = 0; j < pmap_ptc_e_count2; j++) {
584			ia64_ptc_e(addr);
585			addr += pmap_ptc_e_stride2;
586		}
587		addr += pmap_ptc_e_stride1;
588	}
589	critical_exit();
590}
591
592static void
593pmap_invalidate_all(void)
594{
595
596#ifdef SMP
597	if (mp_ncpus > 1) {
598		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
599		return;
600	}
601#endif
602	pmap_invalidate_all_1(NULL);
603}
604
605static uint32_t
606pmap_allocate_rid(void)
607{
608	uint64_t bit, bits;
609	int rid;
610
611	mtx_lock(&pmap_ridmutex);
612	if (pmap_ridcount == pmap_ridmax)
613		panic("pmap_allocate_rid: All Region IDs used");
614
615	/* Find an index with a free bit. */
616	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
617		pmap_rididx++;
618		if (pmap_rididx == pmap_ridmapsz)
619			pmap_rididx = 0;
620	}
621	rid = pmap_rididx * 64;
622
623	/* Find a free bit. */
624	bit = 1UL;
625	while (bits & bit) {
626		rid++;
627		bit <<= 1;
628	}
629
630	pmap_ridmap[pmap_rididx] |= bit;
631	pmap_ridcount++;
632	mtx_unlock(&pmap_ridmutex);
633
634	return rid;
635}
636
637static void
638pmap_free_rid(uint32_t rid)
639{
640	uint64_t bit;
641	int idx;
642
643	idx = rid / 64;
644	bit = ~(1UL << (rid & 63));
645
646	mtx_lock(&pmap_ridmutex);
647	pmap_ridmap[idx] &= bit;
648	pmap_ridcount--;
649	mtx_unlock(&pmap_ridmutex);
650}
651
652/***************************************************
653 * Page table page management routines.....
654 ***************************************************/
655
656void
657pmap_pinit0(struct pmap *pmap)
658{
659	/* kernel_pmap is the same as any other pmap. */
660	pmap_pinit(pmap);
661}
662
663/*
664 * Initialize a preallocated and zeroed pmap structure,
665 * such as one in a vmspace structure.
666 */
667int
668pmap_pinit(struct pmap *pmap)
669{
670	int i;
671
672	PMAP_LOCK_INIT(pmap);
673	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
674		pmap->pm_rid[i] = pmap_allocate_rid();
675	TAILQ_INIT(&pmap->pm_pvlist);
676	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
677	return (1);
678}
679
680/***************************************************
681 * Pmap allocation/deallocation routines.
682 ***************************************************/
683
684/*
685 * Release any resources held by the given physical map.
686 * Called when a pmap initialized by pmap_pinit is being released.
687 * Should only be called if the map contains no valid mappings.
688 */
689void
690pmap_release(pmap_t pmap)
691{
692	int i;
693
694	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
695		if (pmap->pm_rid[i])
696			pmap_free_rid(pmap->pm_rid[i]);
697	PMAP_LOCK_DESTROY(pmap);
698}
699
700/*
701 * grow the number of kernel page table entries, if needed
702 */
703void
704pmap_growkernel(vm_offset_t addr)
705{
706	struct ia64_lpte **dir1;
707	struct ia64_lpte *leaf;
708	vm_page_t nkpg;
709
710	while (kernel_vm_end <= addr) {
711		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
712			panic("%s: out of kernel address space", __func__);
713
714		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
715		if (dir1 == NULL) {
716			nkpg = vm_page_alloc(NULL, nkpt++,
717			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
718			if (!nkpg)
719				panic("%s: cannot add dir. page", __func__);
720
721			dir1 = (struct ia64_lpte **)
722			    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
723			bzero(dir1, PAGE_SIZE);
724			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
725		}
726
727		nkpg = vm_page_alloc(NULL, nkpt++,
728		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
729		if (!nkpg)
730			panic("%s: cannot add PTE page", __func__);
731
732		leaf = (struct ia64_lpte *)
733		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
734		bzero(leaf, PAGE_SIZE);
735		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
736
737		kernel_vm_end += PAGE_SIZE * NKPTEPG;
738	}
739}
740
741/***************************************************
742 * Page management routines.
743 ***************************************************/
744
745/*
746 * free the pv_entry back to the free list
747 */
748static PMAP_INLINE void
749free_pv_entry(pv_entry_t pv)
750{
751	pv_entry_count--;
752	uma_zfree(pvzone, pv);
753}
754
755/*
756 * get a new pv_entry, allocating a block from the system
757 * when needed.
758 */
759static pv_entry_t
760get_pv_entry(pmap_t locked_pmap)
761{
762	static const struct timeval printinterval = { 60, 0 };
763	static struct timeval lastprint;
764	struct vpgqueues *vpq;
765	struct ia64_lpte *pte;
766	pmap_t oldpmap, pmap;
767	pv_entry_t allocated_pv, next_pv, pv;
768	vm_offset_t va;
769	vm_page_t m;
770
771	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
772	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
773	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
774	if (allocated_pv != NULL) {
775		pv_entry_count++;
776		if (pv_entry_count > pv_entry_high_water)
777			pagedaemon_wakeup();
778		else
779			return (allocated_pv);
780	}
781
782	/*
783	 * Reclaim pv entries: At first, destroy mappings to inactive
784	 * pages.  After that, if a pv entry is still needed, destroy
785	 * mappings to active pages.
786	 */
787	if (ratecheck(&lastprint, &printinterval))
788		printf("Approaching the limit on PV entries, "
789		    "increase the vm.pmap.shpgperproc tunable.\n");
790	vpq = &vm_page_queues[PQ_INACTIVE];
791retry:
792	TAILQ_FOREACH(m, &vpq->pl, pageq) {
793		if (m->hold_count || m->busy)
794			continue;
795		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
796			va = pv->pv_va;
797			pmap = pv->pv_pmap;
798			/* Avoid deadlock and lock recursion. */
799			if (pmap > locked_pmap)
800				PMAP_LOCK(pmap);
801			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
802				continue;
803			pmap->pm_stats.resident_count--;
804			oldpmap = pmap_switch(pmap);
805			pte = pmap_find_vhpt(va);
806			KASSERT(pte != NULL, ("pte"));
807			pmap_remove_vhpt(va);
808			pmap_invalidate_page(va);
809			pmap_switch(oldpmap);
810			if (pmap_accessed(pte))
811				vm_page_flag_set(m, PG_REFERENCED);
812			if (pmap_dirty(pte))
813				vm_page_dirty(m);
814			pmap_free_pte(pte, va);
815			TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
816			m->md.pv_list_count--;
817			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
818			if (pmap != locked_pmap)
819				PMAP_UNLOCK(pmap);
820			if (allocated_pv == NULL)
821				allocated_pv = pv;
822			else
823				free_pv_entry(pv);
824		}
825		if (TAILQ_EMPTY(&m->md.pv_list))
826			vm_page_flag_clear(m, PG_WRITEABLE);
827	}
828	if (allocated_pv == NULL) {
829		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
830			vpq = &vm_page_queues[PQ_ACTIVE];
831			goto retry;
832		}
833		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
834	}
835	return (allocated_pv);
836}
837
838/*
839 * Conditionally create a pv entry.
840 */
841static boolean_t
842pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
843{
844	pv_entry_t pv;
845
846	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
847	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
848	if (pv_entry_count < pv_entry_high_water &&
849	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
850		pv_entry_count++;
851		pv->pv_va = va;
852		pv->pv_pmap = pmap;
853		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
854		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
855		m->md.pv_list_count++;
856		return (TRUE);
857	} else
858		return (FALSE);
859}
860
861/*
862 * Add an ia64_lpte to the VHPT.
863 */
864static void
865pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
866{
867	struct ia64_bucket *bckt;
868	struct ia64_lpte *vhpte;
869	uint64_t pte_pa;
870
871	/* Can fault, so get it out of the way. */
872	pte_pa = ia64_tpa((vm_offset_t)pte);
873
874	vhpte = (struct ia64_lpte *)ia64_thash(va);
875	bckt = (struct ia64_bucket *)vhpte->chain;
876
877	mtx_lock_spin(&bckt->mutex);
878	pte->chain = bckt->chain;
879	ia64_mf();
880	bckt->chain = pte_pa;
881
882	pmap_vhpt_inserts++;
883	bckt->length++;
884	mtx_unlock_spin(&bckt->mutex);
885}
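/*
 * The collision-chain links are physical addresses (pte_pa above; the
 * walks below convert them with IA64_PHYS_TO_RR7()), presumably so the
 * low-level VHPT miss handlers can follow them without taking further
 * TLB faults.
 */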
886
887/*
888 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
889 * worked or an appropriate error code otherwise.
890 */
891static int
892pmap_remove_vhpt(vm_offset_t va)
893{
894	struct ia64_bucket *bckt;
895	struct ia64_lpte *pte;
896	struct ia64_lpte *lpte;
897	struct ia64_lpte *vhpte;
898	uint64_t chain, tag;
899
900	tag = ia64_ttag(va);
901	vhpte = (struct ia64_lpte *)ia64_thash(va);
902	bckt = (struct ia64_bucket *)vhpte->chain;
903
904	lpte = NULL;
905	mtx_lock_spin(&bckt->mutex);
906	chain = bckt->chain;
907	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
908	while (chain != 0 && pte->tag != tag) {
909		lpte = pte;
910		chain = pte->chain;
911		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
912	}
913	if (chain == 0) {
914		mtx_unlock_spin(&bckt->mutex);
915		return (ENOENT);
916	}
917
918	/* Snip this pte out of the collision chain. */
919	if (lpte == NULL)
920		bckt->chain = pte->chain;
921	else
922		lpte->chain = pte->chain;
923	ia64_mf();
924
925	bckt->length--;
926	mtx_unlock_spin(&bckt->mutex);
927	return (0);
928}
929
930/*
931 * Find the ia64_lpte for the given va, if any.
932 */
933static struct ia64_lpte *
934pmap_find_vhpt(vm_offset_t va)
935{
936	struct ia64_bucket *bckt;
937	struct ia64_lpte *pte;
938	uint64_t chain, tag;
939
940	tag = ia64_ttag(va);
941	pte = (struct ia64_lpte *)ia64_thash(va);
942	bckt = (struct ia64_bucket *)pte->chain;
943
944	mtx_lock_spin(&bckt->mutex);
945	chain = bckt->chain;
946	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
947	while (chain != 0 && pte->tag != tag) {
948		chain = pte->chain;
949		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
950	}
951	mtx_unlock_spin(&bckt->mutex);
952	return ((chain != 0) ? pte : NULL);
953}
954
955/*
956 * Remove an entry from the list of managed mappings.
957 */
958static int
959pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
960{
961	if (!pv) {
962		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
963			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
964				if (pmap == pv->pv_pmap && va == pv->pv_va)
965					break;
966			}
967		} else {
968			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
969				if (va == pv->pv_va)
970					break;
971			}
972		}
973	}
974
975	if (pv) {
976		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
977		m->md.pv_list_count--;
978		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
979			vm_page_flag_clear(m, PG_WRITEABLE);
980
981		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
982		free_pv_entry(pv);
983		return 0;
984	} else {
985		return ENOENT;
986	}
987}
988
989/*
990 * Create a pv entry for page at pa for
991 * (pmap, va).
992 */
993static void
994pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
995{
996	pv_entry_t pv;
997
998	pv = get_pv_entry(pmap);
999	pv->pv_pmap = pmap;
1000	pv->pv_va = va;
1001
1002	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1003	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1004	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1005	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1006	m->md.pv_list_count++;
1007}
1008
1009/*
1010 *	Routine:	pmap_extract
1011 *	Function:
1012 *		Extract the physical page address associated
1013 *		with the given map/virtual_address pair.
1014 */
1015vm_paddr_t
1016pmap_extract(pmap_t pmap, vm_offset_t va)
1017{
1018	struct ia64_lpte *pte;
1019	pmap_t oldpmap;
1020	vm_paddr_t pa;
1021
1022	pa = 0;
1023	PMAP_LOCK(pmap);
1024	oldpmap = pmap_switch(pmap);
1025	pte = pmap_find_vhpt(va);
1026	if (pte != NULL && pmap_present(pte))
1027		pa = pmap_ppn(pte);
1028	pmap_switch(oldpmap);
1029	PMAP_UNLOCK(pmap);
1030	return (pa);
1031}
1032
1033/*
1034 *	Routine:	pmap_extract_and_hold
1035 *	Function:
1036 *		Atomically extract and hold the physical page
1037 *		with the given pmap and virtual address pair
1038 *		if that mapping permits the given protection.
1039 */
1040vm_page_t
1041pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1042{
1043	struct ia64_lpte *pte;
1044	pmap_t oldpmap;
1045	vm_page_t m;
1046	vm_paddr_t pa;
1047
1048	pa = 0;
1049	m = NULL;
1050	PMAP_LOCK(pmap);
1051	oldpmap = pmap_switch(pmap);
1052retry:
1053	pte = pmap_find_vhpt(va);
1054	if (pte != NULL && pmap_present(pte) &&
1055	    (pmap_prot(pte) & prot) == prot) {
1056		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1057		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1058			goto retry;
1059		vm_page_hold(m);
1060	}
1061	PA_UNLOCK_COND(pa);
1062	pmap_switch(oldpmap);
1063	PMAP_UNLOCK(pmap);
1064	return (m);
1065}
1066
1067/***************************************************
1068 * Low level mapping routines.....
1069 ***************************************************/
1070
1071/*
1072 * Find the kernel lpte for mapping the given virtual address, which
1073 * must be in the part of region 5 which we can cover with our kernel
1074 * 'page tables'.
1075 */
1076static struct ia64_lpte *
1077pmap_find_kpte(vm_offset_t va)
1078{
1079	struct ia64_lpte **dir1;
1080	struct ia64_lpte *leaf;
1081
1082	KASSERT((va >> 61) == 5,
1083		("kernel mapping 0x%lx not in region 5", va));
1084	KASSERT(va < kernel_vm_end,
1085		("kernel mapping 0x%lx out of range", va));
1086
1087	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1088	leaf = dir1[KPTE_DIR1_INDEX(va)];
1089	return (&leaf[KPTE_PTE_INDEX(va)]);
1090}
1091
1092/*
1093 * Find a pte suitable for mapping a user-space address. If one exists
1094 * in the VHPT, that one will be returned, otherwise a new pte is
1095 * allocated.
1096 */
1097static struct ia64_lpte *
1098pmap_find_pte(vm_offset_t va)
1099{
1100	struct ia64_lpte *pte;
1101
1102	if (va >= VM_MAXUSER_ADDRESS)
1103		return pmap_find_kpte(va);
1104
1105	pte = pmap_find_vhpt(va);
1106	if (pte == NULL) {
1107		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1108		if (pte != NULL)
			pte->tag = 1UL << 63;
1109	}
1110	return (pte);
1111}
1112
1113/*
1114 * Free a pte which is now unused. This simply returns it to the zone
1115 * allocator if it is a user mapping. For kernel mappings, clear the
1116 * valid bit to make it clear that the mapping is not currently used.
1117 */
1118static void
1119pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1120{
1121	if (va < VM_MAXUSER_ADDRESS)
1122		uma_zfree(ptezone, pte);
1123	else
1124		pmap_clear_present(pte);
1125}
1126
1127static PMAP_INLINE void
1128pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1129{
1130	static long prot2ar[4] = {
1131		PTE_AR_R,		/* VM_PROT_NONE */
1132		PTE_AR_RW,		/* VM_PROT_WRITE */
1133		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1134		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1135	};
1136
1137	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1138	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1139	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1140	    ? PTE_PL_KERN : PTE_PL_USER;
1141	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1142}
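/*
 * For reference: the VM_PROT_* bits are kept in bits 56..58 of the pte
 * (matching pmap_prot() above) purely for software use, while the
 * hardware PL and AR fields are derived from them.  A user mapping with
 * VM_PROT_READ|VM_PROT_WRITE, for example, ends up with PTE_PL_USER |
 * PTE_AR_RW plus the saved protection bits.
 */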
1143
1144/*
1145 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1146 * the pte was originally valid, then it's assumed to already be in the
1147 * VHPT.
1148 * This function does not set the protection bits.  It's expected
1149 * that those have been set correctly prior to calling this function.
1150 */
1151static void
1152pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1153    boolean_t wired, boolean_t managed)
1154{
1155
1156	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED;
1157	pte->pte |= PTE_PRESENT | PTE_MA_WB;
1158	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1159	pte->pte |= (wired) ? PTE_WIRED : 0;
1160	pte->pte |= pa & PTE_PPN_MASK;
1161
1162	pte->itir = PAGE_SHIFT << 2;
1163
1164	pte->tag = ia64_ttag(va);
1165}
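/*
 * The itir value (PAGE_SHIFT << 2) places the page size in itir.ps, and
 * the tag from ia64_ttag() is what pmap_find_vhpt() compares against;
 * both are needed for the long-format VHPT entry to be usable.
 */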
1166
1167/*
1168 * Remove the (possibly managed) mapping represented by pte from the
1169 * given pmap.
1170 */
1171static int
1172pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1173		pv_entry_t pv, int freepte)
1174{
1175	int error;
1176	vm_page_t m;
1177
1178	/*
1179	 * First remove from the VHPT.
1180	 */
1181	error = pmap_remove_vhpt(va);
1182	if (error)
1183		return (error);
1184
1185	pmap_invalidate_page(va);
1186
1187	if (pmap_wired(pte))
1188		pmap->pm_stats.wired_count -= 1;
1189
1190	pmap->pm_stats.resident_count -= 1;
1191	if (pmap_managed(pte)) {
1192		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1193		if (pmap_dirty(pte))
1194			vm_page_dirty(m);
1195		if (pmap_accessed(pte))
1196			vm_page_flag_set(m, PG_REFERENCED);
1197
1198		error = pmap_remove_entry(pmap, m, va, pv);
1199	}
1200	if (freepte)
1201		pmap_free_pte(pte, va);
1202
1203	return (error);
1204}
1205
1206/*
1207 * Extract the physical page address associated with a kernel
1208 * virtual address.
1209 */
1210vm_paddr_t
1211pmap_kextract(vm_offset_t va)
1212{
1213	struct ia64_lpte *pte;
1214	uint64_t *pbvm_pgtbl;
1215	u_int idx;
1216
1217	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));
1218
1219	/* Regions 6 and 7 are direct mapped. */
1220	if (va >= IA64_RR_BASE(6))
1221		return (IA64_RR_MASK(va));
1222
1223	/* Bail out if the virtual address is beyond our limits. */
1224	if (va >= kernel_vm_end)
1225		return (0);
1226
1227	if (va >= VM_MIN_KERNEL_ADDRESS) {
1228		pte = pmap_find_kpte(va);
1229		return (pmap_present(pte) ? pmap_ppn(pte)|(va&PAGE_MASK) : 0);
1230	}
1231
1232	/* PBVM page table. */
1233	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
1234		return (0);
1235	if (va >= IA64_PBVM_PGTBL)
1236		return (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
1237
1238	/* PBVM. */
1239	if (va >= IA64_PBVM_BASE) {
1240		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
1241		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
1242		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
1243			return (0);
1244		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
1245			return (0);
1246		return ((pbvm_pgtbl[idx] & PTE_PPN_MASK) +
1247		    (va & IA64_PBVM_PAGE_MASK));
1248	}
1249
1250	printf("XXX: %s: va=%#lx\n", __func__, va);
1251	return (0);
1252}
1253
1254/*
1255 * Add a list of wired pages to the kva.  This routine is only used for
1256 * temporary kernel mappings that do not need to have page modification
1257 * or references recorded.  Note that old mappings are simply written
1258 * over.  The page is effectively wired, but it's customary to not have
1259 * the PTE reflect that, nor update statistics.
1260 */
1261void
1262pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1263{
1264	struct ia64_lpte *pte;
1265	int i;
1266
1267	for (i = 0; i < count; i++) {
1268		pte = pmap_find_kpte(va);
1269		if (pmap_present(pte))
1270			pmap_invalidate_page(va);
1271		else
1272			pmap_enter_vhpt(pte, va);
1273		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1274		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1275		va += PAGE_SIZE;
1276	}
1277}
1278
1279/*
1280 * this routine jerks page mappings from the
1281 * kernel -- it is meant only for temporary mappings.
1282 */
1283void
1284pmap_qremove(vm_offset_t va, int count)
1285{
1286	struct ia64_lpte *pte;
1287	int i;
1288
1289	for (i = 0; i < count; i++) {
1290		pte = pmap_find_kpte(va);
1291		if (pmap_present(pte)) {
1292			pmap_remove_vhpt(va);
1293			pmap_invalidate_page(va);
1294			pmap_clear_present(pte);
1295		}
1296		va += PAGE_SIZE;
1297	}
1298}
1299
1300/*
1301 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1302 * to not have the PTE reflect that, nor update statistics.
1303 */
1304void
1305pmap_kenter(vm_offset_t va, vm_offset_t pa)
1306{
1307	struct ia64_lpte *pte;
1308
1309	pte = pmap_find_kpte(va);
1310	if (pmap_present(pte))
1311		pmap_invalidate_page(va);
1312	else
1313		pmap_enter_vhpt(pte, va);
1314	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1315	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1316}
1317
1318/*
1319 * Remove a page from the kva
1320 */
1321void
1322pmap_kremove(vm_offset_t va)
1323{
1324	struct ia64_lpte *pte;
1325
1326	pte = pmap_find_kpte(va);
1327	if (pmap_present(pte)) {
1328		pmap_remove_vhpt(va);
1329		pmap_invalidate_page(va);
1330		pmap_clear_present(pte);
1331	}
1332}
1333
1334/*
1335 *	Used to map a range of physical addresses into kernel
1336 *	virtual address space.
1337 *
1338 *	The value passed in '*virt' is a suggested virtual address for
1339 *	the mapping. Architectures which can support a direct-mapped
1340 *	physical to virtual region can return the appropriate address
1341 *	within that region, leaving '*virt' unchanged. Other
1342 *	architectures should map the pages starting at '*virt' and
1343 *	update '*virt' with the first usable address after the mapped
1344 *	region.
1345 */
1346vm_offset_t
1347pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1348{
1349	return IA64_PHYS_TO_RR7(start);
1350}
1351
1352/*
1353 *	Remove the given range of addresses from the specified map.
1354 *
1355 *	It is assumed that the start and end are properly
1356 *	rounded to the page size.
1357 */
1358void
1359pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1360{
1361	pmap_t oldpmap;
1362	vm_offset_t va;
1363	pv_entry_t npv, pv;
1364	struct ia64_lpte *pte;
1365
1366	if (pmap->pm_stats.resident_count == 0)
1367		return;
1368
1369	vm_page_lock_queues();
1370	PMAP_LOCK(pmap);
1371	oldpmap = pmap_switch(pmap);
1372
1373	/*
1374	 * Special handling for removing a single page: a very
1375	 * common operation for which it is easy to short-circuit
1376	 * some code.
1377	 */
1378	if (sva + PAGE_SIZE == eva) {
1379		pte = pmap_find_vhpt(sva);
1380		if (pte != NULL)
1381			pmap_remove_pte(pmap, pte, sva, 0, 1);
1382		goto out;
1383	}
1384
1385	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1386		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
1387			va = pv->pv_va;
1388			if (va >= sva && va < eva) {
1389				pte = pmap_find_vhpt(va);
1390				KASSERT(pte != NULL, ("pte"));
1391				pmap_remove_pte(pmap, pte, va, pv, 1);
1392			}
1393		}
1394	} else {
1395		for (va = sva; va < eva; va += PAGE_SIZE) {
1396			pte = pmap_find_vhpt(va);
1397			if (pte != NULL)
1398				pmap_remove_pte(pmap, pte, va, 0, 1);
1399		}
1400	}
1401
1402out:
1403	vm_page_unlock_queues();
1404	pmap_switch(oldpmap);
1405	PMAP_UNLOCK(pmap);
1406}
1407
1408/*
1409 *	Routine:	pmap_remove_all
1410 *	Function:
1411 *		Removes this physical page from
1412 *		all physical maps in which it resides.
1413 *		Reflects back modify bits to the pager.
1414 *
1415 *	Notes:
1416 *		Original versions of this routine were very
1417 *		inefficient because they iteratively called
1418 *		pmap_remove (slow...)
1419 */
1420
1421void
1422pmap_remove_all(vm_page_t m)
1423{
1424	pmap_t oldpmap;
1425	pv_entry_t pv;
1426
1427	KASSERT((m->flags & PG_FICTITIOUS) == 0,
1428	    ("pmap_remove_all: page %p is fictitious", m));
1429	vm_page_lock_queues();
1430	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1431		struct ia64_lpte *pte;
1432		pmap_t pmap = pv->pv_pmap;
1433		vm_offset_t va = pv->pv_va;
1434
1435		PMAP_LOCK(pmap);
1436		oldpmap = pmap_switch(pmap);
1437		pte = pmap_find_vhpt(va);
1438		KASSERT(pte != NULL, ("pte"));
1439		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1440			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1441		pmap_remove_pte(pmap, pte, va, pv, 1);
1442		pmap_switch(oldpmap);
1443		PMAP_UNLOCK(pmap);
1444	}
1445	vm_page_flag_clear(m, PG_WRITEABLE);
1446	vm_page_unlock_queues();
1447}
1448
1449/*
1450 *	Set the physical protection on the
1451 *	specified range of this map as requested.
1452 */
1453void
1454pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1455{
1456	pmap_t oldpmap;
1457	struct ia64_lpte *pte;
1458
1459	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1460		pmap_remove(pmap, sva, eva);
1461		return;
1462	}
1463
1464	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1465	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1466		return;
1467
1468	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1469		panic("pmap_protect: unaligned addresses");
1470
1471	vm_page_lock_queues();
1472	PMAP_LOCK(pmap);
1473	oldpmap = pmap_switch(pmap);
1474	for ( ; sva < eva; sva += PAGE_SIZE) {
1475		/* If page is invalid, skip this page */
1476		pte = pmap_find_vhpt(sva);
1477		if (pte == NULL)
1478			continue;
1479
1480		/* If there's no change, skip it too */
1481		if (pmap_prot(pte) == prot)
1482			continue;
1483
1484		if ((prot & VM_PROT_WRITE) == 0 &&
1485		    pmap_managed(pte) && pmap_dirty(pte)) {
1486			vm_paddr_t pa = pmap_ppn(pte);
1487			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1488
1489			vm_page_dirty(m);
1490			pmap_clear_dirty(pte);
1491		}
1492
1493		if (prot & VM_PROT_EXECUTE)
1494			ia64_sync_icache(sva, PAGE_SIZE);
1495
1496		pmap_pte_prot(pmap, pte, prot);
1497		pmap_invalidate_page(sva);
1498	}
1499	vm_page_unlock_queues();
1500	pmap_switch(oldpmap);
1501	PMAP_UNLOCK(pmap);
1502}
1503
1504/*
1505 *	Insert the given physical page (p) at
1506 *	the specified virtual address (v) in the
1507 *	target physical map with the protection requested.
1508 *
1509 *	If specified, the page will be wired down, meaning
1510 *	that the related pte can not be reclaimed.
1511 *
1512 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1513 *	or lose information.  That is, this routine must actually
1514 *	insert this page into the given map NOW.
1515 */
1516void
1517pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1518    vm_prot_t prot, boolean_t wired)
1519{
1520	pmap_t oldpmap;
1521	vm_offset_t pa;
1522	vm_offset_t opa;
1523	struct ia64_lpte origpte;
1524	struct ia64_lpte *pte;
1525	boolean_t icache_inval, managed;
1526
1527	vm_page_lock_queues();
1528	PMAP_LOCK(pmap);
1529	oldpmap = pmap_switch(pmap);
1530
1531	va &= ~PAGE_MASK;
1532 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1533	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
1534	    (m->oflags & VPO_BUSY) != 0,
1535	    ("pmap_enter: page %p is not busy", m));
1536
1537	/*
1538	 * Find (or create) a pte for the given mapping.
1539	 */
1540	while ((pte = pmap_find_pte(va)) == NULL) {
1541		pmap_switch(oldpmap);
1542		PMAP_UNLOCK(pmap);
1543		vm_page_unlock_queues();
1544		VM_WAIT;
1545		vm_page_lock_queues();
1546		PMAP_LOCK(pmap);
1547		oldpmap = pmap_switch(pmap);
1548	}
1549	origpte = *pte;
1550	if (!pmap_present(pte)) {
1551		opa = ~0UL;
1552		pmap_enter_vhpt(pte, va);
1553	} else
1554		opa = pmap_ppn(pte);
1555	managed = FALSE;
1556	pa = VM_PAGE_TO_PHYS(m);
1557
1558	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1559
1560	/*
1561	 * Mapping has not changed, must be protection or wiring change.
1562	 */
1563	if (opa == pa) {
1564		/*
1565		 * Wiring change, just update stats. We don't worry about
1566		 * wiring PT pages as they remain resident as long as there
1567		 * are valid mappings in them. Hence, if a user page is wired,
1568		 * the PT page will be also.
1569		 */
1570		if (wired && !pmap_wired(&origpte))
1571			pmap->pm_stats.wired_count++;
1572		else if (!wired && pmap_wired(&origpte))
1573			pmap->pm_stats.wired_count--;
1574
1575		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1576
1577		/*
1578		 * We might be turning off write access to the page,
1579		 * so we go ahead and sense modify status. Otherwise,
1580		 * we can avoid I-cache invalidation if the page
1581		 * already allowed execution.
1582		 */
1583		if (managed && pmap_dirty(&origpte))
1584			vm_page_dirty(m);
1585		else if (pmap_exec(&origpte))
1586			icache_inval = FALSE;
1587
1588		pmap_invalidate_page(va);
1589		goto validate;
1590	}
1591
1592	/*
1593	 * Mapping has changed, invalidate old range and fall
1594	 * through to handle validating new mapping.
1595	 */
1596	if (opa != ~0UL) {
1597		pmap_remove_pte(pmap, pte, va, 0, 0);
1598		pmap_enter_vhpt(pte, va);
1599	}
1600
1601	/*
1602	 * Enter on the PV list if part of our managed memory.
1603	 */
1604	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1605		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1606		    ("pmap_enter: managed mapping within the clean submap"));
1607		pmap_insert_entry(pmap, va, m);
1608		managed = TRUE;
1609	}
1610
1611	/*
1612	 * Increment counters
1613	 */
1614	pmap->pm_stats.resident_count++;
1615	if (wired)
1616		pmap->pm_stats.wired_count++;
1617
1618validate:
1619
1620	/*
1621	 * Now validate mapping with desired protection/wiring. This
1622	 * adds the pte to the VHPT if necessary.
1623	 */
1624	pmap_pte_prot(pmap, pte, prot);
1625	pmap_set_pte(pte, va, pa, wired, managed);
1626
1627	/* Invalidate the I-cache when needed. */
1628	if (icache_inval)
1629		ia64_sync_icache(va, PAGE_SIZE);
1630
1631	if ((prot & VM_PROT_WRITE) != 0 && managed)
1632		vm_page_flag_set(m, PG_WRITEABLE);
1633	vm_page_unlock_queues();
1634	pmap_switch(oldpmap);
1635	PMAP_UNLOCK(pmap);
1636}
1637
1638/*
1639 * Maps a sequence of resident pages belonging to the same object.
1640 * The sequence begins with the given page m_start.  This page is
1641 * mapped at the given virtual address start.  Each subsequent page is
1642 * mapped at a virtual address that is offset from start by the same
1643 * amount as the page is offset from m_start within the object.  The
1644 * last page in the sequence is the page with the largest offset from
1645 * m_start that can be mapped at a virtual address less than the given
1646 * virtual address end.  Not every virtual page between start and end
1647 * is mapped; only those for which a resident page exists with the
1648 * corresponding offset from m_start are mapped.
1649 */
1650void
1651pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1652    vm_page_t m_start, vm_prot_t prot)
1653{
1654	pmap_t oldpmap;
1655	vm_page_t m;
1656	vm_pindex_t diff, psize;
1657
1658	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
1659	psize = atop(end - start);
1660	m = m_start;
1661	vm_page_lock_queues();
1662	PMAP_LOCK(pmap);
1663	oldpmap = pmap_switch(pmap);
1664	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1665		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1666		m = TAILQ_NEXT(m, listq);
1667	}
1668	vm_page_unlock_queues();
1669	pmap_switch(oldpmap);
1670 	PMAP_UNLOCK(pmap);
1671}
1672
1673/*
1674 * this code makes some *MAJOR* assumptions:
1675 * 1. Current pmap & pmap exists.
1676 * 2. Not wired.
1677 * 3. Read access.
1678 * 4. No page table pages.
1679 * but is *MUCH* faster than pmap_enter...
1680 */
1681
1682void
1683pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1684{
1685	pmap_t oldpmap;
1686
1687	vm_page_lock_queues();
1688	PMAP_LOCK(pmap);
1689	oldpmap = pmap_switch(pmap);
1690	pmap_enter_quick_locked(pmap, va, m, prot);
1691	vm_page_unlock_queues();
1692	pmap_switch(oldpmap);
1693	PMAP_UNLOCK(pmap);
1694}
1695
1696static void
1697pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1698    vm_prot_t prot)
1699{
1700	struct ia64_lpte *pte;
1701	boolean_t managed;
1702
1703	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1704	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
1705	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1706	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1707	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1708
1709	if ((pte = pmap_find_pte(va)) == NULL)
1710		return;
1711
1712	if (!pmap_present(pte)) {
1713		/* Enter on the PV list if the page is managed. */
1714		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1715			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1716				pmap_free_pte(pte, va);
1717				return;
1718			}
1719			managed = TRUE;
1720		} else
1721			managed = FALSE;
1722
1723		/* Increment counters. */
1724		pmap->pm_stats.resident_count++;
1725
1726		/* Initialise with R/O protection and enter into VHPT. */
1727		pmap_enter_vhpt(pte, va);
1728		pmap_pte_prot(pmap, pte,
1729		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1730		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1731
1732		if (prot & VM_PROT_EXECUTE)
1733			ia64_sync_icache(va, PAGE_SIZE);
1734	}
1735}
1736
1737/*
1738 * pmap_object_init_pt preloads the ptes for a given object
1739 * into the specified pmap.  This eliminates the blast of soft
1740 * faults on process startup and immediately after an mmap.
1741 */
1742void
1743pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1744		    vm_object_t object, vm_pindex_t pindex,
1745		    vm_size_t size)
1746{
1747
1748	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1749	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1750	    ("pmap_object_init_pt: non-device object"));
1751}
1752
1753/*
1754 *	Routine:	pmap_change_wiring
1755 *	Function:	Change the wiring attribute for a map/virtual-address
1756 *			pair.
1757 *	In/out conditions:
1758 *			The mapping must already exist in the pmap.
1759 */
1760void
1761pmap_change_wiring(pmap, va, wired)
1762	register pmap_t pmap;
1763	vm_offset_t va;
1764	boolean_t wired;
1765{
1766	pmap_t oldpmap;
1767	struct ia64_lpte *pte;
1768
1769	PMAP_LOCK(pmap);
1770	oldpmap = pmap_switch(pmap);
1771
1772	pte = pmap_find_vhpt(va);
1773	KASSERT(pte != NULL, ("pte"));
1774	if (wired && !pmap_wired(pte)) {
1775		pmap->pm_stats.wired_count++;
1776		pmap_set_wired(pte);
1777	} else if (!wired && pmap_wired(pte)) {
1778		pmap->pm_stats.wired_count--;
1779		pmap_clear_wired(pte);
1780	}
1781
1782	pmap_switch(oldpmap);
1783	PMAP_UNLOCK(pmap);
1784}
1785
1786
1787
1788/*
1789 *	Copy the range specified by src_addr/len
1790 *	from the source map to the range dst_addr/len
1791 *	in the destination map.
1792 *
1793 *	This routine is only advisory and need not do anything.
1794 */
1795
1796void
1797pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1798	  vm_offset_t src_addr)
1799{
1800}
1801
1802
1803/*
1804 *	pmap_zero_page zeros the specified hardware page by
1805 *	mapping it into virtual memory and using bzero to clear
1806 *	its contents.
1807 */
1808
1809void
1810pmap_zero_page(vm_page_t m)
1811{
1812	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1813	bzero((caddr_t) va, PAGE_SIZE);
1814}
1815
1816
1817/*
1818 *	pmap_zero_page_area zeros the specified hardware page by
1819 *	mapping it into virtual memory and using bzero to clear
1820 *	its contents.
1821 *
1822 *	off and size must reside within a single page.
1823 */
1824
1825void
1826pmap_zero_page_area(vm_page_t m, int off, int size)
1827{
1828	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1829	bzero((char *)(caddr_t)va + off, size);
1830}
1831
1832
1833/*
1834 *	pmap_zero_page_idle zeros the specified hardware page by
1835 *	mapping it into virtual memory and using bzero to clear
1836 *	its contents.  This is for the vm_idlezero process.
1837 */
1838
1839void
1840pmap_zero_page_idle(vm_page_t m)
1841{
1842	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1843	bzero((caddr_t) va, PAGE_SIZE);
1844}
1845
1846
1847/*
1848 *	pmap_copy_page copies the specified (machine independent)
1849 *	page by mapping the page into virtual memory and using
1850 *	bcopy to copy the page, one machine dependent page at a
1851 *	time.
1852 */
1853void
1854pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1855{
1856	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
1857	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
1858	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
1859}
1860
1861/*
1862 * Returns true if the pmap's pv is one of the first
1863 * 16 pvs linked to from this page.  This count may
1864 * be changed upwards or downwards in the future; it
1865 * is only necessary that true be returned for a small
1866 * subset of pmaps for proper page aging.
1867 */
1868boolean_t
1869pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1870{
1871	pv_entry_t pv;
1872	int loops = 0;
1873	boolean_t rv;
1874
1875	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1876	    ("pmap_page_exists_quick: page %p is not managed", m));
1877	rv = FALSE;
1878	vm_page_lock_queues();
1879	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1880		if (pv->pv_pmap == pmap) {
1881			rv = TRUE;
1882			break;
1883		}
1884		loops++;
1885		if (loops >= 16)
1886			break;
1887	}
1888	vm_page_unlock_queues();
1889	return (rv);
1890}
1891
1892/*
1893 *	pmap_page_wired_mappings:
1894 *
1895 *	Return the number of managed mappings to the given physical page
1896 *	that are wired.
1897 */
1898int
1899pmap_page_wired_mappings(vm_page_t m)
1900{
1901	struct ia64_lpte *pte;
1902	pmap_t oldpmap, pmap;
1903	pv_entry_t pv;
1904	int count;
1905
1906	count = 0;
1907	if ((m->flags & PG_FICTITIOUS) != 0)
1908		return (count);
1909	vm_page_lock_queues();
1910	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1911		pmap = pv->pv_pmap;
1912		PMAP_LOCK(pmap);
1913		oldpmap = pmap_switch(pmap);
1914		pte = pmap_find_vhpt(pv->pv_va);
1915		KASSERT(pte != NULL, ("pte"));
1916		if (pmap_wired(pte))
1917			count++;
1918		pmap_switch(oldpmap);
1919		PMAP_UNLOCK(pmap);
1920	}
1921	vm_page_unlock_queues();
1922	return (count);
1923}
1924
1925/*
1926 * Remove all pages from specified address space
1927 * this aids process exit speeds.  Also, this code
1928 * is special cased for current process only, but
1929 * can have the more generic (and slightly slower)
1930 * mode enabled.  This is much faster than pmap_remove
1931 * in the case of running down an entire address space.
1932 */
1933void
1934pmap_remove_pages(pmap_t pmap)
1935{
1936	pmap_t oldpmap;
1937	pv_entry_t pv, npv;
1938
1939	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
1940		printf("warning: %s called with non-current pmap\n",
1941		    __func__);
1942		return;
1943	}
1944
1945	vm_page_lock_queues();
1946	PMAP_LOCK(pmap);
1947	oldpmap = pmap_switch(pmap);
1948
1949	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
1950		struct ia64_lpte *pte;
1951
1952		npv = TAILQ_NEXT(pv, pv_plist);
1953
1954		pte = pmap_find_vhpt(pv->pv_va);
1955		KASSERT(pte != NULL, ("pte"));
1956		if (!pmap_wired(pte))
1957			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
1958	}
1959
1960	pmap_switch(oldpmap);
1961	PMAP_UNLOCK(pmap);
1962	vm_page_unlock_queues();
1963}
1964
1965/*
1966 *	pmap_ts_referenced:
1967 *
1968 *	Return a count of reference bits for a page, clearing those bits.
1969 *	It is not necessary for every reference bit to be cleared, but it
1970 *	is necessary that 0 only be returned when there are truly no
1971 *	reference bits set.
1972 *
1973 *	XXX: The exact number of bits to check and clear is a matter that
1974 *	should be tested and standardized at some point in the future for
1975 *	optimal aging of shared pages.
1976 */
1977int
1978pmap_ts_referenced(vm_page_t m)
1979{
1980	struct ia64_lpte *pte;
1981	pmap_t oldpmap;
1982	pv_entry_t pv;
1983	int count = 0;
1984
1985	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
1986	    ("pmap_ts_referenced: page %p is not managed", m));
1987	vm_page_lock_queues();
1988	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1989		PMAP_LOCK(pv->pv_pmap);
1990		oldpmap = pmap_switch(pv->pv_pmap);
1991		pte = pmap_find_vhpt(pv->pv_va);
1992		KASSERT(pte != NULL, ("pte"));
1993		if (pmap_accessed(pte)) {
1994			count++;
1995			pmap_clear_accessed(pte);
1996			pmap_invalidate_page(pv->pv_va);
1997		}
1998		pmap_switch(oldpmap);
1999		PMAP_UNLOCK(pv->pv_pmap);
2000	}
2001	vm_page_unlock_queues();
2002	return (count);
2003}
2004
2005/*
2006 *	pmap_is_modified:
2007 *
2008 *	Return whether or not the specified physical page was modified
2009 *	in any physical maps.
2010 */
2011boolean_t
2012pmap_is_modified(vm_page_t m)
2013{
2014	struct ia64_lpte *pte;
2015	pmap_t oldpmap;
2016	pv_entry_t pv;
2017	boolean_t rv;
2018
2019	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2020	    ("pmap_is_modified: page %p is not managed", m));
2021	rv = FALSE;
2022
2023	/*
2024	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be
2025	 * concurrently set while the object is locked.  Thus, if PG_WRITEABLE
2026	 * is clear, no PTEs can be dirty.
2027	 */
2028	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2029	if ((m->oflags & VPO_BUSY) == 0 &&
2030	    (m->flags & PG_WRITEABLE) == 0)
2031		return (rv);
2032	vm_page_lock_queues();
2033	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2034		PMAP_LOCK(pv->pv_pmap);
2035		oldpmap = pmap_switch(pv->pv_pmap);
2036		pte = pmap_find_vhpt(pv->pv_va);
2037		pmap_switch(oldpmap);
2038		KASSERT(pte != NULL, ("pte"));
2039		rv = pmap_dirty(pte) ? TRUE : FALSE;
2040		PMAP_UNLOCK(pv->pv_pmap);
2041		if (rv)
2042			break;
2043	}
2044	vm_page_unlock_queues();
2045	return (rv);
2046}
2047
2048/*
2049 *	pmap_is_prefaultable:
2050 *
2051 *	Return whether or not the specified virtual address is eligible
2052 *	for prefault.
2053 */
2054boolean_t
2055pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2056{
2057	struct ia64_lpte *pte;
2058
2059	pte = pmap_find_vhpt(addr);
2060	if (pte != NULL && pmap_present(pte))
2061		return (FALSE);
2062	return (TRUE);
2063}
2064
2065/*
2066 *	pmap_is_referenced:
2067 *
2068 *	Return whether or not the specified physical page was referenced
2069 *	in any physical maps.
2070 */
2071boolean_t
2072pmap_is_referenced(vm_page_t m)
2073{
2074	struct ia64_lpte *pte;
2075	pmap_t oldpmap;
2076	pv_entry_t pv;
2077	boolean_t rv;
2078
2079	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2080	    ("pmap_is_referenced: page %p is not managed", m));
2081	rv = FALSE;
2082	vm_page_lock_queues();
2083	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2084		PMAP_LOCK(pv->pv_pmap);
2085		oldpmap = pmap_switch(pv->pv_pmap);
2086		pte = pmap_find_vhpt(pv->pv_va);
2087		pmap_switch(oldpmap);
2088		KASSERT(pte != NULL, ("pte"));
2089		rv = pmap_accessed(pte) ? TRUE : FALSE;
2090		PMAP_UNLOCK(pv->pv_pmap);
2091		if (rv)
2092			break;
2093	}
2094	vm_page_unlock_queues();
2095	return (rv);
2096}
2097
2098/*
2099 *	Clear the modify bits on the specified physical page.
2100 */
2101void
2102pmap_clear_modify(vm_page_t m)
2103{
2104	struct ia64_lpte *pte;
2105	pmap_t oldpmap;
2106	pv_entry_t pv;
2107
2108	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2109	    ("pmap_clear_modify: page %p is not managed", m));
2110	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2111	KASSERT((m->oflags & VPO_BUSY) == 0,
2112	    ("pmap_clear_modify: page %p is busy", m));
2113
2114	/*
2115	 * If the page is not PG_WRITEABLE, then no PTEs can be modified.
2116	 * If the object containing the page is locked and the page is not
2117	 * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set.
2118	 */
2119	if ((m->flags & PG_WRITEABLE) == 0)
2120		return;
2121	vm_page_lock_queues();
2122	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2123		PMAP_LOCK(pv->pv_pmap);
2124		oldpmap = pmap_switch(pv->pv_pmap);
2125		pte = pmap_find_vhpt(pv->pv_va);
2126		KASSERT(pte != NULL, ("pte"));
2127		if (pmap_dirty(pte)) {
2128			pmap_clear_dirty(pte);
2129			pmap_invalidate_page(pv->pv_va);
2130		}
2131		pmap_switch(oldpmap);
2132		PMAP_UNLOCK(pv->pv_pmap);
2133	}
2134	vm_page_unlock_queues();
2135}
2136
2137/*
2138 *	pmap_clear_reference:
2139 *
2140 *	Clear the reference bit on the specified physical page.
2141 */
2142void
2143pmap_clear_reference(vm_page_t m)
2144{
2145	struct ia64_lpte *pte;
2146	pmap_t oldpmap;
2147	pv_entry_t pv;
2148
2149	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2150	    ("pmap_clear_reference: page %p is not managed", m));
2151	vm_page_lock_queues();
2152	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2153		PMAP_LOCK(pv->pv_pmap);
2154		oldpmap = pmap_switch(pv->pv_pmap);
2155		pte = pmap_find_vhpt(pv->pv_va);
2156		KASSERT(pte != NULL, ("pte"));
2157		if (pmap_accessed(pte)) {
2158			pmap_clear_accessed(pte);
2159			pmap_invalidate_page(pv->pv_va);
2160		}
2161		pmap_switch(oldpmap);
2162		PMAP_UNLOCK(pv->pv_pmap);
2163	}
2164	vm_page_unlock_queues();
2165}
2166
2167/*
2168 * Clear the write and modified bits in each of the given page's mappings.
2169 */
2170void
2171pmap_remove_write(vm_page_t m)
2172{
2173	struct ia64_lpte *pte;
2174	pmap_t oldpmap, pmap;
2175	pv_entry_t pv;
2176	vm_prot_t prot;
2177
2178	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2179	    ("pmap_remove_write: page %p is not managed", m));
2180
2181	/*
2182	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by
2183	 * another thread while the object is locked.  Thus, if PG_WRITEABLE
2184	 * is clear, no page table entries need updating.
2185	 */
2186	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2187	if ((m->oflags & VPO_BUSY) == 0 &&
2188	    (m->flags & PG_WRITEABLE) == 0)
2189		return;
2190	vm_page_lock_queues();
2191	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2192		pmap = pv->pv_pmap;
2193		PMAP_LOCK(pmap);
2194		oldpmap = pmap_switch(pmap);
2195		pte = pmap_find_vhpt(pv->pv_va);
2196		KASSERT(pte != NULL, ("pte"));
2197		prot = pmap_prot(pte);
2198		if ((prot & VM_PROT_WRITE) != 0) {
2199			if (pmap_dirty(pte)) {
2200				vm_page_dirty(m);
2201				pmap_clear_dirty(pte);
2202			}
2203			prot &= ~VM_PROT_WRITE;
2204			pmap_pte_prot(pmap, pte, prot);
2205			pmap_invalidate_page(pv->pv_va);
2206		}
2207		pmap_switch(oldpmap);
2208		PMAP_UNLOCK(pmap);
2209	}
2210	vm_page_flag_clear(m, PG_WRITEABLE);
2211	vm_page_unlock_queues();
2212}
2213
2214/*
2215 * Map a set of physical memory pages into the kernel virtual
2216 * address space. Return a pointer to where it is mapped. This
2217 * routine is intended to be used for mapping device memory,
2218 * NOT real memory.
2219 */
2220void *
2221pmap_mapdev(vm_paddr_t pa, vm_size_t size)
2222{
2223	vm_offset_t va;
2224
2225	va = pa | IA64_RR_BASE(6);
2226	return ((void *)va);
2227}
2228
2229/*
2230 * 'Unmap' a range mapped by pmap_mapdev().
2231 */
2232void
2233pmap_unmapdev(vm_offset_t va, vm_size_t size)
2234{
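	/*
	 * Nothing to do: pmap_mapdev() hands out addresses from the
	 * region 6 direct mapping, so there is no per-mapping state
	 * to tear down here.
	 */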
2235}
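
/*
 * Illustrative sketch only (not taken from this file): a driver that
 * needs access to a device register window could use the pair above
 * roughly as follows; the physical address is hypothetical.
 *
 *	void *regs;
 *
 *	regs = pmap_mapdev(0xfed00000UL, PAGE_SIZE);
 *	... program the device through "regs" ...
 *	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
 *
 * On ia64 both calls are trivial: the returned address lies in the
 * region 6 direct map, so no kernel page-table entries are involved.
 */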
2236
2237/*
2238 * Perform the pmap work for mincore(2).
2239 */
2240int
2241pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2242{
2243	pmap_t oldpmap;
2244	struct ia64_lpte *pte, tpte;
2245	vm_paddr_t pa;
2246	int val;
2247
2248	PMAP_LOCK(pmap);
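	/*
	 * The lookup below is retried whenever vm_page_pa_tryrelock()
	 * has to drop the pmap lock in order to acquire the lock of the
	 * underlying physical page.
	 */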
2249retry:
2250	oldpmap = pmap_switch(pmap);
2251	pte = pmap_find_vhpt(addr);
2252	if (pte != NULL) {
2253		tpte = *pte;
2254		pte = &tpte;
2255	}
2256	pmap_switch(oldpmap);
2257	if (pte == NULL || !pmap_present(pte)) {
2258		val = 0;
2259		goto out;
2260	}
2261	val = MINCORE_INCORE;
2262	if (pmap_dirty(pte))
2263		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2264	if (pmap_accessed(pte))
2265		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2266	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2267	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
2268	    pmap_managed(pte)) {
2269		pa = pmap_ppn(pte);
2270		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
2271		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2272			goto retry;
2273	} else
2274out:
2275		PA_UNLOCK_COND(*locked_pa);
2276	PMAP_UNLOCK(pmap);
2277	return (val);
2278}
2279
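/*
 * Make the given thread's address space the active one on the current
 * CPU by switching to its pmap.
 */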
2280void
2281pmap_activate(struct thread *td)
2282{
2283	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2284}
2285
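/*
 * Switch the current CPU to the given pmap by loading its region IDs
 * into the region registers of the user regions (0 through
 * IA64_VM_MINKERN_REGION - 1).  Each region register value packs the
 * RID into bits 8 and up, the page size (PAGE_SHIFT) into bits 2-7,
 * and sets the VHPT walker enable bit.  A NULL pmap installs fixed
 * per-region RIDs.  The previously active pmap is returned so that
 * the caller can restore it later.
 */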
2286pmap_t
2287pmap_switch(pmap_t pm)
2288{
2289	pmap_t prevpm;
2290	int i;
2291
2292	critical_enter();
2293	prevpm = PCPU_GET(md.current_pmap);
2294	if (prevpm == pm)
2295		goto out;
2296	if (pm == NULL) {
2297		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2298			ia64_set_rr(IA64_RR_BASE(i),
2299			    (i << 8)|(PAGE_SHIFT << 2)|1);
2300		}
2301	} else {
2302		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2303			ia64_set_rr(IA64_RR_BASE(i),
2304			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2305		}
2306	}
2307	PCPU_SET(md.current_pmap, pm);
2308	ia64_srlz_d();
2309
2310out:
2311	critical_exit();
2312	return (prevpm);
2313}
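
/*
 * The usual pattern in this file is to bracket VHPT lookups with a
 * temporary switch to the pmap being inspected, for example:
 *
 *	PMAP_LOCK(pmap);
 *	oldpmap = pmap_switch(pmap);
 *	pte = pmap_find_vhpt(va);
 *	...
 *	pmap_switch(oldpmap);
 *	PMAP_UNLOCK(pmap);
 */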
2314
2315void
2316pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2317{
2318	pmap_t oldpm;
2319	struct ia64_lpte *pte;
2320	vm_offset_t lim;
2321	vm_size_t len;
2322
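	/*
	 * Round the start down and the size up so that the range covers
	 * whole 32-byte blocks, the granularity assumed here for the
	 * instruction cache synchronization done by ia64_sync_icache().
	 */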
2323	sz += va & 31;
2324	va &= ~31;
2325	sz = (sz + 31) & ~31;
2326
2327	PMAP_LOCK(pm);
2328	oldpm = pmap_switch(pm);
2329	while (sz > 0) {
2330		lim = round_page(va);
2331		len = MIN(lim - va, sz);
2332		pte = pmap_find_vhpt(va);
2333		if (pte != NULL && pmap_present(pte))
2334			ia64_sync_icache(va, len);
2335		va += len;
2336		sz -= len;
2337	}
2338	pmap_switch(oldpm);
2339	PMAP_UNLOCK(pm);
2340}
2341
2342/*
2343 *	Increase the starting virtual address of the given mapping if a
2344 *	different alignment might result in more superpage mappings.
2345 */
2346void
2347pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2348    vm_offset_t *addr, vm_size_t size)
2349{
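	/*
	 * Nothing to do: this pmap does not create superpage mappings,
	 * so no particular alignment is preferred.
	 */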
2350}
2351
2352#include "opt_ddb.h"
2353
2354#ifdef DDB
2355
2356#include <ddb/ddb.h>
2357
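/*
 * Human-readable names for the translation page sizes; index i
 * corresponds to a page size of 2^i bytes.
 */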
2358static const char *psnames[] = {
2359	"1B",	"2B",	"4B",	"8B",
2360	"16B",	"32B",	"64B",	"128B",
2361	"256B",	"512B",	"1K",	"2K",
2362	"4K",	"8K",	"16K",	"32K",
2363	"64K",	"128K",	"256K",	"512K",
2364	"1M",	"2M",	"4M",	"8M",
2365	"16M",	"32M",	"64M",	"128M",
2366	"256M",	"512M",	"1G",	"2G"
2367};
2368
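/*
 * Dump the translation registers via PAL_VM_TR_READ: type 0 selects the
 * instruction TRs (the "itr" command below), type 1 the data TRs ("dtr").
 * Fields whose validity bit is clear in the PAL result are blanked out.
 */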
2369static void
2370print_trs(int type)
2371{
2372	struct ia64_pal_result res;
2373	int i, maxtr;
2374	struct {
2375		pt_entry_t	pte;
2376		uint64_t	itir;
2377		uint64_t	ifa;
2378		struct ia64_rr	rr;
2379	} buf;
2380	static const char *manames[] = {
2381		"WB",	"bad",	"bad",	"bad",
2382		"UC",	"UCE",	"WC",	"NaT",
2383	};
2384
2385	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2386	if (res.pal_status != 0) {
2387		db_printf("Can't get VM summary\n");
2388		return;
2389	}
2390
2391	if (type == 0)
2392		maxtr = (res.pal_result[0] >> 40) & 0xff;
2393	else
2394		maxtr = (res.pal_result[0] >> 32) & 0xff;
2395
2396	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2397	for (i = 0; i <= maxtr; i++) {
2398		bzero(&buf, sizeof(buf));
2399		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
2400		    ia64_tpa((uint64_t)&buf));
2401		if (!(res.pal_result[0] & 1))
2402			buf.pte &= ~PTE_AR_MASK;
2403		if (!(res.pal_result[0] & 2))
2404			buf.pte &= ~PTE_PL_MASK;
2405		if (!(res.pal_result[0] & 4))
2406			pmap_clear_dirty(&buf);
2407		if (!(res.pal_result[0] & 8))
2408			buf.pte &= ~PTE_MA_MASK;
2409		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2410		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2411		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2412		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2413		    (buf.pte & PTE_ED) ? 1 : 0,
2414		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2415		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2416		    (pmap_dirty(&buf)) ? 1 : 0,
2417		    (pmap_accessed(&buf)) ? 1 : 0,
2418		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2419		    (pmap_present(&buf)) ? 1 : 0,
2420		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2421	}
2422}
2423
2424DB_COMMAND(itr, db_itr)
2425{
2426	print_trs(0);
2427}
2428
2429DB_COMMAND(dtr, db_dtr)
2430{
2431	print_trs(1);
2432}
2433
2434DB_COMMAND(rr, db_rr)
2435{
2436	int i;
2437	uint64_t t;
2438	struct ia64_rr rr;
2439
2440	db_printf("RR RID    PgSz VE\n");
2441	for (i = 0; i < 8; i++) {
2442		__asm __volatile ("mov %0=rr[%1]"
2443				  : "=r"(t)
2444				  : "r"(IA64_RR_BASE(i)));
2445		*(uint64_t *) &rr = t;
2446		db_printf("%d  %06x %4s %d\n",
2447		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2448	}
2449}
2450
2451DB_COMMAND(thash, db_thash)
2452{
2453	if (!have_addr)
2454		return;
2455
2456	db_printf("%p\n", (void *) ia64_thash(addr));
2457}
2458
2459DB_COMMAND(ttag, db_ttag)
2460{
2461	if (!have_addr)
2462		return;
2463
2464	db_printf("0x%lx\n", ia64_ttag(addr));
2465}
2466
2467DB_COMMAND(kpte, db_kpte)
2468{
2469	struct ia64_lpte *pte;
2470
2471	if (!have_addr) {
2472		db_printf("usage: kpte <kva>\n");
2473		return;
2474	}
2475	if (addr < VM_MIN_KERNEL_ADDRESS) {
2476		db_printf("kpte: error: invalid <kva>\n");
2477		return;
2478	}
2479	pte = pmap_find_kpte(addr);
2480	db_printf("kpte at %p:\n", pte);
2481	db_printf("  pte  =%016lx\n", pte->pte);
2482	db_printf("  itir =%016lx\n", pte->itir);
2483	db_printf("  tag  =%016lx\n", pte->tag);
2484	db_printf("  chain=%016lx\n", pte->chain);
2485}
2486
2487#endif
2488