1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 200200 2009-12-07 00:54:02Z marcel $");
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/lock.h>
54#include <sys/mman.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/smp.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60
61#include <vm/vm.h>
62#include <vm/vm_page.h>
63#include <vm/vm_map.h>
64#include <vm/vm_object.h>
65#include <vm/vm_pageout.h>
66#include <vm/uma.h>
67
68#include <machine/md_var.h>
69#include <machine/pal.h>
70
71/*
72 *	Manages physical address maps.
73 *
74 *	In addition to hardware address maps, this
75 *	module is called upon to provide software-use-only
76 *	maps which may or may not be stored in the same
77 *	form as hardware maps.  These pseudo-maps are
78 *	used to store intermediate results from copy
79 *	operations to and from address spaces.
80 *
81 *	Since the information managed by this module is
82 *	also stored by the logical address mapping module,
83 *	this module may throw away valid virtual-to-physical
84 *	mappings at almost any time.  However, invalidations
85 *	of virtual-to-physical mappings must be done as
86 *	requested.
87 *
88 *	In order to cope with hardware architectures which
89 *	make virtual-to-physical map invalidates expensive,
90 *	this module may delay invalidate or reduced protection
91 *	operations until such time as they are actually
92 *	necessary.  This module is given full information as
93 *	to which processors are currently using which maps,
94 *	and to when physical maps must be made correct.
95 */
96
97/*
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
101 *
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
104 *
105 * Region 0..4
106 *	User virtually mapped
107 *
108 * Region 5
109 *	Kernel virtually mapped
110 *
111 * Region 6
112 *	Kernel physically mapped uncacheable
113 *
114 * Region 7
115 *	Kernel physically mapped cacheable
116 */
117
118/* XXX move to a header. */
119extern uint64_t ia64_gateway_page[];
120
121#ifndef PMAP_SHPGPERPROC
122#define PMAP_SHPGPERPROC 200
123#endif
124
125#if !defined(DIAGNOSTIC)
126#define PMAP_INLINE __inline
127#else
128#define PMAP_INLINE
129#endif
130
131#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
132#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
133#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
134#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
135#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
136#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
137#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
138#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
139
140#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
141#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
142#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
143#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
144
145#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
146
147/*
148 * The VHPT bucket head structure.
149 */
150struct ia64_bucket {
151	uint64_t	chain;
152	struct mtx	mutex;
153	u_int		length;
154};
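/*
 * Each VHPT entry's chain field points at one of these buckets (see
 * pmap_initialize_vhpt()).  The bucket's chain field holds the physical
 * address of the first ia64_lpte on the collision chain and each lpte's
 * chain field links to the next one; mutex serializes chain updates and
 * length feeds the machdep.vhpt.population sysctl.
 */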
155
156/*
157 * Statically allocated kernel pmap
158 */
159struct pmap kernel_pmap_store;
160
161vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
162vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
163
164/*
165 * Kernel virtual memory management.
166 */
167static int nkpt;
168struct ia64_lpte ***ia64_kptdir;
169#define KPTE_DIR0_INDEX(va) \
170	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
171#define KPTE_DIR1_INDEX(va) \
172	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
173#define KPTE_PTE_INDEX(va) \
174	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
175#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
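/*
 * For example, with the default 8KB page size (PAGE_SHIFT == 13) and a
 * 32-byte struct ia64_lpte (which the masks above assume), a leaf page
 * holds 256 PTEs (NKPTEPG), a first-level directory page holds 1024 leaf
 * pointers and ia64_kptdir holds 1024 directory pointers, so the kernel
 * 'page tables' can map 1024 * 1024 * 256 * 8KB = 2TB of region 5 VA.
 */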
176
177vm_offset_t kernel_vm_end;
178
179/* Values for ptc.e. XXX values for SKI. */
180static uint64_t pmap_ptc_e_base = 0x100000000;
181static uint64_t pmap_ptc_e_count1 = 3;
182static uint64_t pmap_ptc_e_count2 = 2;
183static uint64_t pmap_ptc_e_stride1 = 0x2000;
184static uint64_t pmap_ptc_e_stride2 = 0x100000000;
185struct mtx pmap_ptcmutex;
186
187/*
188 * Data for the RID allocator
189 */
190static int pmap_ridcount;
191static int pmap_rididx;
192static int pmap_ridmapsz;
193static int pmap_ridmax;
194static uint64_t *pmap_ridmap;
195struct mtx pmap_ridmutex;
196
197/*
198 * Data for the pv entry allocation mechanism
199 */
200static uma_zone_t pvzone;
201static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
202
203/*
204 * Data for allocating PTEs for user processes.
205 */
206static uma_zone_t ptezone;
207
208/*
209 * Virtual Hash Page Table (VHPT) data.
210 */
211/* SYSCTL_DECL(_machdep); */
212SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
213
214struct ia64_bucket *pmap_vhpt_bucket;
215
216int pmap_vhpt_nbuckets;
217SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
218    &pmap_vhpt_nbuckets, 0, "");
219
220int pmap_vhpt_log2size = 0;
221TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
222SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
223    &pmap_vhpt_log2size, 0, "");
224
225static int pmap_vhpt_inserts;
226SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
227    &pmap_vhpt_inserts, 0, "");
228
229static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
230SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
231    NULL, 0, pmap_vhpt_population, "I", "");
232
233static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
234
235static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
236static pv_entry_t get_pv_entry(pmap_t locked_pmap);
237
238static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
239		    vm_page_t m, vm_prot_t prot);
240static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
241static void	pmap_invalidate_all(pmap_t pmap);
242static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
243		    vm_offset_t va, pv_entry_t pv, int freepte);
244static int	pmap_remove_vhpt(vm_offset_t va);
245static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
246		    vm_page_t m);
247
248vm_offset_t
249pmap_steal_memory(vm_size_t size)
250{
251	vm_size_t bank_size;
252	vm_offset_t pa, va;
253
254	size = round_page(size);
255
256	bank_size = phys_avail[1] - phys_avail[0];
257	while (size > bank_size) {
258		int i;
259		for (i = 0; phys_avail[i+2]; i+= 2) {
260			phys_avail[i] = phys_avail[i+2];
261			phys_avail[i+1] = phys_avail[i+3];
262		}
263		phys_avail[i] = 0;
264		phys_avail[i+1] = 0;
265		if (!phys_avail[0])
266			panic("pmap_steal_memory: out of memory");
267		bank_size = phys_avail[1] - phys_avail[0];
268	}
269
270	pa = phys_avail[0];
271	phys_avail[0] += size;
272
273	va = IA64_PHYS_TO_RR7(pa);
274	bzero((caddr_t) va, size);
275	return va;
276}
277
278static void
279pmap_initialize_vhpt(vm_offset_t vhpt)
280{
281	struct ia64_lpte *pte;
282	u_int i;
283
284	pte = (struct ia64_lpte *)vhpt;
285	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
286		pte[i].pte = 0;
287		pte[i].itir = 0;
288		pte[i].tag = 1UL << 63; /* Invalid tag */
289		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
290	}
291}
292
293#ifdef SMP
294MALLOC_DECLARE(M_SMP);
295
296vm_offset_t
297pmap_alloc_vhpt(void)
298{
299	vm_offset_t vhpt;
300	vm_size_t size;
301
302	size = 1UL << pmap_vhpt_log2size;
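	/*
	 * The VHPT must be physically contiguous and naturally aligned
	 * on its size, hence alignment == size in the contigmalloc()
	 * call below.
	 */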
303	vhpt = (uintptr_t)contigmalloc(size, M_SMP, 0, 0UL, ~0UL, size, 0UL);
304	if (vhpt != 0) {
305		vhpt = IA64_PHYS_TO_RR7(ia64_tpa(vhpt));
306		pmap_initialize_vhpt(vhpt);
307	}
308	return (vhpt);
309}
310#endif
311
312/*
313 *	Bootstrap the system enough to run with virtual memory.
314 */
315void
316pmap_bootstrap(void)
317{
318	struct ia64_pal_result res;
319	vm_offset_t base;
320	size_t size;
321	int i, j, count, ridbits;
322
323	/*
324	 * Query the PAL Code to find the loop parameters for the
325	 * ptc.e instruction.
326	 */
327	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
328	if (res.pal_status != 0)
329		panic("Can't configure ptc.e parameters");
330	pmap_ptc_e_base = res.pal_result[0];
331	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
332	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
333	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
334	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
335	if (bootverbose)
336		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
337		       "stride1=0x%lx, stride2=0x%lx\n",
338		       pmap_ptc_e_base,
339		       pmap_ptc_e_count1,
340		       pmap_ptc_e_count2,
341		       pmap_ptc_e_stride1,
342		       pmap_ptc_e_stride2);
343	mtx_init(&pmap_ptcmutex, "Global PTC lock", NULL, MTX_SPIN);
344
345	/*
346	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
347	 *
348	 * We currently need at least 19 bits in the RID because PID_MAX
349	 * can only be encoded in 17 bits and we need RIDs for 5 regions
350	 * per process. With PID_MAX equalling 99999 this means that we
351	 * need to be able to encode 499995 (=5*PID_MAX).
352	 * The Itanium processor only has 18 bits and the architected
353	 * minimum is exactly that. So, we cannot use a PID based scheme
354	 * in those cases. Enter pmap_ridmap...
355	 * We should avoid the map when running on a processor that has
356	 * implemented enough bits. This means that we should pass the
357	 * process/thread ID to pmap. This we currently don't do, so we
358	 * use the map anyway. However, we don't want to allocate a map
359	 * that is large enough to cover the range dictated by the number
360	 * of bits in the RID, because that may result in a RID map of
361	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
362	 * The bottom line: we create a 32KB map when the processor only
363	 * implements 18 bits (or when we can't figure it out). Otherwise
364	 * we create a 64KB map.
365	 */
366	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
367	if (res.pal_status != 0) {
368		if (bootverbose)
369			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
370		ridbits = 18; /* guaranteed minimum */
371	} else {
372		ridbits = (res.pal_result[1] >> 8) & 0xff;
373		if (bootverbose)
374			printf("Processor supports %d Region ID bits\n",
375			    ridbits);
376	}
377	if (ridbits > 19)
378		ridbits = 19;
379
380	pmap_ridmax = (1 << ridbits);
381	pmap_ridmapsz = pmap_ridmax / 64;
382	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
383	pmap_ridmap[0] |= 0xff;
384	pmap_rididx = 0;
385	pmap_ridcount = 8;
386	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
387
388	/*
389	 * Allocate some memory for initial kernel 'page tables'.
390	 */
391	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
392	nkpt = 0;
393	kernel_vm_end = VM_MIN_KERNEL_ADDRESS - VM_GATEWAY_SIZE;
394
395	for (i = 0; phys_avail[i+2]; i+= 2)
396		;
397	count = i+2;
398
399	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
400	if (pmap_vhpt_log2size == 0)
401		pmap_vhpt_log2size = 20;
402	else if (pmap_vhpt_log2size < 15)
403		pmap_vhpt_log2size = 15;
404	else if (pmap_vhpt_log2size > 61)
405		pmap_vhpt_log2size = 61;
406
407	base = 0;
408	size = 1UL << pmap_vhpt_log2size;
409	for (i = 0; i < count; i += 2) {
410		base = (phys_avail[i] + size - 1) & ~(size - 1);
411		if (base + size <= phys_avail[i+1])
412			break;
413	}
414	if (!phys_avail[i])
415		panic("Unable to allocate VHPT");
416
417	if (base != phys_avail[i]) {
418		/* Split this region. */
419		for (j = count; j > i; j -= 2) {
420			phys_avail[j] = phys_avail[j-2];
421			phys_avail[j+1] = phys_avail[j-2+1];
422		}
423		phys_avail[i+1] = base;
424		phys_avail[i+2] = base + size;
425	} else
426		phys_avail[i] = base + size;
427
428	base = IA64_PHYS_TO_RR7(base);
429	PCPU_SET(vhpt, base);
430	if (bootverbose)
431		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
432
433	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
434	pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
435	    sizeof(struct ia64_bucket));
436	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
437		/* Stolen memory is zeroed. */
438		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
439		    MTX_NOWITNESS | MTX_SPIN);
440	}
441
442	pmap_initialize_vhpt(base);
443	map_vhpt(base);
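	/*
	 * cr.pta layout: ve is bit 0, the table size is bits 2..7, vf
	 * (bit 8) selects the long-format VHPT and the table base sits
	 * in the upper bits, which is what the expression below encodes.
	 */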
444	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
445	ia64_srlz_i();
446
447	virtual_avail = VM_MIN_KERNEL_ADDRESS;
448	virtual_end = VM_MAX_KERNEL_ADDRESS;
449
450	/*
451	 * Initialize the kernel pmap (which is statically allocated).
452	 */
453	PMAP_LOCK_INIT(kernel_pmap);
454	for (i = 0; i < 5; i++)
455		kernel_pmap->pm_rid[i] = 0;
456	kernel_pmap->pm_active = 1;
457	TAILQ_INIT(&kernel_pmap->pm_pvlist);
458	PCPU_SET(current_pmap, kernel_pmap);
459
460	/*
461	 * Region 5 is mapped via the vhpt.
462	 */
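	/*
	 * Region register layout: ve is bit 0, the preferred page size
	 * is bits 2..7 and the RID occupies bits 8..31, so the value
	 * below installs RID 5 with the base page size and enables the
	 * VHPT walker for region 5.
	 */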
463	ia64_set_rr(IA64_RR_BASE(5),
464		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
465
466	/*
467	 * Region 6 is direct mapped UC and region 7 is direct mapped
468	 * WB. The details of this are controlled by the Alt {I,D}TLB
469	 * handlers. Here we just make sure that they have the largest
470	 * possible page size to minimise TLB usage.
471	 */
472	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
473	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));
474	ia64_srlz_d();
475
476	/*
477	 * Clear out any random TLB entries left over from booting.
478	 */
479	pmap_invalidate_all(kernel_pmap);
480
481	map_gateway_page();
482}
483
484static int
485pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
486{
487	int count, error, i;
488
489	count = 0;
490	for (i = 0; i < pmap_vhpt_nbuckets; i++)
491		count += pmap_vhpt_bucket[i].length;
492
493	error = SYSCTL_OUT(req, &count, sizeof(count));
494	return (error);
495}
496
497/*
498 *	Initialize a vm_page's machine-dependent fields.
499 */
500void
501pmap_page_init(vm_page_t m)
502{
503
504	TAILQ_INIT(&m->md.pv_list);
505	m->md.pv_list_count = 0;
506}
507
508/*
509 *	Initialize the pmap module.
510 *	Called by vm_init, to initialize any structures that the pmap
511 *	system needs to map virtual memory.
512 */
513void
514pmap_init(void)
515{
516	int shpgperproc = PMAP_SHPGPERPROC;
517
518	/*
519	 * Initialize the address space (zone) for the pv entries.  Set a
520	 * high water mark so that the system can recover from excessive
521	 * numbers of pv entries.
522	 */
523	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
524	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
525	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
526	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
527	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
528	pv_entry_high_water = 9 * (pv_entry_max / 10);
529
530	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
531	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
532}
533
534
535/***************************************************
536 * Manipulate TLBs for a pmap
537 ***************************************************/
538
539static void
540pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
541{
542	struct ia64_lpte *pte;
543	struct pcpu *pc;
544	u_int vhpt_ofs;
545
546	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
547		("invalidating TLB for non-current pmap"));
548
549	vhpt_ofs = ia64_thash(va) - PCPU_GET(vhpt);
550	critical_enter();
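	/*
	 * Invalidate the entry in every CPU's copy of the VHPT first,
	 * then purge the translation from all TLBs with a single global
	 * ptc.ga under the PTC mutex.
	 */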
551	SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
552		pte = (struct ia64_lpte *)(pc->pc_vhpt + vhpt_ofs);
553		if (pte->tag == ia64_ttag(va))
554			pte->tag = 1UL << 63;
555	}
556	critical_exit();
557	mtx_lock_spin(&pmap_ptcmutex);
558	ia64_ptc_ga(va, PAGE_SHIFT << 2);
559	mtx_unlock_spin(&pmap_ptcmutex);
560}
561
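/*
 * Flush the entire TLB of the calling CPU by stepping ptc.e through the
 * loop geometry obtained from PAL_PTCE_INFO in pmap_bootstrap().
 */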
562static void
563pmap_invalidate_all_1(void *arg)
564{
565	uint64_t addr;
566	int i, j;
567
568	critical_enter();
569	addr = pmap_ptc_e_base;
570	for (i = 0; i < pmap_ptc_e_count1; i++) {
571		for (j = 0; j < pmap_ptc_e_count2; j++) {
572			ia64_ptc_e(addr);
573			addr += pmap_ptc_e_stride2;
574		}
575		addr += pmap_ptc_e_stride1;
576	}
577	critical_exit();
578}
579
580static void
581pmap_invalidate_all(pmap_t pmap)
582{
583
584	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
585		("invalidating TLB for non-current pmap"));
586
587#ifdef SMP
588	if (mp_ncpus > 1)
589		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
590	else
591#endif
592	pmap_invalidate_all_1(NULL);
593}
594
595static uint32_t
596pmap_allocate_rid(void)
597{
598	uint64_t bit, bits;
599	int rid;
600
601	mtx_lock(&pmap_ridmutex);
602	if (pmap_ridcount == pmap_ridmax)
603		panic("pmap_allocate_rid: All Region IDs used");
604
605	/* Find an index with a free bit. */
606	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
607		pmap_rididx++;
608		if (pmap_rididx == pmap_ridmapsz)
609			pmap_rididx = 0;
610	}
611	rid = pmap_rididx * 64;
612
613	/* Find a free bit. */
614	bit = 1UL;
615	while (bits & bit) {
616		rid++;
617		bit <<= 1;
618	}
619
620	pmap_ridmap[pmap_rididx] |= bit;
621	pmap_ridcount++;
622	mtx_unlock(&pmap_ridmutex);
623
624	return rid;
625}
626
627static void
628pmap_free_rid(uint32_t rid)
629{
630	uint64_t bit;
631	int idx;
632
633	idx = rid / 64;
634	bit = ~(1UL << (rid & 63));
635
636	mtx_lock(&pmap_ridmutex);
637	pmap_ridmap[idx] &= bit;
638	pmap_ridcount--;
639	mtx_unlock(&pmap_ridmutex);
640}
641
642/***************************************************
643 * Page table page management routines.....
644 ***************************************************/
645
646void
647pmap_pinit0(struct pmap *pmap)
648{
649	/* kernel_pmap is the same as any other pmap. */
650	pmap_pinit(pmap);
651}
652
653/*
654 * Initialize a preallocated and zeroed pmap structure,
655 * such as one in a vmspace structure.
656 */
657int
658pmap_pinit(struct pmap *pmap)
659{
660	int i;
661
662	PMAP_LOCK_INIT(pmap);
663	for (i = 0; i < 5; i++)
664		pmap->pm_rid[i] = pmap_allocate_rid();
665	pmap->pm_active = 0;
666	TAILQ_INIT(&pmap->pm_pvlist);
667	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
668	return (1);
669}
670
671/***************************************************
672 * Pmap allocation/deallocation routines.
673 ***************************************************/
674
675/*
676 * Release any resources held by the given physical map.
677 * Called when a pmap initialized by pmap_pinit is being released.
678 * Should only be called if the map contains no valid mappings.
679 */
680void
681pmap_release(pmap_t pmap)
682{
683	int i;
684
685	for (i = 0; i < 5; i++)
686		if (pmap->pm_rid[i])
687			pmap_free_rid(pmap->pm_rid[i]);
688	PMAP_LOCK_DESTROY(pmap);
689}
690
691/*
692 * grow the number of kernel page table entries, if needed
693 */
694void
695pmap_growkernel(vm_offset_t addr)
696{
697	struct ia64_lpte **dir1;
698	struct ia64_lpte *leaf;
699	vm_page_t nkpg;
700
701	while (kernel_vm_end <= addr) {
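		/*
		 * nkpt counts directory and leaf pages alike: at most
		 * PAGE_SIZE/8 directory pages (one pointer per kptdir
		 * slot) plus (PAGE_SIZE/8)^2 leaf pages, which is the
		 * limit checked here.
		 */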
702		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
703			panic("%s: out of kernel address space", __func__);
704
705		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
706		if (dir1 == NULL) {
707			nkpg = vm_page_alloc(NULL, nkpt++,
708			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
709			if (!nkpg)
710				panic("%s: cannot add dir. page", __func__);
711
712			dir1 = (struct ia64_lpte **)
713			    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
714			bzero(dir1, PAGE_SIZE);
715			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
716		}
717
718		nkpg = vm_page_alloc(NULL, nkpt++,
719		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
720		if (!nkpg)
721			panic("%s: cannot add PTE page", __func__);
722
723		leaf = (struct ia64_lpte *)
724		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
725		bzero(leaf, PAGE_SIZE);
726		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
727
728		kernel_vm_end += PAGE_SIZE * NKPTEPG;
729	}
730}
731
732/***************************************************
733 * page management routines.
734 ***************************************************/
735
736/*
737 * free the pv_entry back to the free list
738 */
739static PMAP_INLINE void
740free_pv_entry(pv_entry_t pv)
741{
742	pv_entry_count--;
743	uma_zfree(pvzone, pv);
744}
745
746/*
747 * get a new pv_entry, allocating a block from the system
748 * when needed.
749 */
750static pv_entry_t
751get_pv_entry(pmap_t locked_pmap)
752{
753	static const struct timeval printinterval = { 60, 0 };
754	static struct timeval lastprint;
755	struct vpgqueues *vpq;
756	struct ia64_lpte *pte;
757	pmap_t oldpmap, pmap;
758	pv_entry_t allocated_pv, next_pv, pv;
759	vm_offset_t va;
760	vm_page_t m;
761
762	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
763	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
764	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
765	if (allocated_pv != NULL) {
766		pv_entry_count++;
767		if (pv_entry_count > pv_entry_high_water)
768			pagedaemon_wakeup();
769		else
770			return (allocated_pv);
771	}
772
773	/*
774	 * Reclaim pv entries: At first, destroy mappings to inactive
775	 * pages.  After that, if a pv entry is still needed, destroy
776	 * mappings to active pages.
777	 */
778	if (ratecheck(&lastprint, &printinterval))
779		printf("Approaching the limit on PV entries, "
780		    "increase the vm.pmap.shpgperproc tunable.\n");
781	vpq = &vm_page_queues[PQ_INACTIVE];
782retry:
783	TAILQ_FOREACH(m, &vpq->pl, pageq) {
784		if (m->hold_count || m->busy)
785			continue;
786		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
787			va = pv->pv_va;
788			pmap = pv->pv_pmap;
789			/* Avoid deadlock and lock recursion. */
790			if (pmap > locked_pmap)
791				PMAP_LOCK(pmap);
792			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
793				continue;
794			pmap->pm_stats.resident_count--;
795			oldpmap = pmap_switch(pmap);
796			pte = pmap_find_vhpt(va);
797			KASSERT(pte != NULL, ("pte"));
798			pmap_remove_vhpt(va);
799			pmap_invalidate_page(pmap, va);
800			pmap_switch(oldpmap);
801			if (pmap_accessed(pte))
802				vm_page_flag_set(m, PG_REFERENCED);
803			if (pmap_dirty(pte))
804				vm_page_dirty(m);
805			pmap_free_pte(pte, va);
806			TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
807			m->md.pv_list_count--;
808			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
809			if (TAILQ_EMPTY(&m->md.pv_list))
810				vm_page_flag_clear(m, PG_WRITEABLE);
811			if (pmap != locked_pmap)
812				PMAP_UNLOCK(pmap);
813			if (allocated_pv == NULL)
814				allocated_pv = pv;
815			else
816				free_pv_entry(pv);
817		}
818	}
819	if (allocated_pv == NULL) {
820		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
821			vpq = &vm_page_queues[PQ_ACTIVE];
822			goto retry;
823		}
824		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
825	}
826	return (allocated_pv);
827}
828
829/*
830 * Conditionally create a pv entry.
831 */
832static boolean_t
833pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
834{
835	pv_entry_t pv;
836
837	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
838	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
839	if (pv_entry_count < pv_entry_high_water &&
840	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
841		pv_entry_count++;
842		pv->pv_va = va;
843		pv->pv_pmap = pmap;
844		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
845		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
846		m->md.pv_list_count++;
847		return (TRUE);
848	} else
849		return (FALSE);
850}
851
852/*
853 * Add an ia64_lpte to the VHPT.
854 */
855static void
856pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
857{
858	struct ia64_bucket *bckt;
859	struct ia64_lpte *vhpte;
860	uint64_t pte_pa;
861
862	/* Can fault, so get it out of the way. */
863	pte_pa = ia64_tpa((vm_offset_t)pte);
864
865	vhpte = (struct ia64_lpte *)ia64_thash(va);
866	bckt = (struct ia64_bucket *)vhpte->chain;
867
868	mtx_lock_spin(&bckt->mutex);
869	pte->chain = bckt->chain;
870	ia64_mf();
871	bckt->chain = pte_pa;
872
873	pmap_vhpt_inserts++;
874	bckt->length++;
875	mtx_unlock_spin(&bckt->mutex);
876}
877
878/*
879 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
880 * worked or an appropriate error code otherwise.
881 */
882static int
883pmap_remove_vhpt(vm_offset_t va)
884{
885	struct ia64_bucket *bckt;
886	struct ia64_lpte *pte;
887	struct ia64_lpte *lpte;
888	struct ia64_lpte *vhpte;
889	uint64_t chain, tag;
890
891	tag = ia64_ttag(va);
892	vhpte = (struct ia64_lpte *)ia64_thash(va);
893	bckt = (struct ia64_bucket *)vhpte->chain;
894
895	lpte = NULL;
896	mtx_lock_spin(&bckt->mutex);
897	chain = bckt->chain;
898	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
899	while (chain != 0 && pte->tag != tag) {
900		lpte = pte;
901		chain = pte->chain;
902		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
903	}
904	if (chain == 0) {
905		mtx_unlock_spin(&bckt->mutex);
906		return (ENOENT);
907	}
908
909	/* Snip this pte out of the collision chain. */
910	if (lpte == NULL)
911		bckt->chain = pte->chain;
912	else
913		lpte->chain = pte->chain;
914	ia64_mf();
915
916	bckt->length--;
917	mtx_unlock_spin(&bckt->mutex);
918	return (0);
919}
920
921/*
922 * Find the ia64_lpte for the given va, if any.
923 */
924static struct ia64_lpte *
925pmap_find_vhpt(vm_offset_t va)
926{
927	struct ia64_bucket *bckt;
928	struct ia64_lpte *pte;
929	uint64_t chain, tag;
930
931	tag = ia64_ttag(va);
932	pte = (struct ia64_lpte *)ia64_thash(va);
933	bckt = (struct ia64_bucket *)pte->chain;
934
935	mtx_lock_spin(&bckt->mutex);
936	chain = bckt->chain;
937	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
938	while (chain != 0 && pte->tag != tag) {
939		chain = pte->chain;
940		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
941	}
942	mtx_unlock_spin(&bckt->mutex);
943	return ((chain != 0) ? pte : NULL);
944}
945
946/*
947 * Remove an entry from the list of managed mappings.
948 */
949static int
950pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
951{
952	if (!pv) {
953		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
954			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
955				if (pmap == pv->pv_pmap && va == pv->pv_va)
956					break;
957			}
958		} else {
959			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
960				if (va == pv->pv_va)
961					break;
962			}
963		}
964	}
965
966	if (pv) {
967		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
968		m->md.pv_list_count--;
969		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
970			vm_page_flag_clear(m, PG_WRITEABLE);
971
972		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
973		free_pv_entry(pv);
974		return 0;
975	} else {
976		return ENOENT;
977	}
978}
979
980/*
981 * Create a pv entry for page at pa for
982 * (pmap, va).
983 */
984static void
985pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
986{
987	pv_entry_t pv;
988
989	pv = get_pv_entry(pmap);
990	pv->pv_pmap = pmap;
991	pv->pv_va = va;
992
993	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
994	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
995	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
996	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
997	m->md.pv_list_count++;
998}
999
1000/*
1001 *	Routine:	pmap_extract
1002 *	Function:
1003 *		Extract the physical page address associated
1004 *		with the given map/virtual_address pair.
1005 */
1006vm_paddr_t
1007pmap_extract(pmap_t pmap, vm_offset_t va)
1008{
1009	struct ia64_lpte *pte;
1010	pmap_t oldpmap;
1011	vm_paddr_t pa;
1012
1013	pa = 0;
1014	PMAP_LOCK(pmap);
1015	oldpmap = pmap_switch(pmap);
1016	pte = pmap_find_vhpt(va);
1017	if (pte != NULL && pmap_present(pte))
1018		pa = pmap_ppn(pte);
1019	pmap_switch(oldpmap);
1020	PMAP_UNLOCK(pmap);
1021	return (pa);
1022}
1023
1024/*
1025 *	Routine:	pmap_extract_and_hold
1026 *	Function:
1027 *		Atomically extract and hold the physical page
1028 *		with the given pmap and virtual address pair
1029 *		if that mapping permits the given protection.
1030 */
1031vm_page_t
1032pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1033{
1034	struct ia64_lpte *pte;
1035	pmap_t oldpmap;
1036	vm_page_t m;
1037
1038	m = NULL;
1039	vm_page_lock_queues();
1040	PMAP_LOCK(pmap);
1041	oldpmap = pmap_switch(pmap);
1042	pte = pmap_find_vhpt(va);
1043	if (pte != NULL && pmap_present(pte) &&
1044	    (pmap_prot(pte) & prot) == prot) {
1045		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1046		vm_page_hold(m);
1047	}
1048	vm_page_unlock_queues();
1049	pmap_switch(oldpmap);
1050	PMAP_UNLOCK(pmap);
1051	return (m);
1052}
1053
1054/***************************************************
1055 * Low level mapping routines.....
1056 ***************************************************/
1057
1058/*
1059 * Find the kernel lpte for mapping the given virtual address, which
1060 * must be in the part of region 5 which we can cover with our kernel
1061 * 'page tables'.
1062 */
1063static struct ia64_lpte *
1064pmap_find_kpte(vm_offset_t va)
1065{
1066	struct ia64_lpte **dir1;
1067	struct ia64_lpte *leaf;
1068
1069	KASSERT((va >> 61) == 5,
1070		("kernel mapping 0x%lx not in region 5", va));
1071	KASSERT(va < kernel_vm_end,
1072		("kernel mapping 0x%lx out of range", va));
1073
1074	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1075	leaf = dir1[KPTE_DIR1_INDEX(va)];
1076	return (&leaf[KPTE_PTE_INDEX(va)]);
1077}
1078
1079/*
1080 * Find a pte suitable for mapping a user-space address. If one exists
1081 * in the VHPT, that one will be returned, otherwise a new pte is
1082 * allocated.
1083 */
1084static struct ia64_lpte *
1085pmap_find_pte(vm_offset_t va)
1086{
1087	struct ia64_lpte *pte;
1088
1089	if (va >= VM_MAXUSER_ADDRESS)
1090		return pmap_find_kpte(va);
1091
1092	pte = pmap_find_vhpt(va);
1093	if (pte == NULL) {
1094		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1095		pte->tag = 1UL << 63;
1096	}
1097	return (pte);
1098}
1099
1100/*
1101 * Free a pte which is now unused. This simply returns it to the zone
1102 * allocator if it is a user mapping. For kernel mappings, clear the
1103 * valid bit to make it clear that the mapping is not currently used.
1104 */
1105static void
1106pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1107{
1108	if (va < VM_MAXUSER_ADDRESS)
1109		uma_zfree(ptezone, pte);
1110	else
1111		pmap_clear_present(pte);
1112}
1113
1114static PMAP_INLINE void
1115pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1116{
1117	static long prot2ar[4] = {
1118		PTE_AR_R,		/* VM_PROT_NONE */
1119		PTE_AR_RW,		/* VM_PROT_WRITE */
1120		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1121		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1122	};
1123
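	/*
	 * The shift below drops VM_PROT_READ, so prot2ar is indexed by
	 * the write and execute bits only; every access-rights value in
	 * the table grants read.
	 */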
1124	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1125	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1126	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1127	    ? PTE_PL_KERN : PTE_PL_USER;
1128	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1129}
1130
1131/*
1132 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1133 * the pte was originally valid, then it's assumed to already be in the
1134 * VHPT.
1135 * This function does not set the protection bits.  It's expected
1136 * that those have been set correctly prior to calling this function.
1137 */
1138static void
1139pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1140    boolean_t wired, boolean_t managed)
1141{
1142
1143	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED;
1144	pte->pte |= PTE_PRESENT | PTE_MA_WB;
1145	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1146	pte->pte |= (wired) ? PTE_WIRED : 0;
1147	pte->pte |= pa & PTE_PPN_MASK;
1148
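	/*
	 * The page size occupies bits 2..7 of the ITIR; the protection
	 * key (bits 8..31) is left at zero.
	 */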
1149	pte->itir = PAGE_SHIFT << 2;
1150
1151	pte->tag = ia64_ttag(va);
1152}
1153
1154/*
1155 * Remove the (possibly managed) mapping represented by pte from the
1156 * given pmap.
1157 */
1158static int
1159pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1160		pv_entry_t pv, int freepte)
1161{
1162	int error;
1163	vm_page_t m;
1164
1165	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1166		("removing pte for non-current pmap"));
1167
1168	/*
1169	 * First remove from the VHPT.
1170	 */
1171	error = pmap_remove_vhpt(va);
1172	if (error)
1173		return (error);
1174
1175	pmap_invalidate_page(pmap, va);
1176
1177	if (pmap_wired(pte))
1178		pmap->pm_stats.wired_count -= 1;
1179
1180	pmap->pm_stats.resident_count -= 1;
1181	if (pmap_managed(pte)) {
1182		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1183		if (pmap_dirty(pte))
1184			vm_page_dirty(m);
1185		if (pmap_accessed(pte))
1186			vm_page_flag_set(m, PG_REFERENCED);
1187
1188		error = pmap_remove_entry(pmap, m, va, pv);
1189	}
1190	if (freepte)
1191		pmap_free_pte(pte, va);
1192
1193	return (error);
1194}
1195
1196/*
1197 * Extract the physical page address associated with a kernel
1198 * virtual address.
1199 */
1200vm_paddr_t
1201pmap_kextract(vm_offset_t va)
1202{
1203	struct ia64_lpte *pte;
1204	vm_offset_t gwpage;
1205
1206	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1207
1208	/* Regions 6 and 7 are direct mapped. */
1209	if (va >= IA64_RR_BASE(6))
1210		return (IA64_RR_MASK(va));
1211
1212	/* EPC gateway page? */
1213	gwpage = (vm_offset_t)ia64_get_k5();
1214	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1215		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1216
1217	/* Bail out if the virtual address is beyond our limits. */
1218	if (va >= kernel_vm_end)
1219		return (0);
1220
1221	pte = pmap_find_kpte(va);
1222	if (!pmap_present(pte))
1223		return (0);
1224	return (pmap_ppn(pte) | (va & PAGE_MASK));
1225}
1226
1227/*
1228 * Add a list of wired pages to the kva.  This routine is only used for
1229 * temporary kernel mappings that do not need to have page modification
1230 * or references recorded.  Note that old mappings are simply written
1231 * over.  The page is effectively wired, but it's customary to not have
1232 * the PTE reflect that, nor update statistics.
1233 */
1234void
1235pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1236{
1237	struct ia64_lpte *pte;
1238	int i;
1239
1240	for (i = 0; i < count; i++) {
1241		pte = pmap_find_kpte(va);
1242		if (pmap_present(pte))
1243			pmap_invalidate_page(kernel_pmap, va);
1244		else
1245			pmap_enter_vhpt(pte, va);
1246		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1247		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1248		va += PAGE_SIZE;
1249	}
1250}
1251
1252/*
1253 * this routine jerks page mappings from the
1254 * kernel -- it is meant only for temporary mappings.
1255 */
1256void
1257pmap_qremove(vm_offset_t va, int count)
1258{
1259	struct ia64_lpte *pte;
1260	int i;
1261
1262	for (i = 0; i < count; i++) {
1263		pte = pmap_find_kpte(va);
1264		if (pmap_present(pte)) {
1265			pmap_remove_vhpt(va);
1266			pmap_invalidate_page(kernel_pmap, va);
1267			pmap_clear_present(pte);
1268		}
1269		va += PAGE_SIZE;
1270	}
1271}
1272
1273/*
1274 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1275 * to not have the PTE reflect that, nor update statistics.
1276 */
1277void
1278pmap_kenter(vm_offset_t va, vm_offset_t pa)
1279{
1280	struct ia64_lpte *pte;
1281
1282	pte = pmap_find_kpte(va);
1283	if (pmap_present(pte))
1284		pmap_invalidate_page(kernel_pmap, va);
1285	else
1286		pmap_enter_vhpt(pte, va);
1287	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1288	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1289}
1290
1291/*
1292 * Remove a page from the kva
1293 */
1294void
1295pmap_kremove(vm_offset_t va)
1296{
1297	struct ia64_lpte *pte;
1298
1299	pte = pmap_find_kpte(va);
1300	if (pmap_present(pte)) {
1301		pmap_remove_vhpt(va);
1302		pmap_invalidate_page(kernel_pmap, va);
1303		pmap_clear_present(pte);
1304	}
1305}
1306
1307/*
1308 *	Used to map a range of physical addresses into kernel
1309 *	virtual address space.
1310 *
1311 *	The value passed in '*virt' is a suggested virtual address for
1312 *	the mapping. Architectures which can support a direct-mapped
1313 *	physical to virtual region can return the appropriate address
1314 *	within that region, leaving '*virt' unchanged. Other
1315 *	architectures should map the pages starting at '*virt' and
1316 *	update '*virt' with the first usable address after the mapped
1317 *	region.
1318 */
1319vm_offset_t
1320pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1321{
1322	return IA64_PHYS_TO_RR7(start);
1323}
1324
1325/*
1326 * Remove a single page from a process address space
1327 */
1328static void
1329pmap_remove_page(pmap_t pmap, vm_offset_t va)
1330{
1331	struct ia64_lpte *pte;
1332
1333	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1334		("removing page for non-current pmap"));
1335
1336	pte = pmap_find_vhpt(va);
1337	if (pte != NULL)
1338		pmap_remove_pte(pmap, pte, va, 0, 1);
1339	return;
1340}
1341
1342/*
1343 *	Remove the given range of addresses from the specified map.
1344 *
1345 *	It is assumed that the start and end are properly
1346 *	rounded to the page size.
1347 */
1348void
1349pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1350{
1351	pmap_t oldpmap;
1352	vm_offset_t va;
1353	pv_entry_t npv, pv;
1354	struct ia64_lpte *pte;
1355
1356	if (pmap->pm_stats.resident_count == 0)
1357		return;
1358
1359	vm_page_lock_queues();
1360	PMAP_LOCK(pmap);
1361	oldpmap = pmap_switch(pmap);
1362
1363	/*
1364	 * Special handling for removing a single page: it's a very
1365	 * common operation and it's easy to short-circuit some
1366	 * code.
1367	 */
1368	if (sva + PAGE_SIZE == eva) {
1369		pmap_remove_page(pmap, sva);
1370		goto out;
1371	}
1372
1373	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1374		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
1375			va = pv->pv_va;
1376			if (va >= sva && va < eva) {
1377				pte = pmap_find_vhpt(va);
1378				KASSERT(pte != NULL, ("pte"));
1379				pmap_remove_pte(pmap, pte, va, pv, 1);
1380			}
1381		}
1382	} else {
1383		for (va = sva; va < eva; va += PAGE_SIZE) {
1384			pte = pmap_find_vhpt(va);
1385			if (pte != NULL)
1386				pmap_remove_pte(pmap, pte, va, 0, 1);
1387		}
1388	}
1389
1390out:
1391	vm_page_unlock_queues();
1392	pmap_switch(oldpmap);
1393	PMAP_UNLOCK(pmap);
1394}
1395
1396/*
1397 *	Routine:	pmap_remove_all
1398 *	Function:
1399 *		Removes this physical page from
1400 *		all physical maps in which it resides.
1401 *		Reflects back modify bits to the pager.
1402 *
1403 *	Notes:
1404 *		Original versions of this routine were very
1405 *		inefficient because they iteratively called
1406 *		pmap_remove (slow...)
1407 */
1408
1409void
1410pmap_remove_all(vm_page_t m)
1411{
1412	pmap_t oldpmap;
1413	pv_entry_t pv;
1414
1415#if defined(DIAGNOSTIC)
1416	/*
1417	 * XXX This makes pmap_remove_all() illegal for non-managed pages!
1418	 */
1419	if (m->flags & PG_FICTITIOUS) {
1420		panic("pmap_remove_all: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1421	}
1422#endif
1423	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1424	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1425		struct ia64_lpte *pte;
1426		pmap_t pmap = pv->pv_pmap;
1427		vm_offset_t va = pv->pv_va;
1428
1429		PMAP_LOCK(pmap);
1430		oldpmap = pmap_switch(pmap);
1431		pte = pmap_find_vhpt(va);
1432		KASSERT(pte != NULL, ("pte"));
1433		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1434			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1435		pmap_remove_pte(pmap, pte, va, pv, 1);
1436		pmap_switch(oldpmap);
1437		PMAP_UNLOCK(pmap);
1438	}
1439	vm_page_flag_clear(m, PG_WRITEABLE);
1440}
1441
1442/*
1443 *	Set the physical protection on the
1444 *	specified range of this map as requested.
1445 */
1446void
1447pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1448{
1449	pmap_t oldpmap;
1450	struct ia64_lpte *pte;
1451
1452	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1453		pmap_remove(pmap, sva, eva);
1454		return;
1455	}
1456
1457	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1458	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1459		return;
1460
1461	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1462		panic("pmap_protect: unaligned addresses");
1463
1464	vm_page_lock_queues();
1465	PMAP_LOCK(pmap);
1466	oldpmap = pmap_switch(pmap);
1467	for ( ; sva < eva; sva += PAGE_SIZE) {
1468		/* If page is invalid, skip this page */
1469		pte = pmap_find_vhpt(sva);
1470		if (pte == NULL)
1471			continue;
1472
1473		/* If there's no change, skip it too */
1474		if (pmap_prot(pte) == prot)
1475			continue;
1476
1477		if (pmap_managed(pte)) {
1478			vm_offset_t pa = pmap_ppn(pte);
1479			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1480
1481			if (pmap_dirty(pte)) {
1482				vm_page_dirty(m);
1483				pmap_clear_dirty(pte);
1484			}
1485
1486			if (pmap_accessed(pte)) {
1487				vm_page_flag_set(m, PG_REFERENCED);
1488				pmap_clear_accessed(pte);
1489			}
1490		}
1491
1492		if (prot & VM_PROT_EXECUTE)
1493			ia64_sync_icache(sva, PAGE_SIZE);
1494
1495		pmap_pte_prot(pmap, pte, prot);
1496		pmap_invalidate_page(pmap, sva);
1497	}
1498	vm_page_unlock_queues();
1499	pmap_switch(oldpmap);
1500	PMAP_UNLOCK(pmap);
1501}
1502
1503/*
1504 *	Insert the given physical page (p) at
1505 *	the specified virtual address (v) in the
1506 *	target physical map with the protection requested.
1507 *
1508 *	If specified, the page will be wired down, meaning
1509 *	that the related pte can not be reclaimed.
1510 *
1511 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1512 *	or lose information.  That is, this routine must actually
1513 *	insert this page into the given map NOW.
1514 */
1515void
1516pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1517    vm_prot_t prot, boolean_t wired)
1518{
1519	pmap_t oldpmap;
1520	vm_offset_t pa;
1521	vm_offset_t opa;
1522	struct ia64_lpte origpte;
1523	struct ia64_lpte *pte;
1524	boolean_t icache_inval, managed;
1525
1526	vm_page_lock_queues();
1527	PMAP_LOCK(pmap);
1528	oldpmap = pmap_switch(pmap);
1529
1530	va &= ~PAGE_MASK;
1531#ifdef DIAGNOSTIC
1532	if (va > VM_MAX_KERNEL_ADDRESS)
1533		panic("pmap_enter: toobig");
1534#endif
1535
1536	/*
1537	 * Find (or create) a pte for the given mapping.
1538	 */
1539	while ((pte = pmap_find_pte(va)) == NULL) {
1540		pmap_switch(oldpmap);
1541		PMAP_UNLOCK(pmap);
1542		vm_page_unlock_queues();
1543		VM_WAIT;
1544		vm_page_lock_queues();
1545		PMAP_LOCK(pmap);
1546		oldpmap = pmap_switch(pmap);
1547	}
1548	origpte = *pte;
1549	if (!pmap_present(pte)) {
1550		opa = ~0UL;
1551		pmap_enter_vhpt(pte, va);
1552	} else
1553		opa = pmap_ppn(pte);
1554	managed = FALSE;
1555	pa = VM_PAGE_TO_PHYS(m);
1556
1557	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1558
1559	/*
1560	 * Mapping has not changed, must be protection or wiring change.
1561	 */
1562	if (opa == pa) {
1563		/*
1564		 * Wiring change, just update stats. We don't worry about
1565		 * wiring PT pages as they remain resident as long as there
1566		 * are valid mappings in them. Hence, if a user page is wired,
1567		 * the PT page will be also.
1568		 */
1569		if (wired && !pmap_wired(&origpte))
1570			pmap->pm_stats.wired_count++;
1571		else if (!wired && pmap_wired(&origpte))
1572			pmap->pm_stats.wired_count--;
1573
1574		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1575
1576		/*
1577		 * We might be turning off write access to the page,
1578		 * so we go ahead and sense modify status. Otherwise,
1579		 * we can avoid I-cache invalidation if the page
1580		 * already allowed execution.
1581		 */
1582		if (managed && pmap_dirty(&origpte))
1583			vm_page_dirty(m);
1584		else if (pmap_exec(&origpte))
1585			icache_inval = FALSE;
1586
1587		pmap_invalidate_page(pmap, va);
1588		goto validate;
1589	}
1590
1591	/*
1592	 * Mapping has changed, invalidate old range and fall
1593	 * through to handle validating new mapping.
1594	 */
1595	if (opa != ~0UL) {
1596		pmap_remove_pte(pmap, pte, va, 0, 0);
1597		pmap_enter_vhpt(pte, va);
1598	}
1599
1600	/*
1601	 * Enter on the PV list if part of our managed memory.
1602	 */
1603	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1604		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1605		    ("pmap_enter: managed mapping within the clean submap"));
1606		pmap_insert_entry(pmap, va, m);
1607		managed = TRUE;
1608	}
1609
1610	/*
1611	 * Increment counters
1612	 */
1613	pmap->pm_stats.resident_count++;
1614	if (wired)
1615		pmap->pm_stats.wired_count++;
1616
1617validate:
1618
1619	/*
1620	 * Now validate mapping with desired protection/wiring. This
1621	 * adds the pte to the VHPT if necessary.
1622	 */
1623	pmap_pte_prot(pmap, pte, prot);
1624	pmap_set_pte(pte, va, pa, wired, managed);
1625
1626	/* Invalidate the I-cache when needed. */
1627	if (icache_inval)
1628		ia64_sync_icache(va, PAGE_SIZE);
1629
1630	if ((prot & VM_PROT_WRITE) != 0)
1631		vm_page_flag_set(m, PG_WRITEABLE);
1632	vm_page_unlock_queues();
1633	pmap_switch(oldpmap);
1634	PMAP_UNLOCK(pmap);
1635}
1636
1637/*
1638 * Maps a sequence of resident pages belonging to the same object.
1639 * The sequence begins with the given page m_start.  This page is
1640 * mapped at the given virtual address start.  Each subsequent page is
1641 * mapped at a virtual address that is offset from start by the same
1642 * amount as the page is offset from m_start within the object.  The
1643 * last page in the sequence is the page with the largest offset from
1644 * m_start that can be mapped at a virtual address less than the given
1645 * virtual address end.  Not every virtual page between start and end
1646 * is mapped; only those for which a resident page exists with the
1647 * corresponding offset from m_start are mapped.
1648 */
1649void
1650pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1651    vm_page_t m_start, vm_prot_t prot)
1652{
1653	pmap_t oldpmap;
1654	vm_page_t m;
1655	vm_pindex_t diff, psize;
1656
1657	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
1658	psize = atop(end - start);
1659	m = m_start;
1660	PMAP_LOCK(pmap);
1661	oldpmap = pmap_switch(pmap);
1662	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1663		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1664		m = TAILQ_NEXT(m, listq);
1665	}
1666	pmap_switch(oldpmap);
1667 	PMAP_UNLOCK(pmap);
1668}
1669
1670/*
1671 * this code makes some *MAJOR* assumptions:
1672 * 1. Current pmap & pmap exists.
1673 * 2. Not wired.
1674 * 3. Read access.
1675 * 4. No page table pages.
1676 * but is *MUCH* faster than pmap_enter...
1677 */
1678
1679void
1680pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1681{
1682	pmap_t oldpmap;
1683
1684	PMAP_LOCK(pmap);
1685	oldpmap = pmap_switch(pmap);
1686	pmap_enter_quick_locked(pmap, va, m, prot);
1687	pmap_switch(oldpmap);
1688	PMAP_UNLOCK(pmap);
1689}
1690
1691static void
1692pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1693    vm_prot_t prot)
1694{
1695	struct ia64_lpte *pte;
1696	boolean_t managed;
1697
1698	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1699	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
1700	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1701	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1702	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1703
1704	if ((pte = pmap_find_pte(va)) == NULL)
1705		return;
1706
1707	if (!pmap_present(pte)) {
1708		/* Enter on the PV list if the page is managed. */
1709		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1710			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1711				pmap_free_pte(pte, va);
1712				return;
1713			}
1714			managed = TRUE;
1715		} else
1716			managed = FALSE;
1717
1718		/* Increment counters. */
1719		pmap->pm_stats.resident_count++;
1720
1721		/* Initialise with R/O protection and enter into VHPT. */
1722		pmap_enter_vhpt(pte, va);
1723		pmap_pte_prot(pmap, pte,
1724		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1725		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1726
1727		if (prot & VM_PROT_EXECUTE)
1728			ia64_sync_icache(va, PAGE_SIZE);
1729	}
1730}
1731
1732/*
1733 * pmap_object_init_pt preloads the ptes for a given object
1734 * into the specified pmap.  This eliminates the blast of soft
1735 * faults on process startup and immediately after an mmap.
1736 */
1737void
1738pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1739		    vm_object_t object, vm_pindex_t pindex,
1740		    vm_size_t size)
1741{
1742
1743	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1744	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1745	    ("pmap_object_init_pt: non-device object"));
1746}
1747
1748/*
1749 *	Routine:	pmap_change_wiring
1750 *	Function:	Change the wiring attribute for a map/virtual-address
1751 *			pair.
1752 *	In/out conditions:
1753 *			The mapping must already exist in the pmap.
1754 */
1755void
1756pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1760{
1761	pmap_t oldpmap;
1762	struct ia64_lpte *pte;
1763
1764	PMAP_LOCK(pmap);
1765	oldpmap = pmap_switch(pmap);
1766
1767	pte = pmap_find_vhpt(va);
1768	KASSERT(pte != NULL, ("pte"));
1769	if (wired && !pmap_wired(pte)) {
1770		pmap->pm_stats.wired_count++;
1771		pmap_set_wired(pte);
1772	} else if (!wired && pmap_wired(pte)) {
1773		pmap->pm_stats.wired_count--;
1774		pmap_clear_wired(pte);
1775	}
1776
1777	pmap_switch(oldpmap);
1778	PMAP_UNLOCK(pmap);
1779}
1780
1781
1782
1783/*
1784 *	Copy the range specified by src_addr/len
1785 *	from the source map to the range dst_addr/len
1786 *	in the destination map.
1787 *
1788 *	This routine is only advisory and need not do anything.
1789 */
1790
1791void
1792pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1793	  vm_offset_t src_addr)
1794{
1795}
1796
1797
1798/*
1799 *	pmap_zero_page zeros the specified hardware page by
1800 *	mapping it into virtual memory and using bzero to clear
1801 *	its contents.
1802 */
1803
1804void
1805pmap_zero_page(vm_page_t m)
1806{
1807	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1808	bzero((caddr_t) va, PAGE_SIZE);
1809}
1810
1811
1812/*
1813 *	pmap_zero_page_area zeros the specified hardware page by
1814 *	mapping it into virtual memory and using bzero to clear
1815 *	its contents.
1816 *
1817 *	off and size must reside within a single page.
1818 */
1819
1820void
1821pmap_zero_page_area(vm_page_t m, int off, int size)
1822{
1823	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1824	bzero((char *)(caddr_t)va + off, size);
1825}
1826
1827
1828/*
1829 *	pmap_zero_page_idle zeros the specified hardware page by
1830 *	mapping it into virtual memory and using bzero to clear
1831 *	its contents.  This is for the vm_idlezero process.
1832 */
1833
1834void
1835pmap_zero_page_idle(vm_page_t m)
1836{
1837	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1838	bzero((caddr_t) va, PAGE_SIZE);
1839}
1840
1841
1842/*
1843 *	pmap_copy_page copies the specified (machine independent)
1844 *	page by mapping the page into virtual memory and using
1845 *	bcopy to copy the page, one machine dependent page at a
1846 *	time.
1847 */
1848void
1849pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1850{
1851	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
1852	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
1853	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
1854}
1855
1856/*
1857 * Returns true if the pmap's pv is one of the first
1858 * 16 pvs linked to from this page.  This count may
1859 * be changed upwards or downwards in the future; it
1860 * is only necessary that true be returned for a small
1861 * subset of pmaps for proper page aging.
1862 */
1863boolean_t
1864pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1865{
1866	pv_entry_t pv;
1867	int loops = 0;
1868
1869	if (m->flags & PG_FICTITIOUS)
1870		return FALSE;
1871
1872	/*
1873	 * Not found, check current mappings returning immediately if found.
1874	 */
1875	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1876	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1877		if (pv->pv_pmap == pmap) {
1878			return TRUE;
1879		}
1880		loops++;
1881		if (loops >= 16)
1882			break;
1883	}
1884	return (FALSE);
1885}
1886
1887/*
1888 *	pmap_page_wired_mappings:
1889 *
1890 *	Return the number of managed mappings to the given physical page
1891 *	that are wired.
1892 */
1893int
1894pmap_page_wired_mappings(vm_page_t m)
1895{
1896	struct ia64_lpte *pte;
1897	pmap_t oldpmap, pmap;
1898	pv_entry_t pv;
1899	int count;
1900
1901	count = 0;
1902	if ((m->flags & PG_FICTITIOUS) != 0)
1903		return (count);
1904	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1905	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1906		pmap = pv->pv_pmap;
1907		PMAP_LOCK(pmap);
1908		oldpmap = pmap_switch(pmap);
1909		pte = pmap_find_vhpt(pv->pv_va);
1910		KASSERT(pte != NULL, ("pte"));
1911		if (pmap_wired(pte))
1912			count++;
1913		pmap_switch(oldpmap);
1914		PMAP_UNLOCK(pmap);
1915	}
1916	return (count);
1917}
1918
1919/*
1920 * Remove all pages from the specified address space; this aids
1921 * process exit speeds.  Also, this code
1922 * is special cased for current process only, but
1923 * can have the more generic (and slightly slower)
1924 * mode enabled.  This is much faster than pmap_remove
1925 * in the case of running down an entire address space.
1926 */
1927void
1928pmap_remove_pages(pmap_t pmap)
1929{
1930	pmap_t oldpmap;
1931	pv_entry_t pv, npv;
1932
1933	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
1934		printf("warning: pmap_remove_pages called with non-current pmap\n");
1935		return;
1936	}
1937
1938	vm_page_lock_queues();
1939	PMAP_LOCK(pmap);
1940	oldpmap = pmap_switch(pmap);
1941
1942	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
1943		struct ia64_lpte *pte;
1944
1945		npv = TAILQ_NEXT(pv, pv_plist);
1946
1947		pte = pmap_find_vhpt(pv->pv_va);
1948		KASSERT(pte != NULL, ("pte"));
1949		if (!pmap_wired(pte))
1950			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
1951	}
1952
1953	pmap_switch(oldpmap);
1954	PMAP_UNLOCK(pmap);
1955	vm_page_unlock_queues();
1956}
1957
1958/*
1959 *	pmap_ts_referenced:
1960 *
1961 *	Return a count of reference bits for a page, clearing those bits.
1962 *	It is not necessary for every reference bit to be cleared, but it
1963 *	is necessary that 0 only be returned when there are truly no
1964 *	reference bits set.
1965 *
1966 *	XXX: The exact number of bits to check and clear is a matter that
1967 *	should be tested and standardized at some point in the future for
1968 *	optimal aging of shared pages.
1969 */
1970int
1971pmap_ts_referenced(vm_page_t m)
1972{
1973	struct ia64_lpte *pte;
1974	pmap_t oldpmap;
1975	pv_entry_t pv;
1976	int count = 0;
1977
1978	if (m->flags & PG_FICTITIOUS)
1979		return 0;
1980
1981	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1982		PMAP_LOCK(pv->pv_pmap);
1983		oldpmap = pmap_switch(pv->pv_pmap);
1984		pte = pmap_find_vhpt(pv->pv_va);
1985		KASSERT(pte != NULL, ("pte"));
1986		if (pmap_accessed(pte)) {
1987			count++;
1988			pmap_clear_accessed(pte);
1989			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1990		}
1991		pmap_switch(oldpmap);
1992		PMAP_UNLOCK(pv->pv_pmap);
1993	}
1994
1995	return count;
1996}
1997
1998/*
1999 *	pmap_is_modified:
2000 *
2001 *	Return whether or not the specified physical page was modified
2002 *	in any physical maps.
2003 */
2004boolean_t
2005pmap_is_modified(vm_page_t m)
2006{
2007	struct ia64_lpte *pte;
2008	pmap_t oldpmap;
2009	pv_entry_t pv;
2010	boolean_t rv;
2011
2012	rv = FALSE;
2013	if (m->flags & PG_FICTITIOUS)
2014		return (rv);
2015
2016	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2017		PMAP_LOCK(pv->pv_pmap);
2018		oldpmap = pmap_switch(pv->pv_pmap);
2019		pte = pmap_find_vhpt(pv->pv_va);
2020		pmap_switch(oldpmap);
2021		KASSERT(pte != NULL, ("pte"));
2022		rv = pmap_dirty(pte) ? TRUE : FALSE;
2023		PMAP_UNLOCK(pv->pv_pmap);
2024		if (rv)
2025			break;
2026	}
2027
2028	return (rv);
2029}
2030
2031/*
2032 *	pmap_is_prefaultable:
2033 *
2034 *	Return whether or not the specified virtual address is eligible
2035 *	for prefault.
2036 */
2037boolean_t
2038pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2039{
2040	struct ia64_lpte *pte;
2041
2042	pte = pmap_find_vhpt(addr);
2043	if (pte != NULL && pmap_present(pte))
2044		return (FALSE);
2045	return (TRUE);
2046}
2047
2048/*
2049 *	Clear the modify bits on the specified physical page.
2050 */
2051void
2052pmap_clear_modify(vm_page_t m)
2053{
2054	struct ia64_lpte *pte;
2055	pmap_t oldpmap;
2056	pv_entry_t pv;
2057
2058	if (m->flags & PG_FICTITIOUS)
2059		return;
2060
2061	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2062		PMAP_LOCK(pv->pv_pmap);
2063		oldpmap = pmap_switch(pv->pv_pmap);
2064		pte = pmap_find_vhpt(pv->pv_va);
2065		KASSERT(pte != NULL, ("pte"));
2066		if (pmap_dirty(pte)) {
2067			pmap_clear_dirty(pte);
2068			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2069		}
2070		pmap_switch(oldpmap);
2071		PMAP_UNLOCK(pv->pv_pmap);
2072	}
2073}
2074
2075/*
2076 *	pmap_clear_reference:
2077 *
2078 *	Clear the reference bit on the specified physical page.
2079 */
2080void
2081pmap_clear_reference(vm_page_t m)
2082{
2083	struct ia64_lpte *pte;
2084	pmap_t oldpmap;
2085	pv_entry_t pv;
2086
2087	if (m->flags & PG_FICTITIOUS)
2088		return;
2089
2090	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2091		PMAP_LOCK(pv->pv_pmap);
2092		oldpmap = pmap_switch(pv->pv_pmap);
2093		pte = pmap_find_vhpt(pv->pv_va);
2094		KASSERT(pte != NULL, ("pte"));
2095		if (pmap_accessed(pte)) {
2096			pmap_clear_accessed(pte);
2097			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2098		}
2099		pmap_switch(oldpmap);
2100		PMAP_UNLOCK(pv->pv_pmap);
2101	}
2102}
2103
2104/*
2105 * Clear the write and modified bits in each of the given page's mappings.
2106 */
2107void
2108pmap_remove_write(vm_page_t m)
2109{
2110	struct ia64_lpte *pte;
2111	pmap_t oldpmap, pmap;
2112	pv_entry_t pv;
2113	vm_prot_t prot;
2114
2115	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2116	if ((m->flags & PG_FICTITIOUS) != 0 ||
2117	    (m->flags & PG_WRITEABLE) == 0)
2118		return;
2119	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2120		pmap = pv->pv_pmap;
2121		PMAP_LOCK(pmap);
2122		oldpmap = pmap_switch(pmap);
2123		pte = pmap_find_vhpt(pv->pv_va);
2124		KASSERT(pte != NULL, ("pte"));
2125		prot = pmap_prot(pte);
2126		if ((prot & VM_PROT_WRITE) != 0) {
2127			if (pmap_dirty(pte)) {
2128				vm_page_dirty(m);
2129				pmap_clear_dirty(pte);
2130			}
2131			prot &= ~VM_PROT_WRITE;
2132			pmap_pte_prot(pmap, pte, prot);
2133			pmap_invalidate_page(pmap, pv->pv_va);
2134		}
2135		pmap_switch(oldpmap);
2136		PMAP_UNLOCK(pmap);
2137	}
2138	vm_page_flag_clear(m, PG_WRITEABLE);
2139}
2140
2141/*
2142 * Map a set of physical memory pages into the kernel virtual
2143 * address space. Return a pointer to where it is mapped. This
2144 * routine is intended to be used for mapping device memory,
2145 * NOT real memory.
2146 */
2147void *
2148pmap_mapdev(vm_offset_t pa, vm_size_t size)
2149{
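	/*
	 * Device memory is accessed through region 6, the kernel's
	 * uncacheable identity mapping, so no kernel virtual address
	 * is allocated and the size argument needs no bookkeeping.
	 */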
2150	return ((void *)IA64_PHYS_TO_RR6(pa));
2151}
2152
2153/*
2154 * 'Unmap' a range mapped by pmap_mapdev().
2155 */
2156void
2157pmap_unmapdev(vm_offset_t va, vm_size_t size)
2158{
2159	return;
2160}
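
/*
 * Usage sketch (illustrative only; "regs", "pa" and "size" are
 * hypothetical): a driver maps its device registers and gets back an
 * uncacheable region 6 address; the matching unmap is a no-op because
 * no kernel virtual address was ever allocated.
 *
 *	void *regs = pmap_mapdev(pa, size);
 *	...access the device through regs...
 *	pmap_unmapdev((vm_offset_t)regs, size);
 */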
2161
2162/*
2163 * Perform the pmap work for mincore(2).
2164 */
2165int
2166pmap_mincore(pmap_t pmap, vm_offset_t addr)
2167{
2168	pmap_t oldpmap;
2169	struct ia64_lpte *pte, tpte;
2170	int val = 0;
2171
2172	PMAP_LOCK(pmap);
2173	oldpmap = pmap_switch(pmap);
2174	pte = pmap_find_vhpt(addr);
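	/*
	 * If a mapping exists, snapshot the PTE locally so that it can
	 * be examined after the pmap lock is dropped.
	 */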
2175	if (pte != NULL) {
2176		tpte = *pte;
2177		pte = &tpte;
2178	}
2179	pmap_switch(oldpmap);
2180	PMAP_UNLOCK(pmap);
2181
2182	if (pte == NULL)
2183		return 0;
2184
2185	if (pmap_present(pte)) {
2186		vm_page_t m;
2187		vm_offset_t pa;
2188
2189		val = MINCORE_INCORE;
2190		if (!pmap_managed(pte))
2191			return val;
2192
2193		pa = pmap_ppn(pte);
2194
2195		m = PHYS_TO_VM_PAGE(pa);
2196
2197		/*
2198		 * Modified by us
2199		 */
2200		if (pmap_dirty(pte))
2201			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2202		else {
2203			/*
2204			 * Modified by someone
2205			 */
2206			vm_page_lock_queues();
2207			if (pmap_is_modified(m))
2208				val |= MINCORE_MODIFIED_OTHER;
2209			vm_page_unlock_queues();
2210		}
2211		/*
2212		 * Referenced by us
2213		 */
2214		if (pmap_accessed(pte))
2215			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2216		else {
2217			/*
2218			 * Referenced by someone
2219			 */
2220			vm_page_lock_queues();
2221			if (pmap_ts_referenced(m)) {
2222				val |= MINCORE_REFERENCED_OTHER;
2223				vm_page_flag_set(m, PG_REFERENCED);
2224			}
2225			vm_page_unlock_queues();
2226		}
2227	}
2228	return val;
2229}
2230
2231void
2232pmap_activate(struct thread *td)
2233{
2234	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2235}
2236
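/*
 * Make the given address space current by loading its region IDs into
 * region registers 0-4, and return the previously active pmap so the
 * caller can restore it.  A NULL pmap installs RID i for region i.
 * Callers in this file follow the save/restore pattern:
 *
 *	oldpmap = pmap_switch(pmap);
 *	...operate on pmap's mappings...
 *	pmap_switch(oldpmap);
 */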
2237pmap_t
2238pmap_switch(pmap_t pm)
2239{
2240	pmap_t prevpm;
2241	int i;
2242
2243	critical_enter();
2244	prevpm = PCPU_GET(current_pmap);
2245	if (prevpm == pm)
2246		goto out;
2247	if (prevpm != NULL)
2248		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
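	/*
	 * Region register layout: RID in bits 8..31, preferred page
	 * size in bits 2..7, VHPT walker enable in bit 0.
	 */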
2249	if (pm == NULL) {
2250		for (i = 0; i < 5; i++) {
2251			ia64_set_rr(IA64_RR_BASE(i),
2252			    (i << 8)|(PAGE_SHIFT << 2)|1);
2253		}
2254	} else {
2255		for (i = 0; i < 5; i++) {
2256			ia64_set_rr(IA64_RR_BASE(i),
2257			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2258		}
2259		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
2260	}
2261	PCPU_SET(current_pmap, pm);
2262	ia64_srlz_d();
2263
2264out:
2265	critical_exit();
2266	return (prevpm);
2267}
2268
2269void
2270pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2271{
2272	pmap_t oldpm;
2273	struct ia64_lpte *pte;
2274	vm_offset_t lim;
2275	vm_size_t len;
2276
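	/* Extend the range to 32-byte alignment before walking it. */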
2277	sz += va & 31;
2278	va &= ~31;
2279	sz = (sz + 31) & ~31;
2280
2281	PMAP_LOCK(pm);
2282	oldpm = pmap_switch(pm);
2283	while (sz > 0) {
2284		lim = round_page(va);
2285		len = MIN(lim - va, sz);
2286		pte = pmap_find_vhpt(va);
2287		if (pte != NULL && pmap_present(pte))
2288			ia64_sync_icache(va, len);
2289		va += len;
2290		sz -= len;
2291	}
2292	pmap_switch(oldpm);
2293	PMAP_UNLOCK(pm);
2294}
2295
2296/*
2297 *	Increase the starting virtual address of the given mapping if a
2298 *	different alignment might result in more superpage mappings.
2299 */
2300void
2301pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2302    vm_offset_t *addr, vm_size_t size)
2303{
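	/* No realignment is performed; this function is a no-op here. */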
2304}
2305
2306#include "opt_ddb.h"
2307
2308#ifdef DDB
2309
2310#include <ddb/ddb.h>
2311
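/*
 * Page size names, indexed by log2 of the page size (the encoding used
 * by the itir.ps and rr.ps fields).
 */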
2312static const char *psnames[] = {
2313	"1B",	"2B",	"4B",	"8B",
2314	"16B",	"32B",	"64B",	"128B",
2315	"256B",	"512B",	"1K",	"2K",
2316	"4K",	"8K",	"16K",	"32K",
2317	"64K",	"128K",	"256K",	"512K",
2318	"1M",	"2M",	"4M",	"8M",
2319	"16M",	"32M",	"64M",	"128M",
2320	"256M",	"512M",	"1G",	"2G"
2321};
2322
2323static void
2324print_trs(int type)
2325{
2326	struct ia64_pal_result res;
2327	int i, maxtr;
2328	struct {
2329		pt_entry_t	pte;
2330		uint64_t	itir;
2331		uint64_t	ifa;
2332		struct ia64_rr	rr;
2333	} buf;
2334	static const char *manames[] = {
2335		"WB",	"bad",	"bad",	"bad",
2336		"UC",	"UCE",	"WC",	"NaT",
2337	};
2338
2339	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2340	if (res.pal_status != 0) {
2341		db_printf("Can't get VM summary\n");
2342		return;
2343	}
2344
2345	if (type == 0)
2346		maxtr = (res.pal_result[0] >> 40) & 0xff;
2347	else
2348		maxtr = (res.pal_result[0] >> 32) & 0xff;
2349
2350	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2351	for (i = 0; i <= maxtr; i++) {
2352		bzero(&buf, sizeof(buf));
2353		res = ia64_call_pal_stacked_physical
2354			(PAL_VM_TR_READ, i, type, ia64_tpa((uint64_t) &buf));
2355		if (!(res.pal_result[0] & 1))
2356			buf.pte &= ~PTE_AR_MASK;
2357		if (!(res.pal_result[0] & 2))
2358			buf.pte &= ~PTE_PL_MASK;
2359		if (!(res.pal_result[0] & 4))
2360			pmap_clear_dirty(&buf);
2361		if (!(res.pal_result[0] & 8))
2362			buf.pte &= ~PTE_MA_MASK;
2363		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2364		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2365		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2366		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2367		    (buf.pte & PTE_ED) ? 1 : 0,
2368		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2369		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2370		    (pmap_dirty(&buf)) ? 1 : 0,
2371		    (pmap_accessed(&buf)) ? 1 : 0,
2372		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2373		    (pmap_present(&buf)) ? 1 : 0,
2374		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2375	}
2376}
2377
2378DB_COMMAND(itr, db_itr)
2379{
2380	print_trs(0);
2381}
2382
2383DB_COMMAND(dtr, db_dtr)
2384{
2385	print_trs(1);
2386}
2387
2388DB_COMMAND(rr, db_rr)
2389{
2390	int i;
2391	uint64_t t;
2392	struct ia64_rr rr;
2393
2394	printf("RR RID    PgSz VE\n");
2395	for (i = 0; i < 8; i++) {
2396		__asm __volatile ("mov %0=rr[%1]"
2397				  : "=r"(t)
2398				  : "r"(IA64_RR_BASE(i)));
2399		*(uint64_t *) &rr = t;
2400		printf("%d  %06x %4s %d\n",
2401		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2402	}
2403}
2404
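/* Show the VHPT hash address (thash) for the given virtual address. */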
2405DB_COMMAND(thash, db_thash)
2406{
2407	if (!have_addr)
2408		return;
2409
2410	db_printf("%p\n", (void *) ia64_thash(addr));
2411}
2412
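/* Show the VHPT translation tag (ttag) for the given virtual address. */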
2413DB_COMMAND(ttag, db_ttag)
2414{
2415	if (!have_addr)
2416		return;
2417
2418	db_printf("0x%lx\n", ia64_ttag(addr));
2419}
2420
2421DB_COMMAND(kpte, db_kpte)
2422{
2423	struct ia64_lpte *pte;
2424
2425	if (!have_addr) {
2426		db_printf("usage: kpte <kva>\n");
2427		return;
2428	}
2429	if (addr < VM_MIN_KERNEL_ADDRESS) {
2430		db_printf("kpte: error: invalid <kva>\n");
2431		return;
2432	}
2433	pte = pmap_find_kpte(addr);
2434	db_printf("kpte at %p:\n", pte);
2435	db_printf("  pte  =%016lx\n", pte->pte);
2436	db_printf("  itir =%016lx\n", pte->itir);
2437	db_printf("  tag  =%016lx\n", pte->tag);
2438	db_printf("  chain=%016lx\n", pte->chain);
2439}
2440
2441#endif
2442