pmap.c revision 152042
1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: head/sys/ia64/ia64/pmap.c 152042 2005-11-04 18:03:24Z alc $");
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/lock.h>
54#include <sys/mman.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/smp.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60
61#include <vm/vm.h>
62#include <vm/vm_page.h>
63#include <vm/vm_map.h>
64#include <vm/vm_object.h>
65#include <vm/vm_pageout.h>
66#include <vm/uma.h>
67
68#include <machine/md_var.h>
69#include <machine/pal.h>
70
71/*
72 *	Manages physical address maps.
73 *
74 *	In addition to hardware address maps, this
75 *	module is called upon to provide software-use-only
76 *	maps which may or may not be stored in the same
77 *	form as hardware maps.  These pseudo-maps are
78 *	used to store intermediate results from copy
79 *	operations to and from address spaces.
80 *
81 *	Since the information managed by this module is
82 *	also stored by the logical address mapping module,
83 *	this module may throw away valid virtual-to-physical
84 *	mappings at almost any time.  However, invalidations
85 *	of virtual-to-physical mappings must be done as
86 *	requested.
87 *
88 *	In order to cope with hardware architectures which
89 *	make virtual-to-physical map invalidates expensive,
90 *	this module may delay invalidation or protection-reduction
91 *	operations until such time as they are actually
92 *	necessary.  This module is given full information as
93 *	to which processors are currently using which maps,
94 *	and to when physical maps must be made correct.
95 */
96
97/*
98 * Following the Linux model, region IDs are allocated in groups of
99 * eight so that a single region ID can be used for as many RRs as we
100 * want by encoding the RR number into the low bits of the ID.
101 *
102 * We reserve region ID 0 for the kernel and allocate the remaining
103 * IDs for user pmaps.
104 *
105 *	Regions 0..4
106 *	User virtually mapped
107 *
108 * Region 5
109 *	Kernel virtually mapped
110 *
111 * Region 6
112 *	Kernel physically mapped uncacheable
113 *
114 * Region 7
115 *	Kernel physically mapped cacheable
116 */
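/*
 * Illustrative note (added): the top three bits of a virtual address
 * select the region, so IA64_RR_BASE(7) is 7 << 61 and an address such
 * as IA64_RR_BASE(7) + 0x1000 direct-maps physical address 0x1000 once
 * IA64_RR_MASK() strips the region-select bits.  The
 * IA64_PHYS_TO_RR6()/IA64_PHYS_TO_RR7() conversions used throughout
 * this file are the inverse operation.
 */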
117
118/* XXX move to a header. */
119extern uint64_t ia64_gateway_page[];
120
121MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
122
123#ifndef PMAP_SHPGPERPROC
124#define PMAP_SHPGPERPROC 200
125#endif
126
127#if !defined(DIAGNOSTIC)
128#define PMAP_INLINE __inline
129#else
130#define PMAP_INLINE
131#endif
132
133#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
134#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
135#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
136#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
137#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
138#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
139#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
140
141#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
142#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
143#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
144#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
145
146#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
147
148/*
149 * The VHPT bucket head structure.
150 */
151struct ia64_bucket {
152	uint64_t	chain;
153	struct mtx	mutex;
154	u_int		length;
155};
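/*
 * Added note: "chain" holds the physical address of the first
 * ia64_lpte on this bucket's collision chain (0 when empty), "length"
 * counts the chain entries and is what the machdep.vhpt.population
 * sysctl sums, and "mutex" serializes the chain updates done by
 * pmap_enter_vhpt() and pmap_remove_vhpt().
 */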
156
157/*
158 * Statically allocated kernel pmap
159 */
160struct pmap kernel_pmap_store;
161
162vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
163vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
164
165/*
166 * Kernel virtual memory management.
167 */
168static int nkpt;
169struct ia64_lpte **ia64_kptdir;
170#define KPTE_DIR_INDEX(va) \
171	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
172#define KPTE_PTE_INDEX(va) \
173	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
174#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
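/*
 * Worked example (assuming the default 8KB page size, PAGE_SHIFT == 13):
 * sizeof(struct ia64_lpte) is 32 bytes, so NKPTEPG is 256 and one leaf
 * page of the kernel 'page table' maps 256 * 8KB = 2MB.  The single
 * ia64_kptdir directory page holds 1 << (PAGE_SHIFT - 3) = 1024 leaf
 * pointers, for at most 2GB of kernel virtual address space.
 */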
175
176vm_offset_t kernel_vm_end;
177
178/* Values for ptc.e. XXX values for SKI. */
179static uint64_t pmap_ptc_e_base = 0x100000000;
180static uint64_t pmap_ptc_e_count1 = 3;
181static uint64_t pmap_ptc_e_count2 = 2;
182static uint64_t pmap_ptc_e_stride1 = 0x2000;
183static uint64_t pmap_ptc_e_stride2 = 0x100000000;
184struct mtx pmap_ptcmutex;
185
186/*
187 * Data for the RID allocator
188 */
189static int pmap_ridcount;
190static int pmap_rididx;
191static int pmap_ridmapsz;
192static int pmap_ridmax;
193static uint64_t *pmap_ridmap;
194struct mtx pmap_ridmutex;
195
196/*
197 * Data for the pv entry allocation mechanism
198 */
199static uma_zone_t pvzone;
200static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
201int pmap_pagedaemon_waken;
202
203/*
204 * Data for allocating PTEs for user processes.
205 */
206static uma_zone_t ptezone;
207
208/*
209 * Virtual Hash Page Table (VHPT) data.
210 */
211/* SYSCTL_DECL(_machdep); */
212SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
213
214struct ia64_bucket *pmap_vhpt_bucket;
215
216int pmap_vhpt_nbuckets;
217SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
218    &pmap_vhpt_nbuckets, 0, "");
219
220uint64_t pmap_vhpt_base[MAXCPU];
221
222int pmap_vhpt_log2size = 0;
223TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
224SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
225    &pmap_vhpt_log2size, 0, "");
226
227static int pmap_vhpt_inserts;
228SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
229    &pmap_vhpt_inserts, 0, "");
230
231static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
232SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
233    NULL, 0, pmap_vhpt_population, "I", "");
234
235static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
236static pv_entry_t get_pv_entry(void);
237
238static pmap_t	pmap_install(pmap_t);
239static void	pmap_invalidate_all(pmap_t pmap);
240
241vm_offset_t
242pmap_steal_memory(vm_size_t size)
243{
244	vm_size_t bank_size;
245	vm_offset_t pa, va;
246
247	size = round_page(size);
248
249	bank_size = phys_avail[1] - phys_avail[0];
250	while (size > bank_size) {
251		int i;
252		for (i = 0; phys_avail[i+2]; i+= 2) {
253			phys_avail[i] = phys_avail[i+2];
254			phys_avail[i+1] = phys_avail[i+3];
255		}
256		phys_avail[i] = 0;
257		phys_avail[i+1] = 0;
258		if (!phys_avail[0])
259			panic("pmap_steal_memory: out of memory");
260		bank_size = phys_avail[1] - phys_avail[0];
261	}
262
263	pa = phys_avail[0];
264	phys_avail[0] += size;
265
266	va = IA64_PHYS_TO_RR7(pa);
267	bzero((caddr_t) va, size);
268	return va;
269}
270
271/*
272 *	Bootstrap the system enough to run with virtual memory.
273 */
274void
275pmap_bootstrap()
276{
277	struct ia64_pal_result res;
278	struct ia64_lpte *pte;
279	vm_offset_t base, limit;
280	size_t size;
281	int i, j, count, ridbits;
282
283	/*
284	 * Query the PAL Code to find the loop parameters for the
285	 * ptc.e instruction.
286	 */
287	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
288	if (res.pal_status != 0)
289		panic("Can't configure ptc.e parameters");
290	pmap_ptc_e_base = res.pal_result[0];
291	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
292	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
293	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
294	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
295	if (bootverbose)
296		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
297		       "stride1=0x%lx, stride2=0x%lx\n",
298		       pmap_ptc_e_base,
299		       pmap_ptc_e_count1,
300		       pmap_ptc_e_count2,
301		       pmap_ptc_e_stride1,
302		       pmap_ptc_e_stride2);
303	mtx_init(&pmap_ptcmutex, "Global PTC lock", NULL, MTX_SPIN);
304
305	/*
306	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
307	 * Set up RIDs. RIDs 0..7 are reserved for the kernel.
308	 * We currently need at least 19 bits in the RID because PID_MAX
309	 * can only be encoded in 17 bits and we need RIDs for 5 regions
310	 * per process. With PID_MAX equalling 99999 this means that we
311	 * need to be able to encode 499995 (=5*PID_MAX).
312	 * The Itanium processor only has 18 bits and the architected
313	 * minimum is exactly that. So, we cannot use a PID based scheme
314	 * in those cases. Enter pmap_ridmap...
315	 * We should avoid the map when running on a processor that has
316	 * implemented enough bits. This means that we should pass the
317	 * process/thread ID to pmap. This we currently don't do, so we
318	 * use the map anyway. However, we don't want to allocate a map
319	 * that is large enough to cover the range dictated by the number
320	 * of bits in the RID, because that may result in a RID map of
321	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
322	 * The bottom line: we create a 32KB map when the processor only
323	 * implements 18 bits (or when we can't figure it out). Otherwise
324	 * we create a 64KB map.
325	 */
326	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
327	if (res.pal_status != 0) {
328		if (bootverbose)
329			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
330		ridbits = 18; /* guaranteed minimum */
331	} else {
332		ridbits = (res.pal_result[1] >> 8) & 0xff;
333		if (bootverbose)
334			printf("Processor supports %d Region ID bits\n",
335			    ridbits);
336	}
337	if (ridbits > 19)
338		ridbits = 19;
339
340	pmap_ridmax = (1 << ridbits);
341	pmap_ridmapsz = pmap_ridmax / 64;
342	pmap_ridmap = (uint64_t *)pmap_steal_memory(pmap_ridmax / 8);
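	/* Mark RIDs 0..7 as allocated; they are reserved for the kernel. */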
343	pmap_ridmap[0] |= 0xff;
344	pmap_rididx = 0;
345	pmap_ridcount = 8;
346	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
347
348	/*
349	 * Allocate some memory for initial kernel 'page tables'.
350	 */
351	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
352	for (i = 0; i < NKPT; i++) {
353		ia64_kptdir[i] = (void*)pmap_steal_memory(PAGE_SIZE);
354	}
355	nkpt = NKPT;
356	kernel_vm_end = NKPT * PAGE_SIZE * NKPTEPG + VM_MIN_KERNEL_ADDRESS -
357	    VM_GATEWAY_SIZE;
358
359	for (i = 0; phys_avail[i+2]; i+= 2)
360		;
361	count = i+2;
362
363	/*
364	 * Figure out a useful size for the VHPT based on the size of
365	 * physical memory, and try to locate a region that is large
366	 * enough to contain the VHPT (which must be a power of two in
367	 * size and aligned to a natural boundary).
368	 * We silently bump up the VHPT size to the minimum size if the
369	 * user has set the tunable too small. Likewise, the VHPT size
370	 * is silently capped to the maximum allowed.
371	 */
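	/*
	 * Sizing note (added): VHPT entries are 32-byte long-format
	 * ia64_lptes, so growing the table until it exceeds Maxmem * 32
	 * bytes aims for roughly one entry per physical page (Maxmem is
	 * in pages).
	 */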
372	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
373	if (pmap_vhpt_log2size == 0) {
374		pmap_vhpt_log2size = 15;
375		size = 1UL << pmap_vhpt_log2size;
376		while (size < Maxmem * 32) {
377			pmap_vhpt_log2size++;
378			size <<= 1;
379		}
380	} else if (pmap_vhpt_log2size < 15)
381		pmap_vhpt_log2size = 15;
382	if (pmap_vhpt_log2size > 61)
383		pmap_vhpt_log2size = 61;
384
385	pmap_vhpt_base[0] = 0;
386	base = limit = 0;
387	size = 1UL << pmap_vhpt_log2size;
388	while (pmap_vhpt_base[0] == 0) {
389		if (bootverbose)
390			printf("Trying VHPT size 0x%lx\n", size);
391		for (i = 0; i < count; i += 2) {
392			base = (phys_avail[i] + size - 1) & ~(size - 1);
393			limit = base + MAXCPU * size;
394			if (limit <= phys_avail[i+1])
395				/*
396				 * VHPT can fit in this region
397				 */
398				break;
399		}
400		if (!phys_avail[i]) {
401			/* Can't fit, try next smaller size. */
402			pmap_vhpt_log2size--;
403			size >>= 1;
404		} else
405			pmap_vhpt_base[0] = IA64_PHYS_TO_RR7(base);
406	}
407	if (pmap_vhpt_log2size < 15)
408		panic("Can't find space for VHPT");
409
410	if (bootverbose)
411		printf("Putting VHPT at 0x%lx\n", base);
412
413	if (base != phys_avail[i]) {
414		/* Split this region. */
415		if (bootverbose)
416			printf("Splitting [%p-%p]\n", (void *)phys_avail[i],
417			    (void *)phys_avail[i+1]);
418		for (j = count; j > i; j -= 2) {
419			phys_avail[j] = phys_avail[j-2];
420			phys_avail[j+1] = phys_avail[j-2+1];
421		}
422		phys_avail[i+1] = base;
423		phys_avail[i+2] = limit;
424	} else
425		phys_avail[i] = limit;
426
427	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
428
429	pmap_vhpt_bucket = (void *)pmap_steal_memory(pmap_vhpt_nbuckets *
430	    sizeof(struct ia64_bucket));
431	pte = (struct ia64_lpte *)pmap_vhpt_base[0];
432	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
433		pte[i].pte = 0;
434		pte[i].itir = 0;
435		pte[i].tag = 1UL << 63;	/* Invalid tag */
436		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
437		/* Stolen memory is zeroed! */
438		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
439		    MTX_SPIN);
440	}
441
442	for (i = 1; i < MAXCPU; i++) {
443		pmap_vhpt_base[i] = pmap_vhpt_base[i - 1] + size;
444		bcopy((void *)pmap_vhpt_base[i - 1], (void *)pmap_vhpt_base[i],
445		    size);
446	}
447
448	__asm __volatile("mov cr.pta=%0;; srlz.i;;" ::
449	    "r" (pmap_vhpt_base[0] + (1<<8) + (pmap_vhpt_log2size<<2) + 1));
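	/*
	 * A reading of the PTA value above (added, not new behavior):
	 * the VHPT base occupies the high bits, the table size (log2,
	 * in bytes) sits in pta.size at bits 2..7, bit 8 selects the
	 * long-format VHPT, and bit 0 (pta.ve) enables the walker.
	 */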
450
451	virtual_avail = VM_MIN_KERNEL_ADDRESS;
452	virtual_end = VM_MAX_KERNEL_ADDRESS;
453
454	/*
455	 * Initialize the kernel pmap (which is statically allocated).
456	 */
457	PMAP_LOCK_INIT(kernel_pmap);
458	for (i = 0; i < 5; i++)
459		kernel_pmap->pm_rid[i] = 0;
460	kernel_pmap->pm_active = 1;
461	TAILQ_INIT(&kernel_pmap->pm_pvlist);
462	PCPU_SET(current_pmap, kernel_pmap);
463
464	/*
465	 * Region 5 is mapped via the vhpt.
466	 */
467	ia64_set_rr(IA64_RR_BASE(5),
468		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
469
470	/*
471	 * Region 6 is direct mapped UC and region 7 is direct mapped
472	 * WB (cacheable). The details of this are controlled by the Alt
473	 * {I,D}TLB handlers. Here we just make sure that they have the
474	 * largest possible page size to minimise TLB usage.
475	 */
476	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (IA64_ID_PAGE_SHIFT << 2));
477	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (IA64_ID_PAGE_SHIFT << 2));
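	/*
	 * Region register encoding used here and for region 5 above
	 * (cf. struct ia64_rr in the DDB code at the end of this file):
	 * rid in bits 8..31, preferred page size (log2) in bits 2..7,
	 * and the VHPT walker enable bit in bit 0.  Regions 6 and 7
	 * leave the walker disabled; their translations are supplied by
	 * the Alt {I,D}TLB handlers instead.
	 */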
478
479	/*
480	 * Clear out any random TLB entries left over from booting.
481	 */
482	pmap_invalidate_all(kernel_pmap);
483
484	map_gateway_page();
485}
486
487static int
488pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
489{
490	int count, error, i;
491
492	count = 0;
493	for (i = 0; i < pmap_vhpt_nbuckets; i++)
494		count += pmap_vhpt_bucket[i].length;
495
496	error = SYSCTL_OUT(req, &count, sizeof(count));
497	return (error);
498}
499
500/*
501 *	Initialize a vm_page's machine-dependent fields.
502 */
503void
504pmap_page_init(vm_page_t m)
505{
506
507	TAILQ_INIT(&m->md.pv_list);
508	m->md.pv_list_count = 0;
509}
510
511/*
512 *	Initialize the pmap module.
513 *	Called by vm_init, to initialize any structures that the pmap
514 *	system needs to map virtual memory.
515 */
516void
517pmap_init(void)
518{
519	int shpgperproc = PMAP_SHPGPERPROC;
520
521	/*
522	 * Initialize the address space (zone) for the pv entries.  Set a
523	 * high water mark so that the system can recover from excessive
524	 * numbers of pv entries.
525	 */
526	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
527	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
528	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
529	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
530	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
531	pv_entry_high_water = 9 * (pv_entry_max / 10);
532
533	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
534	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
535}
536
537void
538pmap_init2()
539{
540}
541
542
543/***************************************************
544 * Manipulate TLBs for a pmap
545 ***************************************************/
546
547#if 0
548static __inline void
549pmap_invalidate_page_locally(void *arg)
550{
551	vm_offset_t va = (uintptr_t)arg;
552	struct ia64_lpte *pte;
553
554	pte = (struct ia64_lpte *)ia64_thash(va);
555	if (pte->tag == ia64_ttag(va))
556		pte->tag = 1UL << 63;
557	ia64_ptc_l(va, PAGE_SHIFT << 2);
558}
559
560#ifdef SMP
561static void
562pmap_invalidate_page_1(void *arg)
563{
564	void **args = arg;
565	pmap_t oldpmap;
566
567	critical_enter();
568	oldpmap = pmap_install(args[0]);
569	pmap_invalidate_page_locally(args[1]);
570	pmap_install(oldpmap);
571	critical_exit();
572}
573#endif
574
575static void
576pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
577{
578
579	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
580		("invalidating TLB for non-current pmap"));
581
582#ifdef SMP
583	if (mp_ncpus > 1) {
584		void *args[2];
585		args[0] = pmap;
586		args[1] = (void *)va;
587		smp_rendezvous(NULL, pmap_invalidate_page_1, NULL, args);
588	} else
589#endif
590	pmap_invalidate_page_locally((void *)va);
591}
592#endif /* 0 */
593
594static void
595pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
596{
597	struct ia64_lpte *pte;
598	int i, vhpt_ofs;
599
600	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
601		("invalidating TLB for non-current pmap"));
602
603	vhpt_ofs = ia64_thash(va) - pmap_vhpt_base[PCPU_GET(cpuid)];
604	critical_enter();
605	for (i = 0; i < MAXCPU; i++) {
606		pte = (struct ia64_lpte *)(pmap_vhpt_base[i] + vhpt_ofs);
607		if (pte->tag == ia64_ttag(va))
608			pte->tag = 1UL << 63;
609	}
610	critical_exit();
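	/*
	 * Added note: the global PTC lock serializes the broadcast
	 * purge below; the architecture permits only one global TLB
	 * purge (ptc.g/ptc.ga) to be in flight at a time.
	 */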
611	mtx_lock_spin(&pmap_ptcmutex);
612	ia64_ptc_ga(va, PAGE_SHIFT << 2);
613	mtx_unlock_spin(&pmap_ptcmutex);
614}
615
616static void
617pmap_invalidate_all_1(void *arg)
618{
619	uint64_t addr;
620	int i, j;
621
622	critical_enter();
623	addr = pmap_ptc_e_base;
624	for (i = 0; i < pmap_ptc_e_count1; i++) {
625		for (j = 0; j < pmap_ptc_e_count2; j++) {
626			ia64_ptc_e(addr);
627			addr += pmap_ptc_e_stride2;
628		}
629		addr += pmap_ptc_e_stride1;
630	}
631	critical_exit();
632}
633
634static void
635pmap_invalidate_all(pmap_t pmap)
636{
637
638	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
639		("invalidating TLB for non-current pmap"));
640
641#ifdef SMP
642	if (mp_ncpus > 1)
643		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
644	else
645#endif
646	pmap_invalidate_all_1(NULL);
647}
648
649static uint32_t
650pmap_allocate_rid(void)
651{
652	uint64_t bit, bits;
653	int rid;
654
655	mtx_lock(&pmap_ridmutex);
656	if (pmap_ridcount == pmap_ridmax)
657		panic("pmap_allocate_rid: All Region IDs used");
658
659	/* Find an index with a free bit. */
660	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
661		pmap_rididx++;
662		if (pmap_rididx == pmap_ridmapsz)
663			pmap_rididx = 0;
664	}
665	rid = pmap_rididx * 64;
666
667	/* Find a free bit. */
668	bit = 1UL;
669	while (bits & bit) {
670		rid++;
671		bit <<= 1;
672	}
673
674	pmap_ridmap[pmap_rididx] |= bit;
675	pmap_ridcount++;
676	mtx_unlock(&pmap_ridmutex);
677
678	return rid;
679}
680
681static void
682pmap_free_rid(uint32_t rid)
683{
684	uint64_t bit;
685	int idx;
686
687	idx = rid / 64;
688	bit = ~(1UL << (rid & 63));
689
690	mtx_lock(&pmap_ridmutex);
691	pmap_ridmap[idx] &= bit;
692	pmap_ridcount--;
693	mtx_unlock(&pmap_ridmutex);
694}
695
696/*
697 * this routine defines the region(s) of memory that should
698 * not be tested for the modified bit.
699 */
700static PMAP_INLINE int
701pmap_track_modified(vm_offset_t va)
702{
703	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
704		return 1;
705	else
706		return 0;
707}
708
709/***************************************************
710 * Page table page management routines.....
711 ***************************************************/
712
713void
714pmap_pinit0(struct pmap *pmap)
715{
716	/* kernel_pmap is the same as any other pmap. */
717	pmap_pinit(pmap);
718}
719
720/*
721 * Initialize a preallocated and zeroed pmap structure,
722 * such as one in a vmspace structure.
723 */
724void
725pmap_pinit(struct pmap *pmap)
726{
727	int i;
728
729	PMAP_LOCK_INIT(pmap);
730	for (i = 0; i < 5; i++)
731		pmap->pm_rid[i] = pmap_allocate_rid();
732	pmap->pm_active = 0;
733	TAILQ_INIT(&pmap->pm_pvlist);
734	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
735}
736
737/***************************************************
738 * Pmap allocation/deallocation routines.
739 ***************************************************/
740
741/*
742 * Release any resources held by the given physical map.
743 * Called when a pmap initialized by pmap_pinit is being released.
744 * Should only be called if the map contains no valid mappings.
745 */
746void
747pmap_release(pmap_t pmap)
748{
749	int i;
750
751	for (i = 0; i < 5; i++)
752		if (pmap->pm_rid[i])
753			pmap_free_rid(pmap->pm_rid[i]);
754	PMAP_LOCK_DESTROY(pmap);
755}
756
757/*
758 * grow the number of kernel page table entries, if needed
759 */
760void
761pmap_growkernel(vm_offset_t addr)
762{
763	struct ia64_lpte *ptepage;
764	vm_page_t nkpg;
765
766	if (kernel_vm_end >= addr)
767		return;
768
769	critical_enter();
770
771	while (kernel_vm_end < addr) {
772		/* We could handle more by increasing the size of kptdir. */
773		if (nkpt == MAXKPT)
774			panic("pmap_growkernel: out of kernel address space");
775
776		nkpg = vm_page_alloc(NULL, nkpt,
777		    VM_ALLOC_NOOBJ | VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
778		if (!nkpg)
779			panic("pmap_growkernel: no memory to grow kernel");
780
781		ptepage = (struct ia64_lpte *)
782		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
783		bzero(ptepage, PAGE_SIZE);
784		ia64_kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
785
786		nkpt++;
787		kernel_vm_end += PAGE_SIZE * NKPTEPG;
788	}
789
790	critical_exit();
791}
792
793/***************************************************
794 * page management routines.
795 ***************************************************/
796
797/*
798 * free the pv_entry back to the free list
799 */
800static PMAP_INLINE void
801free_pv_entry(pv_entry_t pv)
802{
803	pv_entry_count--;
804	uma_zfree(pvzone, pv);
805}
806
807/*
808 * get a new pv_entry, allocating a block from the system
809 * when needed.
810 * the memory allocation is performed bypassing the malloc code
811 * because of the possibility of allocations at interrupt time.
812 */
813static pv_entry_t
814get_pv_entry(void)
815{
816	pv_entry_count++;
817	if ((pv_entry_count > pv_entry_high_water) &&
818		(pmap_pagedaemon_waken == 0)) {
819		pmap_pagedaemon_waken = 1;
820		wakeup (&vm_pages_needed);
821	}
822	return uma_zalloc(pvzone, M_NOWAIT);
823}
824
825/*
826 * Add an ia64_lpte to the VHPT.
827 */
828static void
829pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
830{
831	struct ia64_bucket *bckt;
832	struct ia64_lpte *vhpte;
833	uint64_t pte_pa;
834
835	/* Can fault, so get it out of the way. */
836	pte_pa = ia64_tpa((vm_offset_t)pte);
837
838	vhpte = (struct ia64_lpte *)ia64_thash(va);
839	bckt = (struct ia64_bucket *)vhpte->chain;
840
841	mtx_lock_spin(&bckt->mutex);
842	pte->chain = bckt->chain;
843	ia64_mf();
844	bckt->chain = pte_pa;
845
846	pmap_vhpt_inserts++;
847	bckt->length++;
848	mtx_unlock_spin(&bckt->mutex);
849}
850
851/*
852 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
853 * worked or an appropriate error code otherwise.
854 */
855static int
856pmap_remove_vhpt(vm_offset_t va)
857{
858	struct ia64_bucket *bckt;
859	struct ia64_lpte *pte;
860	struct ia64_lpte *lpte;
861	struct ia64_lpte *vhpte;
862	uint64_t chain, tag;
863
864	tag = ia64_ttag(va);
865	vhpte = (struct ia64_lpte *)ia64_thash(va);
866	bckt = (struct ia64_bucket *)vhpte->chain;
867
868	lpte = NULL;
869	mtx_lock_spin(&bckt->mutex);
870	chain = bckt->chain;
871	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
872	while (chain != 0 && pte->tag != tag) {
873		lpte = pte;
874		chain = pte->chain;
875		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
876	}
877	if (chain == 0) {
878		mtx_unlock_spin(&bckt->mutex);
879		return (ENOENT);
880	}
881
882	/* Snip this pte out of the collision chain. */
883	if (lpte == NULL)
884		bckt->chain = pte->chain;
885	else
886		lpte->chain = pte->chain;
887	ia64_mf();
888
889	bckt->length--;
890	mtx_unlock_spin(&bckt->mutex);
891	return (0);
892}
893
894/*
895 * Find the ia64_lpte for the given va, if any.
896 */
897static struct ia64_lpte *
898pmap_find_vhpt(vm_offset_t va)
899{
900	struct ia64_bucket *bckt;
901	struct ia64_lpte *pte;
902	uint64_t chain, tag;
903
904	tag = ia64_ttag(va);
905	pte = (struct ia64_lpte *)ia64_thash(va);
906	bckt = (struct ia64_bucket *)pte->chain;
907
908	mtx_lock_spin(&bckt->mutex);
909	chain = bckt->chain;
910	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
911	while (chain != 0 && pte->tag != tag) {
912		chain = pte->chain;
913		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
914	}
915	mtx_unlock_spin(&bckt->mutex);
916	return ((chain != 0) ? pte : NULL);
917}
918
919/*
920 * Remove an entry from the list of managed mappings.
921 */
922static int
923pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
924{
925	if (!pv) {
926		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
927			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
928				if (pmap == pv->pv_pmap && va == pv->pv_va)
929					break;
930			}
931		} else {
932			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
933				if (va == pv->pv_va)
934					break;
935			}
936		}
937	}
938
939	if (pv) {
940		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
941		m->md.pv_list_count--;
942		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
943			vm_page_flag_clear(m, PG_WRITEABLE);
944
945		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
946		free_pv_entry(pv);
947		return 0;
948	} else {
949		return ENOENT;
950	}
951}
952
953/*
954 * Create a pv entry for page at pa for
955 * (pmap, va).
956 */
957static void
958pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
959{
960	pv_entry_t pv;
961
962	pv = get_pv_entry();
963	if (pv == NULL)
964		panic("no pv entries: increase vm.pmap.shpgperproc");
965	pv->pv_pmap = pmap;
966	pv->pv_va = va;
967
968	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
969	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
970	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
971	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
972	m->md.pv_list_count++;
973}
974
975/*
976 *	Routine:	pmap_extract
977 *	Function:
978 *		Extract the physical page address associated
979 *		with the given map/virtual_address pair.
980 */
981vm_paddr_t
982pmap_extract(pmap_t pmap, vm_offset_t va)
983{
984	struct ia64_lpte *pte;
985	pmap_t oldpmap;
986	vm_paddr_t pa;
987
988	pa = 0;
989	PMAP_LOCK(pmap);
990	oldpmap = pmap_install(pmap);
991	pte = pmap_find_vhpt(va);
992	if (pte != NULL && pmap_present(pte))
993		pa = pmap_ppn(pte);
994	pmap_install(oldpmap);
995	PMAP_UNLOCK(pmap);
996	return (pa);
997}
998
999/*
1000 *	Routine:	pmap_extract_and_hold
1001 *	Function:
1002 *		Atomically extract and hold the physical page
1003 *		with the given pmap and virtual address pair
1004 *		if that mapping permits the given protection.
1005 */
1006vm_page_t
1007pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1008{
1009	struct ia64_lpte *pte;
1010	pmap_t oldpmap;
1011	vm_page_t m;
1012
1013	m = NULL;
1014	vm_page_lock_queues();
1015	PMAP_LOCK(pmap);
1016	oldpmap = pmap_install(pmap);
1017	pte = pmap_find_vhpt(va);
1018	if (pte != NULL && pmap_present(pte) &&
1019	    (pmap_prot(pte) & prot) == prot) {
1020		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1021		vm_page_hold(m);
1022	}
1023	vm_page_unlock_queues();
1024	pmap_install(oldpmap);
1025	PMAP_UNLOCK(pmap);
1026	return (m);
1027}
1028
1029/***************************************************
1030 * Low level mapping routines.....
1031 ***************************************************/
1032
1033/*
1034 * Find the kernel lpte for mapping the given virtual address, which
1035 * must be in the part of region 5 which we can cover with our kernel
1036 * 'page tables'.
1037 */
1038static struct ia64_lpte *
1039pmap_find_kpte(vm_offset_t va)
1040{
1041	KASSERT((va >> 61) == 5,
1042		("kernel mapping 0x%lx not in region 5", va));
1043	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1044		("kernel mapping 0x%lx out of range", va));
1045	return (&ia64_kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)]);
1046}
1047
1048/*
1049 * Find a pte suitable for mapping a user-space address. If one exists
1050 * in the VHPT, that one will be returned, otherwise a new pte is
1051 * allocated.
1052 */
1053static struct ia64_lpte *
1054pmap_find_pte(vm_offset_t va)
1055{
1056	struct ia64_lpte *pte;
1057
1058	if (va >= VM_MAXUSER_ADDRESS)
1059		return pmap_find_kpte(va);
1060
1061	pte = pmap_find_vhpt(va);
1062	if (pte == NULL) {
1063		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
1064		pte->tag = 1UL << 63;
1065	}
1066	return (pte);
1067}
1068
1069/*
1070 * Free a pte which is now unused. This simply returns it to the zone
1071 * allocator if it is a user mapping. For kernel mappings, clear the
1072 * valid bit to make it clear that the mapping is not currently used.
1073 */
1074static void
1075pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1076{
1077	if (va < VM_MAXUSER_ADDRESS)
1078		uma_zfree(ptezone, pte);
1079	else
1080		pmap_clear_present(pte);
1081}
1082
1083static PMAP_INLINE void
1084pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1085{
1086	static int prot2ar[4] = {
1087		PTE_AR_R,	/* VM_PROT_NONE */
1088		PTE_AR_RW,	/* VM_PROT_WRITE */
1089		PTE_AR_RX,	/* VM_PROT_EXECUTE */
1090		PTE_AR_RWX	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1091	};
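	/*
	 * prot2ar is indexed by (prot & VM_PROT_ALL) >> 1: shifting out
	 * VM_PROT_READ leaves 0 for none/read-only, 1 for +write, 2 for
	 * +execute and 3 for +write+execute, matching the table above.
	 */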
1092
1093	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK);
1094	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1095	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1096	    ? PTE_PL_KERN : PTE_PL_USER;
1097	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1098}
1099
1100/*
1101 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1102 * the pte was originally valid, then it's assumed to already be in the
1103 * VHPT.
1104 * This function does not set the protection bits.  It's expected
1105 * that those have been set correctly prior to calling this function.
1106 */
1107static void
1108pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1109    boolean_t wired, boolean_t managed)
1110{
1111
1112	pte->pte &= PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK;
1113	pte->pte |= PTE_PRESENT | PTE_MA_WB;
1114	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1115	pte->pte |= (wired) ? PTE_WIRED : 0;
1116	pte->pte |= pa & PTE_PPN_MASK;
1117
1118	pte->itir = PAGE_SHIFT << 2;
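	/* itir.ps is at bits 2..7 (cf. ITIR_PS_MASK below): base page size. */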
1119
1120	pte->tag = ia64_ttag(va);
1121}
1122
1123/*
1124 * Remove the (possibly managed) mapping represented by pte from the
1125 * given pmap.
1126 */
1127static int
1128pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1129		pv_entry_t pv, int freepte)
1130{
1131	int error;
1132	vm_page_t m;
1133
1134	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1135		("removing pte for non-current pmap"));
1136
1137	/*
1138	 * First remove from the VHPT.
1139	 */
1140	error = pmap_remove_vhpt(va);
1141	if (error)
1142		return (error);
1143
1144	pmap_invalidate_page(pmap, va);
1145
1146	if (pmap_wired(pte))
1147		pmap->pm_stats.wired_count -= 1;
1148
1149	pmap->pm_stats.resident_count -= 1;
1150	if (pmap_managed(pte)) {
1151		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1152		if (pmap_dirty(pte))
1153			if (pmap_track_modified(va))
1154				vm_page_dirty(m);
1155		if (pmap_accessed(pte))
1156			vm_page_flag_set(m, PG_REFERENCED);
1157
1158		error = pmap_remove_entry(pmap, m, va, pv);
1159	}
1160	if (freepte)
1161		pmap_free_pte(pte, va);
1162
1163	return (error);
1164}
1165
1166/*
1167 * Extract the physical page address associated with a kernel
1168 * virtual address.
1169 */
1170vm_paddr_t
1171pmap_kextract(vm_offset_t va)
1172{
1173	struct ia64_lpte *pte;
1174	vm_offset_t gwpage;
1175
1176	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1177
1178	/* Regions 6 and 7 are direct mapped. */
1179	if (va >= IA64_RR_BASE(6))
1180		return (IA64_RR_MASK(va));
1181
1182	/* EPC gateway page? */
1183	gwpage = (vm_offset_t)ia64_get_k5();
1184	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1185		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1186
1187	/* Bail out if the virtual address is beyond our limits. */
1188	if (IA64_RR_MASK(va) >= nkpt * PAGE_SIZE * NKPTEPG)
1189		return (0);
1190
1191	pte = pmap_find_kpte(va);
1192	if (!pmap_present(pte))
1193		return (0);
1194	return (pmap_ppn(pte) | (va & PAGE_MASK));
1195}
1196
1197/*
1198 * Add a list of wired pages to the kva.  This routine is only used for
1199 * temporary kernel mappings that do not need to have page modification
1200 * or references recorded.  Note that old mappings are simply written
1201 * over.  The page is effectively wired, but it's customary to not have
1202 * the PTE reflect that, nor update statistics.
1203 */
1204void
1205pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1206{
1207	struct ia64_lpte *pte;
1208	int i;
1209
1210	for (i = 0; i < count; i++) {
1211		pte = pmap_find_kpte(va);
1212		if (pmap_present(pte))
1213			pmap_invalidate_page(kernel_pmap, va);
1214		else
1215			pmap_enter_vhpt(pte, va);
1216		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1217		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1218		va += PAGE_SIZE;
1219	}
1220}
1221
1222/*
1223 * this routine jerks page mappings from the
1224 * kernel -- it is meant only for temporary mappings.
1225 */
1226void
1227pmap_qremove(vm_offset_t va, int count)
1228{
1229	struct ia64_lpte *pte;
1230	int i;
1231
1232	for (i = 0; i < count; i++) {
1233		pte = pmap_find_kpte(va);
1234		if (pmap_present(pte)) {
1235			pmap_remove_vhpt(va);
1236			pmap_invalidate_page(kernel_pmap, va);
1237			pmap_clear_present(pte);
1238		}
1239		va += PAGE_SIZE;
1240	}
1241}
1242
1243/*
1244 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1245 * to not have the PTE reflect that, nor update statistics.
1246 */
1247void
1248pmap_kenter(vm_offset_t va, vm_offset_t pa)
1249{
1250	struct ia64_lpte *pte;
1251
1252	pte = pmap_find_kpte(va);
1253	if (pmap_present(pte))
1254		pmap_invalidate_page(kernel_pmap, va);
1255	else
1256		pmap_enter_vhpt(pte, va);
1257	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1258	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1259}
1260
1261/*
1262 * Remove a page from the kva
1263 */
1264void
1265pmap_kremove(vm_offset_t va)
1266{
1267	struct ia64_lpte *pte;
1268
1269	pte = pmap_find_kpte(va);
1270	if (pmap_present(pte)) {
1271		pmap_remove_vhpt(va);
1272		pmap_invalidate_page(kernel_pmap, va);
1273		pmap_clear_present(pte);
1274	}
1275}
1276
1277/*
1278 *	Used to map a range of physical addresses into kernel
1279 *	virtual address space.
1280 *
1281 *	The value passed in '*virt' is a suggested virtual address for
1282 *	the mapping. Architectures which can support a direct-mapped
1283 *	physical to virtual region can return the appropriate address
1284 *	within that region, leaving '*virt' unchanged. Other
1285 *	architectures should map the pages starting at '*virt' and
1286 *	update '*virt' with the first usable address after the mapped
1287 *	region.
1288 */
1289vm_offset_t
1290pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1291{
1292	return IA64_PHYS_TO_RR7(start);
1293}
1294
1295/*
1296 * Remove a single page from a process address space
1297 */
1298static void
1299pmap_remove_page(pmap_t pmap, vm_offset_t va)
1300{
1301	struct ia64_lpte *pte;
1302
1303	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1304		("removing page for non-current pmap"));
1305
1306	pte = pmap_find_vhpt(va);
1307	if (pte != NULL)
1308		pmap_remove_pte(pmap, pte, va, 0, 1);
1309	return;
1310}
1311
1312/*
1313 *	Remove the given range of addresses from the specified map.
1314 *
1315 *	It is assumed that the start and end are properly
1316 *	rounded to the page size.
1317 */
1318void
1319pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1320{
1321	pmap_t oldpmap;
1322	vm_offset_t va;
1323	pv_entry_t npv, pv;
1324	struct ia64_lpte *pte;
1325
1326	if (pmap->pm_stats.resident_count == 0)
1327		return;
1328
1329	vm_page_lock_queues();
1330	PMAP_LOCK(pmap);
1331	oldpmap = pmap_install(pmap);
1332
1333	 * Special handling of removing one page: it is a very
1334	 * common operation and allows some code to be
1335	 * short-circuited.
1336	 * code.
1337	 */
1338	if (sva + PAGE_SIZE == eva) {
1339		pmap_remove_page(pmap, sva);
1340		goto out;
1341	}
1342
1343	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1344		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
1345			va = pv->pv_va;
1346			if (va >= sva && va < eva) {
1347				pte = pmap_find_vhpt(va);
1348				KASSERT(pte != NULL, ("pte"));
1349				pmap_remove_pte(pmap, pte, va, pv, 1);
1350			}
1351		}
1352
1353	} else {
1354		for (va = sva; va < eva; va += PAGE_SIZE) {
1355			pte = pmap_find_vhpt(va);
1356			if (pte != NULL)
1357				pmap_remove_pte(pmap, pte, va, 0, 1);
1358		}
1359	}
1360out:
1361	vm_page_unlock_queues();
1362	pmap_install(oldpmap);
1363	PMAP_UNLOCK(pmap);
1364}
1365
1366/*
1367 *	Routine:	pmap_remove_all
1368 *	Function:
1369 *		Removes this physical page from
1370 *		all physical maps in which it resides.
1371 *		Reflects back modify bits to the pager.
1372 *
1373 *	Notes:
1374 *		Original versions of this routine were very
1375 *		inefficient because they iteratively called
1376 *		pmap_remove (slow...)
1377 */
1378
1379void
1380pmap_remove_all(vm_page_t m)
1381{
1382	pmap_t oldpmap;
1383	pv_entry_t pv;
1384
1385#if defined(DIAGNOSTIC)
1386	/*
1387	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1388	 * pages!
1389	 */
1390	if (m->flags & PG_FICTITIOUS) {
1391		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1392	}
1393#endif
1394	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1395	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1396		struct ia64_lpte *pte;
1397		pmap_t pmap = pv->pv_pmap;
1398		vm_offset_t va = pv->pv_va;
1399
1400		PMAP_LOCK(pmap);
1401		oldpmap = pmap_install(pmap);
1402		pte = pmap_find_vhpt(va);
1403		KASSERT(pte != NULL, ("pte"));
1404		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1405			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1406		pmap_remove_pte(pmap, pte, va, pv, 1);
1407		pmap_install(oldpmap);
1408		PMAP_UNLOCK(pmap);
1409	}
1410	vm_page_flag_clear(m, PG_WRITEABLE);
1411}
1412
1413/*
1414 *	Set the physical protection on the
1415 *	specified range of this map as requested.
1416 */
1417void
1418pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1419{
1420	pmap_t oldpmap;
1421	struct ia64_lpte *pte;
1422
1423	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1424		pmap_remove(pmap, sva, eva);
1425		return;
1426	}
1427
1428	if (prot & VM_PROT_WRITE)
1429		return;
1430
1431	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1432		panic("pmap_protect: unaligned addresses");
1433
1434	vm_page_lock_queues();
1435	PMAP_LOCK(pmap);
1436	oldpmap = pmap_install(pmap);
1437	while (sva < eva) {
1438		/*
1439		 * If page is invalid, skip this page
1440		 */
1441		pte = pmap_find_vhpt(sva);
1442		if (pte == NULL) {
1443			sva += PAGE_SIZE;
1444			continue;
1445		}
1446
1447		if (pmap_prot(pte) != prot) {
1448			if (pmap_managed(pte)) {
1449				vm_offset_t pa = pmap_ppn(pte);
1450				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1451				if (pmap_dirty(pte)) {
1452					if (pmap_track_modified(sva))
1453						vm_page_dirty(m);
1454					pmap_clear_dirty(pte);
1455				}
1456				if (pmap_accessed(pte)) {
1457					vm_page_flag_set(m, PG_REFERENCED);
1458					pmap_clear_accessed(pte);
1459				}
1460			}
1461			pmap_pte_prot(pmap, pte, prot);
1462			pmap_invalidate_page(pmap, sva);
1463		}
1464
1465		sva += PAGE_SIZE;
1466	}
1467	vm_page_unlock_queues();
1468	pmap_install(oldpmap);
1469	PMAP_UNLOCK(pmap);
1470}
1471
1472/*
1473 *	Insert the given physical page (p) at
1474 *	the specified virtual address (v) in the
1475 *	target physical map with the protection requested.
1476 *
1477 *	If specified, the page will be wired down, meaning
1478 *	that the related pte can not be reclaimed.
1479 *
1480 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1481 *	or lose information.  That is, this routine must actually
1482 *	insert this page into the given map NOW.
1483 */
1484void
1485pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1486    boolean_t wired)
1487{
1488	pmap_t oldpmap;
1489	vm_offset_t pa;
1490	vm_offset_t opa;
1491	struct ia64_lpte origpte;
1492	struct ia64_lpte *pte;
1493	boolean_t managed;
1494
1495	vm_page_lock_queues();
1496	PMAP_LOCK(pmap);
1497	oldpmap = pmap_install(pmap);
1498
1499	va &= ~PAGE_MASK;
1500#ifdef DIAGNOSTIC
1501	if (va > VM_MAX_KERNEL_ADDRESS)
1502		panic("pmap_enter: toobig");
1503#endif
1504
1505	/*
1506	 * Find (or create) a pte for the given mapping.
1507	 */
1508	while ((pte = pmap_find_pte(va)) == NULL) {
1509		pmap_install(oldpmap);
1510		PMAP_UNLOCK(pmap);
1511		vm_page_unlock_queues();
1512		VM_WAIT;
1513		vm_page_lock_queues();
1514		PMAP_LOCK(pmap);
1515		oldpmap = pmap_install(pmap);
1516	}
1517	origpte = *pte;
1518	if (!pmap_present(pte)) {
1519		opa = ~0UL;
1520		pmap_enter_vhpt(pte, va);
1521	} else
1522		opa = pmap_ppn(pte);
1523	managed = FALSE;
1524	pa = VM_PAGE_TO_PHYS(m);
1525
1526	/*
1527	 * Mapping has not changed, must be protection or wiring change.
1528	 */
1529	if (opa == pa) {
1530		/*
1531		 * Wiring change, just update stats. We don't worry about
1532		 * wiring PT pages as they remain resident as long as there
1533		 * are valid mappings in them. Hence, if a user page is wired,
1534		 * the PT page will be also.
1535		 */
1536		if (wired && !pmap_wired(&origpte))
1537			pmap->pm_stats.wired_count++;
1538		else if (!wired && pmap_wired(&origpte))
1539			pmap->pm_stats.wired_count--;
1540
1541		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1542
1543		/*
1544		 * We might be turning off write access to the page,
1545		 * so we go ahead and sense modify status.
1546		 */
1547		if (managed && pmap_dirty(&origpte) &&
1548		    pmap_track_modified(va))
1549			vm_page_dirty(m);
1550
1551		pmap_invalidate_page(pmap, va);
1552		goto validate;
1553	}
1554
1555	/*
1556	 * Mapping has changed, invalidate old range and fall
1557	 * through to handle validating new mapping.
1558	 */
1559	if (opa != ~0UL) {
1560		pmap_remove_pte(pmap, pte, va, 0, 0);
1561		pmap_enter_vhpt(pte, va);
1562	}
1563
1564	/*
1565	 * Enter on the PV list if part of our managed memory.
1566	 */
1567	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1568		pmap_insert_entry(pmap, va, m);
1569		managed = TRUE;
1570	}
1571
1572	/*
1573	 * Increment counters
1574	 */
1575	pmap->pm_stats.resident_count++;
1576	if (wired)
1577		pmap->pm_stats.wired_count++;
1578
1579validate:
1580
1581	/*
1582	 * Now validate mapping with desired protection/wiring. This
1583	 * adds the pte to the VHPT if necessary.
1584	 */
1585	pmap_pte_prot(pmap, pte, prot);
1586	pmap_set_pte(pte, va, pa, wired, managed);
1587
1588	vm_page_unlock_queues();
1589	pmap_install(oldpmap);
1590	PMAP_UNLOCK(pmap);
1591}
1592
1593/*
1594 * this code makes some *MAJOR* assumptions:
1595 * 1. Current pmap & pmap exists.
1596 * 2. Not wired.
1597 * 3. Read access.
1598 * 4. No page table pages.
1599 * but is *MUCH* faster than pmap_enter...
1600 */
1601
1602vm_page_t
1603pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1604    vm_page_t mpte)
1605{
1606	struct ia64_lpte *pte;
1607	pmap_t oldpmap;
1608	boolean_t managed;
1609
1610	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1611	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
1612	PMAP_LOCK(pmap);
1613	oldpmap = pmap_install(pmap);
1614
1615	while ((pte = pmap_find_pte(va)) == NULL) {
1616		pmap_install(oldpmap);
1617		PMAP_UNLOCK(pmap);
1618		vm_page_busy(m);
1619		vm_page_unlock_queues();
1620		VM_OBJECT_UNLOCK(m->object);
1621		VM_WAIT;
1622		VM_OBJECT_LOCK(m->object);
1623		vm_page_lock_queues();
1624		vm_page_wakeup(m);
1625		PMAP_LOCK(pmap);
1626		oldpmap = pmap_install(pmap);
1627	}
1628
1629	if (!pmap_present(pte)) {
1630		/* Enter on the PV list if it's managed. */
1631		if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1632			pmap_insert_entry(pmap, va, m);
1633			managed = TRUE;
1634		} else
1635			managed = FALSE;
1636
1637		/* Increment counters. */
1638		pmap->pm_stats.resident_count++;
1639
1640		/* Initialise with R/O protection and enter into VHPT. */
1641		pmap_enter_vhpt(pte, va);
1642		pmap_pte_prot(pmap, pte,
1643		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1644		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1645	}
1646
1647	pmap_install(oldpmap);
1648	PMAP_UNLOCK(pmap);
1649	return (NULL);
1650}
1651
1652/*
1653 * pmap_object_init_pt preloads the ptes for a given object
1654 * into the specified pmap.  This eliminates the blast of soft
1655 * faults on process startup and immediately after an mmap.
1656 */
1657void
1658pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1659		    vm_object_t object, vm_pindex_t pindex,
1660		    vm_size_t size)
1661{
1662
1663	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1664	KASSERT(object->type == OBJT_DEVICE,
1665	    ("pmap_object_init_pt: non-device object"));
1666}
1667
1668/*
1669 *	Routine:	pmap_change_wiring
1670 *	Function:	Change the wiring attribute for a map/virtual-address
1671 *			pair.
1672 *	In/out conditions:
1673 *			The mapping must already exist in the pmap.
1674 */
1675void
1676pmap_change_wiring(pmap, va, wired)
1677	register pmap_t pmap;
1678	vm_offset_t va;
1679	boolean_t wired;
1680{
1681	pmap_t oldpmap;
1682	struct ia64_lpte *pte;
1683
1684	PMAP_LOCK(pmap);
1685	oldpmap = pmap_install(pmap);
1686
1687	pte = pmap_find_vhpt(va);
1688	KASSERT(pte != NULL, ("pte"));
1689	if (wired && !pmap_wired(pte)) {
1690		pmap->pm_stats.wired_count++;
1691		pmap_set_wired(pte);
1692	} else if (!wired && pmap_wired(pte)) {
1693		pmap->pm_stats.wired_count--;
1694		pmap_clear_wired(pte);
1695	}
1696
1697	pmap_install(oldpmap);
1698	PMAP_UNLOCK(pmap);
1699}
1700
1701
1702
1703/*
1704 *	Copy the range specified by src_addr/len
1705 *	from the source map to the range dst_addr/len
1706 *	in the destination map.
1707 *
1708 *	This routine is only advisory and need not do anything.
1709 */
1710
1711void
1712pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1713	  vm_offset_t src_addr)
1714{
1715}
1716
1717
1718/*
1719 *	pmap_zero_page zeros the specified hardware page by
1720 *	mapping it into virtual memory and using bzero to clear
1721 *	its contents.
1722 */
1723
1724void
1725pmap_zero_page(vm_page_t m)
1726{
1727	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1728	bzero((caddr_t) va, PAGE_SIZE);
1729}
1730
1731
1732/*
1733 *	pmap_zero_page_area zeros the specified hardware page by
1734 *	mapping it into virtual memory and using bzero to clear
1735 *	its contents.
1736 *
1737 *	off and size must reside within a single page.
1738 */
1739
1740void
1741pmap_zero_page_area(vm_page_t m, int off, int size)
1742{
1743	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1744	bzero((char *)(caddr_t)va + off, size);
1745}
1746
1747
1748/*
1749 *	pmap_zero_page_idle zeros the specified hardware page by
1750 *	mapping it into virtual memory and using bzero to clear
1751 *	its contents.  This is for the vm_idlezero process.
1752 */
1753
1754void
1755pmap_zero_page_idle(vm_page_t m)
1756{
1757	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
1758	bzero((caddr_t) va, PAGE_SIZE);
1759}
1760
1761
1762/*
1763 *	pmap_copy_page copies the specified (machine independent)
1764 *	page by mapping the page into virtual memory and using
1765 *	bcopy to copy the page, one machine dependent page at a
1766 *	time.
1767 */
1768void
1769pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1770{
1771	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
1772	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
1773	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
1774}
1775
1776/*
1777 * Returns true if the pmap's pv is one of the first
1778 * 16 pvs linked to from this page.  This count may
1779 * be changed upwards or downwards in the future; it
1780 * is only necessary that true be returned for a small
1781 * subset of pmaps for proper page aging.
1782 */
1783boolean_t
1784pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1785{
1786	pv_entry_t pv;
1787	int loops = 0;
1788
1789	if (m->flags & PG_FICTITIOUS)
1790		return FALSE;
1791
1792	/*
1793	 * Check the page's current mappings, returning immediately if found.
1794	 */
1795	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1796	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1797		if (pv->pv_pmap == pmap) {
1798			return TRUE;
1799		}
1800		loops++;
1801		if (loops >= 16)
1802			break;
1803	}
1804	return (FALSE);
1805}
1806
1807 * Remove all pages from the specified address space;
1808 * Remove all pages from specified address space
1809 * this aids process exit speeds.  Also, this code
1810 * is special cased for current process only, but
1811 * can have the more generic (and slightly slower)
1812 * mode enabled.  This is much faster than pmap_remove
1813 * in the case of running down an entire address space.
1814 */
1815void
1816pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1817{
1818	pmap_t oldpmap;
1819	pv_entry_t pv, npv;
1820
1821	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
1822		printf("warning: pmap_remove_pages called with non-current pmap\n");
1823		return;
1824	}
1825
1826	vm_page_lock_queues();
1827	PMAP_LOCK(pmap);
1828	oldpmap = pmap_install(pmap);
1829
1830	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
1831		struct ia64_lpte *pte;
1832
1833		npv = TAILQ_NEXT(pv, pv_plist);
1834
1835		if (pv->pv_va >= eva || pv->pv_va < sva)
1836			continue;
1837
1838		pte = pmap_find_vhpt(pv->pv_va);
1839		KASSERT(pte != NULL, ("pte"));
1840		if (!pmap_wired(pte))
1841			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
1842	}
1843
1844	pmap_install(oldpmap);
1845	PMAP_UNLOCK(pmap);
1846	vm_page_unlock_queues();
1847}
1848
1849/*
1850 *      pmap_page_protect:
1851 *
1852 *      Lower the permission for all mappings to a given page.
1853 */
1854void
1855pmap_page_protect(vm_page_t m, vm_prot_t prot)
1856{
1857	struct ia64_lpte *pte;
1858	pmap_t oldpmap, pmap;
1859	pv_entry_t pv;
1860
1861	if ((prot & VM_PROT_WRITE) != 0)
1862		return;
1863	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
1864		if ((m->flags & PG_WRITEABLE) == 0)
1865			return;
1866		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1867			pmap = pv->pv_pmap;
1868			PMAP_LOCK(pmap);
1869			oldpmap = pmap_install(pmap);
1870			pte = pmap_find_vhpt(pv->pv_va);
1871			KASSERT(pte != NULL, ("pte"));
1872			pmap_pte_prot(pmap, pte, prot);
1873			pmap_invalidate_page(pmap, pv->pv_va);
1874			pmap_install(oldpmap);
1875			PMAP_UNLOCK(pmap);
1876		}
1877		vm_page_flag_clear(m, PG_WRITEABLE);
1878	} else {
1879		pmap_remove_all(m);
1880	}
1881}
1882
1883/*
1884 *	pmap_ts_referenced:
1885 *
1886 *	Return a count of reference bits for a page, clearing those bits.
1887 *	It is not necessary for every reference bit to be cleared, but it
1888 *	is necessary that 0 only be returned when there are truly no
1889 *	reference bits set.
1890 *
1891 *	XXX: The exact number of bits to check and clear is a matter that
1892 *	should be tested and standardized at some point in the future for
1893 *	optimal aging of shared pages.
1894 */
1895int
1896pmap_ts_referenced(vm_page_t m)
1897{
1898	struct ia64_lpte *pte;
1899	pmap_t oldpmap;
1900	pv_entry_t pv;
1901	int count = 0;
1902
1903	if (m->flags & PG_FICTITIOUS)
1904		return 0;
1905
1906	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1907		PMAP_LOCK(pv->pv_pmap);
1908		oldpmap = pmap_install(pv->pv_pmap);
1909		pte = pmap_find_vhpt(pv->pv_va);
1910		KASSERT(pte != NULL, ("pte"));
1911		if (pmap_accessed(pte)) {
1912			count++;
1913			pmap_clear_accessed(pte);
1914			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1915		}
1916		pmap_install(oldpmap);
1917		PMAP_UNLOCK(pv->pv_pmap);
1918	}
1919
1920	return count;
1921}
1922
1923/*
1924 *	pmap_is_modified:
1925 *
1926 *	Return whether or not the specified physical page was modified
1927 *	in any physical maps.
1928 */
1929boolean_t
1930pmap_is_modified(vm_page_t m)
1931{
1932	struct ia64_lpte *pte;
1933	pmap_t oldpmap;
1934	pv_entry_t pv;
1935	boolean_t rv;
1936
1937	rv = FALSE;
1938	if (m->flags & PG_FICTITIOUS)
1939		return (rv);
1940
1941	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1942		PMAP_LOCK(pv->pv_pmap);
1943		oldpmap = pmap_install(pv->pv_pmap);
1944		pte = pmap_find_vhpt(pv->pv_va);
1945		pmap_install(oldpmap);
1946		KASSERT(pte != NULL, ("pte"));
1947		rv = pmap_dirty(pte) ? TRUE : FALSE;
1948		PMAP_UNLOCK(pv->pv_pmap);
1949		if (rv)
1950			break;
1951	}
1952
1953	return (rv);
1954}
1955
1956/*
1957 *	pmap_is_prefaultable:
1958 *
1959 *	Return whether or not the specified virtual address is eligible
1960 *	for prefault.
1961 */
1962boolean_t
1963pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
1964{
1965	struct ia64_lpte *pte;
1966
1967	pte = pmap_find_vhpt(addr);
1968	if (pte != NULL && pmap_present(pte))
1969		return (FALSE);
1970	return (TRUE);
1971}
1972
1973/*
1974 *	Clear the modify bits on the specified physical page.
1975 */
1976void
1977pmap_clear_modify(vm_page_t m)
1978{
1979	struct ia64_lpte *pte;
1980	pmap_t oldpmap;
1981	pv_entry_t pv;
1982
1983	if (m->flags & PG_FICTITIOUS)
1984		return;
1985
1986	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1987		PMAP_LOCK(pv->pv_pmap);
1988		oldpmap = pmap_install(pv->pv_pmap);
1989		pte = pmap_find_vhpt(pv->pv_va);
1990		KASSERT(pte != NULL, ("pte"));
1991		if (pmap_dirty(pte)) {
1992			pmap_clear_dirty(pte);
1993			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1994		}
1995		pmap_install(oldpmap);
1996		PMAP_UNLOCK(pv->pv_pmap);
1997	}
1998}
1999
2000/*
2001 *	pmap_clear_reference:
2002 *
2003 *	Clear the reference bit on the specified physical page.
2004 */
2005void
2006pmap_clear_reference(vm_page_t m)
2007{
2008	struct ia64_lpte *pte;
2009	pmap_t oldpmap;
2010	pv_entry_t pv;
2011
2012	if (m->flags & PG_FICTITIOUS)
2013		return;
2014
2015	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2016		PMAP_LOCK(pv->pv_pmap);
2017		oldpmap = pmap_install(pv->pv_pmap);
2018		pte = pmap_find_vhpt(pv->pv_va);
2019		KASSERT(pte != NULL, ("pte"));
2020		if (pmap_accessed(pte)) {
2021			pmap_clear_accessed(pte);
2022			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2023		}
2024		pmap_install(oldpmap);
2025		PMAP_UNLOCK(pv->pv_pmap);
2026	}
2027}
2028
2029/*
2030 * Map a set of physical memory pages into the kernel virtual
2031 * address space. Return a pointer to where it is mapped. This
2032 * routine is intended to be used for mapping device memory,
2033 * NOT real memory.
2034 */
2035void *
2036pmap_mapdev(vm_offset_t pa, vm_size_t size)
2037{
2038	return (void*) IA64_PHYS_TO_RR6(pa);
2039}
2040
2041/*
2042 * 'Unmap' a range mapped by pmap_mapdev().
2043 */
2044void
2045pmap_unmapdev(vm_offset_t va, vm_size_t size)
2046{
2047	return;
2048}
2049
2050/*
2051 * perform the pmap work for mincore
2052 */
2053int
2054pmap_mincore(pmap_t pmap, vm_offset_t addr)
2055{
2056	pmap_t oldpmap;
2057	struct ia64_lpte *pte, tpte;
2058	int val = 0;
2059
2060	PMAP_LOCK(pmap);
2061	oldpmap = pmap_install(pmap);
2062	pte = pmap_find_vhpt(addr);
2063	if (pte != NULL) {
2064		tpte = *pte;
2065		pte = &tpte;
2066	}
2067	pmap_install(oldpmap);
2068	PMAP_UNLOCK(pmap);
2069
2070	if (pte == NULL)
2071		return 0;
2072
2073	if (pmap_present(pte)) {
2074		vm_page_t m;
2075		vm_offset_t pa;
2076
2077		val = MINCORE_INCORE;
2078		if (!pmap_managed(pte))
2079			return val;
2080
2081		pa = pmap_ppn(pte);
2082
2083		m = PHYS_TO_VM_PAGE(pa);
2084
2085		/*
2086		 * Modified by us
2087		 */
2088		if (pmap_dirty(pte))
2089			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2090		else {
2091			/*
2092			 * Modified by someone
2093			 */
2094			vm_page_lock_queues();
2095			if (pmap_is_modified(m))
2096				val |= MINCORE_MODIFIED_OTHER;
2097			vm_page_unlock_queues();
2098		}
2099		/*
2100		 * Referenced by us
2101		 */
2102		if (pmap_accessed(pte))
2103			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2104		else {
2105			/*
2106			 * Referenced by someone
2107			 */
2108			vm_page_lock_queues();
2109			if (pmap_ts_referenced(m)) {
2110				val |= MINCORE_REFERENCED_OTHER;
2111				vm_page_flag_set(m, PG_REFERENCED);
2112			}
2113			vm_page_unlock_queues();
2114		}
2115	}
2116	return val;
2117}
2118
2119void
2120pmap_activate(struct thread *td)
2121{
2122	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2123}
2124
2125pmap_t
2126pmap_switch(pmap_t pm)
2127{
2128	pmap_t prevpm;
2129	int i;
2130
2131	mtx_assert(&sched_lock, MA_OWNED);
2132
2133	prevpm = PCPU_GET(current_pmap);
2134	if (prevpm == pm)
2135		return (prevpm);
2136	if (prevpm != NULL)
2137		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
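	/*
	 * With no pmap to install, point regions 0..4 at the
	 * kernel-reserved RIDs 0..4; translations tagged with the
	 * previous pmap's RIDs then no longer match.
	 */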
2138	if (pm == NULL) {
2139		for (i = 0; i < 5; i++) {
2140			ia64_set_rr(IA64_RR_BASE(i),
2141			    (i << 8)|(PAGE_SHIFT << 2)|1);
2142		}
2143	} else {
2144		for (i = 0; i < 5; i++) {
2145			ia64_set_rr(IA64_RR_BASE(i),
2146			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2147		}
2148		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
2149	}
2150	PCPU_SET(current_pmap, pm);
2151	__asm __volatile("srlz.d");
2152	return (prevpm);
2153}
2154
2155static pmap_t
2156pmap_install(pmap_t pm)
2157{
2158	pmap_t prevpm;
2159
2160	mtx_lock_spin(&sched_lock);
2161	prevpm = pmap_switch(pm);
2162	mtx_unlock_spin(&sched_lock);
2163	return (prevpm);
2164}
2165
2166vm_offset_t
2167pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2168{
2169
2170	return addr;
2171}
2172
2173#include "opt_ddb.h"
2174
2175#ifdef DDB
2176
2177#include <ddb/ddb.h>
2178
2179static const char*	psnames[] = {
2180	"1B",	"2B",	"4B",	"8B",
2181	"16B",	"32B",	"64B",	"128B",
2182	"256B",	"512B",	"1K",	"2K",
2183	"4K",	"8K",	"16K",	"32K",
2184	"64K",	"128K",	"256K",	"512K",
2185	"1M",	"2M",	"4M",	"8M",
2186	"16M",	"32M",	"64M",	"128M",
2187	"256M",	"512M",	"1G",	"2G"
2188};
2189
2190static void
2191print_trs(int type)
2192{
2193	struct ia64_pal_result res;
2194	int i, maxtr;
2195	struct {
2196		pt_entry_t	pte;
2197		uint64_t	itir;
2198		uint64_t	ifa;
2199		struct ia64_rr	rr;
2200	} buf;
2201	static const char *manames[] = {
2202		"WB",	"bad",	"bad",	"bad",
2203		"UC",	"UCE",	"WC",	"NaT",
2204	};
2205
2206	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2207	if (res.pal_status != 0) {
2208		db_printf("Can't get VM summary\n");
2209		return;
2210	}
2211
2212	if (type == 0)
2213		maxtr = (res.pal_result[0] >> 40) & 0xff;
2214	else
2215		maxtr = (res.pal_result[0] >> 32) & 0xff;
2216
2217	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2218	for (i = 0; i <= maxtr; i++) {
2219		bzero(&buf, sizeof(buf));
2220		res = ia64_call_pal_stacked_physical
2221			(PAL_VM_TR_READ, i, type, ia64_tpa((uint64_t) &buf));
2222		if (!(res.pal_result[0] & 1))
2223			buf.pte &= ~PTE_AR_MASK;
2224		if (!(res.pal_result[0] & 2))
2225			buf.pte &= ~PTE_PL_MASK;
2226		if (!(res.pal_result[0] & 4))
2227			pmap_clear_dirty(&buf);
2228		if (!(res.pal_result[0] & 8))
2229			buf.pte &= ~PTE_MA_MASK;
2230		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2231		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2232		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2233		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2234		    (buf.pte & PTE_ED) ? 1 : 0,
2235		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2236		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2237		    (pmap_dirty(&buf)) ? 1 : 0,
2238		    (pmap_accessed(&buf)) ? 1 : 0,
2239		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2240		    (pmap_present(&buf)) ? 1 : 0,
2241		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2242	}
2243}
2244
2245DB_COMMAND(itr, db_itr)
2246{
2247	print_trs(0);
2248}
2249
2250DB_COMMAND(dtr, db_dtr)
2251{
2252	print_trs(1);
2253}
2254
2255DB_COMMAND(rr, db_rr)
2256{
2257	int i;
2258	uint64_t t;
2259	struct ia64_rr rr;
2260
2261	printf("RR RID    PgSz VE\n");
2262	for (i = 0; i < 8; i++) {
2263		__asm __volatile ("mov %0=rr[%1]"
2264				  : "=r"(t)
2265				  : "r"(IA64_RR_BASE(i)));
2266		*(uint64_t *) &rr = t;
2267		printf("%d  %06x %4s %d\n",
2268		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2269	}
2270}
2271
2272DB_COMMAND(thash, db_thash)
2273{
2274	if (!have_addr)
2275		return;
2276
2277	db_printf("%p\n", (void *) ia64_thash(addr));
2278}
2279
2280DB_COMMAND(ttag, db_ttag)
2281{
2282	if (!have_addr)
2283		return;
2284
2285	db_printf("0x%lx\n", ia64_ttag(addr));
2286}
2287
2288DB_COMMAND(kpte, db_kpte)
2289{
2290	struct ia64_lpte *pte;
2291
2292	if (!have_addr) {
2293		db_printf("usage: kpte <kva>\n");
2294		return;
2295	}
2296	if (addr < VM_MIN_KERNEL_ADDRESS) {
2297		db_printf("kpte: error: invalid <kva>\n");
2298		return;
2299	}
2300	pte = &ia64_kptdir[KPTE_DIR_INDEX(addr)][KPTE_PTE_INDEX(addr)];
2301	db_printf("kpte at %p:\n", pte);
2302	db_printf("  pte  =%016lx\n", pte->pte);
2303	db_printf("  itir =%016lx\n", pte->itir);
2304	db_printf("  tag  =%016lx\n", pte->tag);
2305	db_printf("  chain=%016lx\n", pte->chain);
2306}
2307
2308#endif
2309