1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD$");
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/lock.h>
54#include <sys/mman.h>
55#include <sys/mutex.h>
56#include <sys/proc.h>
57#include <sys/smp.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60
61#include <vm/vm.h>
62#include <vm/vm_param.h>
63#include <vm/vm_page.h>
64#include <vm/vm_map.h>
65#include <vm/vm_object.h>
66#include <vm/vm_pageout.h>
67#include <vm/uma.h>
68
69#include <machine/bootinfo.h>
70#include <machine/efi.h>
71#include <machine/md_var.h>
72#include <machine/pal.h>
73
74/*
75 *	Manages physical address maps.
76 *
77 *	In addition to hardware address maps, this
78 *	module is called upon to provide software-use-only
79 *	maps which may or may not be stored in the same
80 *	form as hardware maps.  These pseudo-maps are
81 *	used to store intermediate results from copy
82 *	operations to and from address spaces.
83 *
84 *	Since the information managed by this module is
85 *	also stored by the logical address mapping module,
86 *	this module may throw away valid virtual-to-physical
87 *	mappings at almost any time.  However, invalidations
88 *	of virtual-to-physical mappings must be done as
89 *	requested.
90 *
91 *	In order to cope with hardware architectures which
92 *	make virtual-to-physical map invalidates expensive,
 * this module may delay invalidation or protection-reduction
94 *	operations until such time as they are actually
95 *	necessary.  This module is given full information as
96 *	to which processors are currently using which maps,
 * and as to when physical maps must be made correct.
98 */
99
100/*
101 * Following the Linux model, region IDs are allocated in groups of
102 * eight so that a single region ID can be used for as many RRs as we
103 * want by encoding the RR number into the low bits of the ID.
104 *
105 * We reserve region ID 0 for the kernel and allocate the remaining
106 * IDs for user pmaps.
107 *
108 * Region 0-3:	User virtually mapped
109 * Region 4:	PBVM and special mappings
110 * Region 5:	Kernel virtual memory
111 * Region 6:	Direct-mapped uncacheable
112 * Region 7:	Direct-mapped cacheable
113 */
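/*
 * The region is selected by bits 63:61 of a virtual address (the
 * virtual region number); each region register supplies the region
 * ID and preferred page size used to translate addresses that fall
 * in its region.
 */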
114
115/* XXX move to a header. */
116extern uint64_t ia64_gateway_page[];
117
118#ifndef PMAP_SHPGPERPROC
119#define PMAP_SHPGPERPROC 200
120#endif
121
122#if !defined(DIAGNOSTIC)
123#define PMAP_INLINE __inline
124#else
125#define PMAP_INLINE
126#endif
127
128#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
129#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
130#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
131#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
132#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
133#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
134#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
135#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
136
137#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
138#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
139#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
140#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
141
142#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
143
144/*
145 * The VHPT bucket head structure.
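 *
 * "chain" holds the physical address of the first ia64_lpte on the
 * bucket's collision chain (0 if the bucket is empty), "mutex"
 * serializes updates to that chain and "length" counts the entries
 * (summed by the machdep.vhpt.population sysctl).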
146 */
147struct ia64_bucket {
148	uint64_t	chain;
149	struct mtx	mutex;
150	u_int		length;
151};
152
153/*
154 * Statically allocated kernel pmap
155 */
156struct pmap kernel_pmap_store;
157
158vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
159vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
160
161/*
162 * Kernel virtual memory management.
163 */
164static int nkpt;
165extern struct ia64_lpte ***ia64_kptdir;
166
167#define KPTE_DIR0_INDEX(va) \
168	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
169#define KPTE_DIR1_INDEX(va) \
170	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
171#define KPTE_PTE_INDEX(va) \
172	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
173#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
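/*
 * For example, with 8KB pages (PAGE_SHIFT == 13) and the 32-byte
 * struct ia64_lpte: KPTE_PTE_INDEX() takes VA bits 13..20 to select
 * one of 256 PTEs in a leaf page, KPTE_DIR1_INDEX() takes bits
 * 21..30 to select one of 1024 leaf pages and KPTE_DIR0_INDEX()
 * takes bits 31..40 to select one of 1024 second-level directories;
 * each leaf page thus maps 2MB and the tree covers about 2TB of KVA.
 */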
174
175vm_offset_t kernel_vm_end;
176
177/* Values for ptc.e. XXX values for SKI. */
178static uint64_t pmap_ptc_e_base = 0x100000000;
179static uint64_t pmap_ptc_e_count1 = 3;
180static uint64_t pmap_ptc_e_count2 = 2;
181static uint64_t pmap_ptc_e_stride1 = 0x2000;
182static uint64_t pmap_ptc_e_stride2 = 0x100000000;
183
184struct mtx pmap_ptc_mutex;
185
186/*
187 * Data for the RID allocator
188 */
189static int pmap_ridcount;
190static int pmap_rididx;
191static int pmap_ridmapsz;
192static int pmap_ridmax;
193static uint64_t *pmap_ridmap;
194struct mtx pmap_ridmutex;
195
196/*
197 * Data for the pv entry allocation mechanism
198 */
199static uma_zone_t pvzone;
200static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
201
202/*
203 * Data for allocating PTEs for user processes.
204 */
205static uma_zone_t ptezone;
206
207/*
208 * Virtual Hash Page Table (VHPT) data.
209 */
210/* SYSCTL_DECL(_machdep); */
211static SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
212
213struct ia64_bucket *pmap_vhpt_bucket;
214
215int pmap_vhpt_nbuckets;
216SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
217    &pmap_vhpt_nbuckets, 0, "");
218
219int pmap_vhpt_log2size = 0;
220TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
221SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
222    &pmap_vhpt_log2size, 0, "");
223
224static int pmap_vhpt_inserts;
225SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
226    &pmap_vhpt_inserts, 0, "");
227
228static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
229SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
230    NULL, 0, pmap_vhpt_population, "I", "");
231
232static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
233
234static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
235static pv_entry_t get_pv_entry(pmap_t locked_pmap);
236
237static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
238		    vm_page_t m, vm_prot_t prot);
239static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
240static void	pmap_invalidate_all(void);
241static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
242		    vm_offset_t va, pv_entry_t pv, int freepte);
243static int	pmap_remove_vhpt(vm_offset_t va);
244static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
245		    vm_page_t m);
246
247static void
248pmap_initialize_vhpt(vm_offset_t vhpt)
249{
250	struct ia64_lpte *pte;
251	u_int i;
252
253	pte = (struct ia64_lpte *)vhpt;
254	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
255		pte[i].pte = 0;
256		pte[i].itir = 0;
257		pte[i].tag = 1UL << 63; /* Invalid tag */
258		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
259	}
260}
261
262#ifdef SMP
263MALLOC_DECLARE(M_SMP);
264
265vm_offset_t
266pmap_alloc_vhpt(void)
267{
268	vm_offset_t vhpt;
269	vm_size_t size;
270
271	size = 1UL << pmap_vhpt_log2size;
272	vhpt = (uintptr_t)contigmalloc(size, M_SMP, 0, 0UL, ~0UL, size, 0UL);
273	if (vhpt != 0) {
274		vhpt = IA64_PHYS_TO_RR7(ia64_tpa(vhpt));
275		pmap_initialize_vhpt(vhpt);
276	}
277	return (vhpt);
278}
279#endif
280
281/*
282 *	Bootstrap the system enough to run with virtual memory.
283 */
284void
pmap_bootstrap(void)
286{
287	struct ia64_pal_result res;
288	vm_offset_t base;
289	size_t size;
290	int i, ridbits;
291
292	/*
293	 * Query the PAL Code to find the loop parameters for the
294	 * ptc.e instruction.
295	 */
296	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
297	if (res.pal_status != 0)
298		panic("Can't configure ptc.e parameters");
299	pmap_ptc_e_base = res.pal_result[0];
300	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
301	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
302	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
303	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
304	if (bootverbose)
305		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
306		       "stride1=0x%lx, stride2=0x%lx\n",
307		       pmap_ptc_e_base,
308		       pmap_ptc_e_count1,
309		       pmap_ptc_e_count2,
310		       pmap_ptc_e_stride1,
311		       pmap_ptc_e_stride2);
312
313	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
314
315	/*
316	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
317	 *
318	 * We currently need at least 19 bits in the RID because PID_MAX
319	 * can only be encoded in 17 bits and we need RIDs for 4 regions
320	 * per process. With PID_MAX equalling 99999 this means that we
321	 * need to be able to encode 399996 (=4*PID_MAX).
322	 * The Itanium processor only has 18 bits and the architected
323	 * minimum is exactly that. So, we cannot use a PID based scheme
324	 * in those cases. Enter pmap_ridmap...
 * We should avoid using the map when running on a processor that
 * implements enough bits. That would require passing the
 * process/thread ID to pmap, which we currently don't do, so we
 * use the map anyway. However, we don't want to allocate a map
329	 * that is large enough to cover the range dictated by the number
330	 * of bits in the RID, because that may result in a RID map of
331	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
 * The bottom line: we create a 32KB map when the processor only
333	 * implements 18 bits (or when we can't figure it out). Otherwise
334	 * we create a 64KB map.
335	 */
336	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
337	if (res.pal_status != 0) {
338		if (bootverbose)
339			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
340		ridbits = 18; /* guaranteed minimum */
341	} else {
342		ridbits = (res.pal_result[1] >> 8) & 0xff;
343		if (bootverbose)
344			printf("Processor supports %d Region ID bits\n",
345			    ridbits);
346	}
347	if (ridbits > 19)
348		ridbits = 19;
349
350	pmap_ridmax = (1 << ridbits);
351	pmap_ridmapsz = pmap_ridmax / 64;
352	pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE);
353	pmap_ridmap[0] |= 0xff;
354	pmap_rididx = 0;
355	pmap_ridcount = 8;
356	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
357
358	/*
359	 * Allocate some memory for initial kernel 'page tables'.
360	 */
361	ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE);
362	nkpt = 0;
363	kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
364
365	/*
366	 * Determine a valid (mappable) VHPT size.
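	 * The size is clamped to [64KB, 256MB] and kept to an even power
	 * of two, presumably because the VHPT is mapped by a single
	 * translation and the insertable page sizes in that range are the
	 * even powers of two (64KB, 256KB, 1MB, ..., 256MB).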
367	 */
368	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
369	if (pmap_vhpt_log2size == 0)
370		pmap_vhpt_log2size = 20;
371	else if (pmap_vhpt_log2size < 16)
372		pmap_vhpt_log2size = 16;
373	else if (pmap_vhpt_log2size > 28)
374		pmap_vhpt_log2size = 28;
375	if (pmap_vhpt_log2size & 1)
376		pmap_vhpt_log2size--;
377
378	size = 1UL << pmap_vhpt_log2size;
379	base = (uintptr_t)ia64_physmem_alloc(size, size);
380	if (base == 0)
381		panic("Unable to allocate VHPT");
382
383	PCPU_SET(md.vhpt, base);
384	if (bootverbose)
385		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
386
387	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
388	pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets *
389	    sizeof(struct ia64_bucket), PAGE_SIZE);
390	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
391		/* Stolen memory is zeroed. */
392		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
393		    MTX_NOWITNESS | MTX_SPIN);
394	}
395
396	pmap_initialize_vhpt(base);
397	map_vhpt(base);
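	/*
	 * Program the PTA: per the architected layout, bit 0 enables the
	 * VHPT walker, bits 7:2 hold log2 of the VHPT size, bit 8 selects
	 * the long (32-byte entry) format and the upper bits hold the
	 * VHPT base -- hence base + (1 << 8) + (log2size << 2) + 1.
	 */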
398	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
399	ia64_srlz_i();
400
401	virtual_avail = VM_MIN_KERNEL_ADDRESS;
402	virtual_end = VM_MAX_KERNEL_ADDRESS;
403
404	/*
405	 * Initialize the kernel pmap (which is statically allocated).
406	 */
407	PMAP_LOCK_INIT(kernel_pmap);
408	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
409		kernel_pmap->pm_rid[i] = 0;
410	TAILQ_INIT(&kernel_pmap->pm_pvlist);
411	PCPU_SET(md.current_pmap, kernel_pmap);
412
413	/* Region 5 is mapped via the VHPT. */
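	/*
	 * Region register layout: bit 0 enables the VHPT walker for the
	 * region, bits 7:2 hold the preferred page size (log2) and bits
	 * 31:8 the region ID; this installs RID 5 with the kernel page
	 * size and the walker enabled.
	 */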
414	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
415
416	/*
417	 * Clear out any random TLB entries left over from booting.
418	 */
419	pmap_invalidate_all();
420
421	map_gateway_page();
422}
423
424static int
425pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
426{
427	int count, error, i;
428
429	count = 0;
430	for (i = 0; i < pmap_vhpt_nbuckets; i++)
431		count += pmap_vhpt_bucket[i].length;
432
433	error = SYSCTL_OUT(req, &count, sizeof(count));
434	return (error);
435}
436
437vm_offset_t
438pmap_page_to_va(vm_page_t m)
439{
440	vm_paddr_t pa;
441	vm_offset_t va;
442
443	pa = VM_PAGE_TO_PHYS(m);
444	va = (m->md.memattr == VM_MEMATTR_UNCACHEABLE) ? IA64_PHYS_TO_RR6(pa) :
445	    IA64_PHYS_TO_RR7(pa);
446	return (va);
447}
448
449/*
450 *	Initialize a vm_page's machine-dependent fields.
451 */
452void
453pmap_page_init(vm_page_t m)
454{
455
456	TAILQ_INIT(&m->md.pv_list);
457	m->md.pv_list_count = 0;
458	m->md.memattr = VM_MEMATTR_DEFAULT;
459}
460
461/*
462 *	Initialize the pmap module.
463 *	Called by vm_init, to initialize any structures that the pmap
464 *	system needs to map virtual memory.
465 */
466void
467pmap_init(void)
468{
469	int shpgperproc = PMAP_SHPGPERPROC;
470
471	/*
472	 * Initialize the address space (zone) for the pv entries.  Set a
473	 * high water mark so that the system can recover from excessive
474	 * numbers of pv entries.
475	 */
476	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
477	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
478	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
479	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
480	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
481	pv_entry_high_water = 9 * (pv_entry_max / 10);
482
483	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
484	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
485}
486
487
488/***************************************************
489 * Manipulate TLBs for a pmap
490 ***************************************************/
491
492static void
493pmap_invalidate_page(vm_offset_t va)
494{
495	struct ia64_lpte *pte;
496	struct pcpu *pc;
497	uint64_t tag;
498	u_int vhpt_ofs;
499
500	critical_enter();
501
502	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
503	tag = ia64_ttag(va);
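	/*
	 * Every CPU has its own VHPT with the same layout, so the slot
	 * for va sits at the same offset in each of them.  Knock the
	 * entry out of each CPU's VHPT by writing an invalid tag (bit 63
	 * set), but only if the slot still holds va's tag -- hence the
	 * cmpset.
	 */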
504	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
505		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
506		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
507	}
508
509	mtx_lock_spin(&pmap_ptc_mutex);
510
511	ia64_ptc_ga(va, PAGE_SHIFT << 2);
512	ia64_mf();
513	ia64_srlz_i();
514
515	mtx_unlock_spin(&pmap_ptc_mutex);
516
517	ia64_invala();
518
519	critical_exit();
520}
521
522static void
523pmap_invalidate_all_1(void *arg)
524{
525	uint64_t addr;
526	int i, j;
527
528	critical_enter();
529	addr = pmap_ptc_e_base;
530	for (i = 0; i < pmap_ptc_e_count1; i++) {
531		for (j = 0; j < pmap_ptc_e_count2; j++) {
532			ia64_ptc_e(addr);
533			addr += pmap_ptc_e_stride2;
534		}
535		addr += pmap_ptc_e_stride1;
536	}
537	critical_exit();
538}
539
540static void
541pmap_invalidate_all(void)
542{
543
544#ifdef SMP
545	if (mp_ncpus > 1) {
546		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
547		return;
548	}
549#endif
550	pmap_invalidate_all_1(NULL);
551}
552
553static uint32_t
554pmap_allocate_rid(void)
555{
556	uint64_t bit, bits;
557	int rid;
558
559	mtx_lock(&pmap_ridmutex);
560	if (pmap_ridcount == pmap_ridmax)
561		panic("pmap_allocate_rid: All Region IDs used");
562
563	/* Find an index with a free bit. */
564	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
565		pmap_rididx++;
566		if (pmap_rididx == pmap_ridmapsz)
567			pmap_rididx = 0;
568	}
569	rid = pmap_rididx * 64;
570
571	/* Find a free bit. */
572	bit = 1UL;
573	while (bits & bit) {
574		rid++;
575		bit <<= 1;
576	}
577
578	pmap_ridmap[pmap_rididx] |= bit;
579	pmap_ridcount++;
580	mtx_unlock(&pmap_ridmutex);
581
	return (rid);
583}
584
585static void
586pmap_free_rid(uint32_t rid)
587{
588	uint64_t bit;
589	int idx;
590
591	idx = rid / 64;
592	bit = ~(1UL << (rid & 63));
593
594	mtx_lock(&pmap_ridmutex);
595	pmap_ridmap[idx] &= bit;
596	pmap_ridcount--;
597	mtx_unlock(&pmap_ridmutex);
598}
599
600/***************************************************
601 * Page table page management routines.....
602 ***************************************************/
603
604void
605pmap_pinit0(struct pmap *pmap)
606{
607	/* kernel_pmap is the same as any other pmap. */
608	pmap_pinit(pmap);
609}
610
611/*
612 * Initialize a preallocated and zeroed pmap structure,
613 * such as one in a vmspace structure.
614 */
615int
616pmap_pinit(struct pmap *pmap)
617{
618	int i;
619
620	PMAP_LOCK_INIT(pmap);
621	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
622		pmap->pm_rid[i] = pmap_allocate_rid();
623	TAILQ_INIT(&pmap->pm_pvlist);
624	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
625	return (1);
626}
627
628/***************************************************
629 * Pmap allocation/deallocation routines.
630 ***************************************************/
631
632/*
633 * Release any resources held by the given physical map.
634 * Called when a pmap initialized by pmap_pinit is being released.
635 * Should only be called if the map contains no valid mappings.
636 */
637void
638pmap_release(pmap_t pmap)
639{
640	int i;
641
642	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
643		if (pmap->pm_rid[i])
644			pmap_free_rid(pmap->pm_rid[i]);
645	PMAP_LOCK_DESTROY(pmap);
646}
647
648/*
649 * grow the number of kernel page table entries, if needed
650 */
651void
652pmap_growkernel(vm_offset_t addr)
653{
654	struct ia64_lpte **dir1;
655	struct ia64_lpte *leaf;
656	vm_page_t nkpg;
657
658	while (kernel_vm_end <= addr) {
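		/*
		 * ia64_kptdir is a single page of pointers to second-level
		 * directory pages, each of which is a page of pointers to
		 * leaf PTE pages; nkpt counts both kinds, so the tree is
		 * full once PAGE_SIZE/8 directory pages and
		 * (PAGE_SIZE/8)^2 leaf pages have been allocated.
		 */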
659		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
660			panic("%s: out of kernel address space", __func__);
661
662		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
663		if (dir1 == NULL) {
664			nkpg = vm_page_alloc(NULL, nkpt++,
665			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
666			if (!nkpg)
667				panic("%s: cannot add dir. page", __func__);
668
669			dir1 = (struct ia64_lpte **)pmap_page_to_va(nkpg);
670			bzero(dir1, PAGE_SIZE);
671			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
672		}
673
674		nkpg = vm_page_alloc(NULL, nkpt++,
675		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
676		if (!nkpg)
677			panic("%s: cannot add PTE page", __func__);
678
679		leaf = (struct ia64_lpte *)pmap_page_to_va(nkpg);
680		bzero(leaf, PAGE_SIZE);
681		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
682
683		kernel_vm_end += PAGE_SIZE * NKPTEPG;
684	}
685}
686
687/***************************************************
688 * page management routines.
689 ***************************************************/
690
691/*
692 * free the pv_entry back to the free list
693 */
694static PMAP_INLINE void
695free_pv_entry(pv_entry_t pv)
696{
697	pv_entry_count--;
698	uma_zfree(pvzone, pv);
699}
700
701/*
702 * get a new pv_entry, allocating a block from the system
703 * when needed.
704 */
705static pv_entry_t
706get_pv_entry(pmap_t locked_pmap)
707{
708	static const struct timeval printinterval = { 60, 0 };
709	static struct timeval lastprint;
710	struct vpgqueues *vpq;
711	struct ia64_lpte *pte;
712	pmap_t oldpmap, pmap;
713	pv_entry_t allocated_pv, next_pv, pv;
714	vm_offset_t va;
715	vm_page_t m;
716
717	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
718	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
719	allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
720	if (allocated_pv != NULL) {
721		pv_entry_count++;
722		if (pv_entry_count > pv_entry_high_water)
723			pagedaemon_wakeup();
724		else
725			return (allocated_pv);
726	}
727
728	/*
729	 * Reclaim pv entries: At first, destroy mappings to inactive
730	 * pages.  After that, if a pv entry is still needed, destroy
731	 * mappings to active pages.
732	 */
733	if (ratecheck(&lastprint, &printinterval))
734		printf("Approaching the limit on PV entries, "
735		    "increase the vm.pmap.shpgperproc tunable.\n");
736	vpq = &vm_page_queues[PQ_INACTIVE];
737retry:
738	TAILQ_FOREACH(m, &vpq->pl, pageq) {
739		if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
740			continue;
741		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
742			va = pv->pv_va;
743			pmap = pv->pv_pmap;
744			/* Avoid deadlock and lock recursion. */
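			/*
			 * Locks are taken in ascending pmap address order;
			 * a pmap that sorts below locked_pmap is only
			 * try-locked so we never block while holding
			 * locked_pmap.
			 */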
745			if (pmap > locked_pmap)
746				PMAP_LOCK(pmap);
747			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
748				continue;
749			pmap->pm_stats.resident_count--;
750			oldpmap = pmap_switch(pmap);
751			pte = pmap_find_vhpt(va);
752			KASSERT(pte != NULL, ("pte"));
753			pmap_remove_vhpt(va);
754			pmap_invalidate_page(va);
755			pmap_switch(oldpmap);
756			if (pmap_accessed(pte))
757				vm_page_aflag_set(m, PGA_REFERENCED);
758			if (pmap_dirty(pte))
759				vm_page_dirty(m);
760			pmap_free_pte(pte, va);
761			TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
762			m->md.pv_list_count--;
763			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
764			if (pmap != locked_pmap)
765				PMAP_UNLOCK(pmap);
766			if (allocated_pv == NULL)
767				allocated_pv = pv;
768			else
769				free_pv_entry(pv);
770		}
771		if (TAILQ_EMPTY(&m->md.pv_list))
772			vm_page_aflag_clear(m, PGA_WRITEABLE);
773	}
774	if (allocated_pv == NULL) {
775		if (vpq == &vm_page_queues[PQ_INACTIVE]) {
776			vpq = &vm_page_queues[PQ_ACTIVE];
777			goto retry;
778		}
779		panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
780	}
781	return (allocated_pv);
782}
783
784/*
785 * Conditionally create a pv entry.
786 */
787static boolean_t
788pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
789{
790	pv_entry_t pv;
791
792	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
793	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
794	if (pv_entry_count < pv_entry_high_water &&
795	    (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
796		pv_entry_count++;
797		pv->pv_va = va;
798		pv->pv_pmap = pmap;
799		TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
800		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
801		m->md.pv_list_count++;
802		return (TRUE);
803	} else
804		return (FALSE);
805}
806
807/*
808 * Add an ia64_lpte to the VHPT.
809 */
810static void
811pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
812{
813	struct ia64_bucket *bckt;
814	struct ia64_lpte *vhpte;
815	uint64_t pte_pa;
816
817	/* Can fault, so get it out of the way. */
818	pte_pa = ia64_tpa((vm_offset_t)pte);
819
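	/*
	 * ia64_thash() returns the VHPT slot for va; that slot's chain
	 * field points at the bucket head.  Collision-chain links hold
	 * physical addresses (walkers convert them back with
	 * IA64_PHYS_TO_RR7()) and new entries are prepended at the head
	 * of the chain.
	 */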
820	vhpte = (struct ia64_lpte *)ia64_thash(va);
821	bckt = (struct ia64_bucket *)vhpte->chain;
822
823	mtx_lock_spin(&bckt->mutex);
824	pte->chain = bckt->chain;
825	ia64_mf();
826	bckt->chain = pte_pa;
827
828	pmap_vhpt_inserts++;
829	bckt->length++;
830	mtx_unlock_spin(&bckt->mutex);
831}
832
833/*
834 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
835 * worked or an appropriate error code otherwise.
836 */
837static int
838pmap_remove_vhpt(vm_offset_t va)
839{
840	struct ia64_bucket *bckt;
841	struct ia64_lpte *pte;
842	struct ia64_lpte *lpte;
843	struct ia64_lpte *vhpte;
844	uint64_t chain, tag;
845
846	tag = ia64_ttag(va);
847	vhpte = (struct ia64_lpte *)ia64_thash(va);
848	bckt = (struct ia64_bucket *)vhpte->chain;
849
850	lpte = NULL;
851	mtx_lock_spin(&bckt->mutex);
852	chain = bckt->chain;
853	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
854	while (chain != 0 && pte->tag != tag) {
855		lpte = pte;
856		chain = pte->chain;
857		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
858	}
859	if (chain == 0) {
860		mtx_unlock_spin(&bckt->mutex);
861		return (ENOENT);
862	}
863
	/* Snip this ia64_lpte out of the collision chain. */
865	if (lpte == NULL)
866		bckt->chain = pte->chain;
867	else
868		lpte->chain = pte->chain;
869	ia64_mf();
870
871	bckt->length--;
872	mtx_unlock_spin(&bckt->mutex);
873	return (0);
874}
875
876/*
877 * Find the ia64_lpte for the given va, if any.
878 */
879static struct ia64_lpte *
880pmap_find_vhpt(vm_offset_t va)
881{
882	struct ia64_bucket *bckt;
883	struct ia64_lpte *pte;
884	uint64_t chain, tag;
885
886	tag = ia64_ttag(va);
887	pte = (struct ia64_lpte *)ia64_thash(va);
888	bckt = (struct ia64_bucket *)pte->chain;
889
890	mtx_lock_spin(&bckt->mutex);
891	chain = bckt->chain;
892	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
893	while (chain != 0 && pte->tag != tag) {
894		chain = pte->chain;
895		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
896	}
897	mtx_unlock_spin(&bckt->mutex);
898	return ((chain != 0) ? pte : NULL);
899}
900
901/*
902 * Remove an entry from the list of managed mappings.
903 */
904static int
905pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
906{
907	if (!pv) {
908		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
909			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
910				if (pmap == pv->pv_pmap && va == pv->pv_va)
911					break;
912			}
913		} else {
914			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
915				if (va == pv->pv_va)
916					break;
917			}
918		}
919	}
920
921	if (pv) {
922		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
923		m->md.pv_list_count--;
924		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
925			vm_page_aflag_clear(m, PGA_WRITEABLE);
926
927		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
928		free_pv_entry(pv);
		return (0);
	} else {
		return (ENOENT);
932	}
933}
934
935/*
936 * Create a pv entry for page at pa for
937 * (pmap, va).
938 */
939static void
940pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
941{
942	pv_entry_t pv;
943
944	pv = get_pv_entry(pmap);
945	pv->pv_pmap = pmap;
946	pv->pv_va = va;
947
948	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
949	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
950	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
951	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
952	m->md.pv_list_count++;
953}
954
955/*
956 *	Routine:	pmap_extract
957 *	Function:
958 *		Extract the physical page address associated
959 *		with the given map/virtual_address pair.
960 */
961vm_paddr_t
962pmap_extract(pmap_t pmap, vm_offset_t va)
963{
964	struct ia64_lpte *pte;
965	pmap_t oldpmap;
966	vm_paddr_t pa;
967
968	pa = 0;
969	PMAP_LOCK(pmap);
970	oldpmap = pmap_switch(pmap);
971	pte = pmap_find_vhpt(va);
972	if (pte != NULL && pmap_present(pte))
973		pa = pmap_ppn(pte);
974	pmap_switch(oldpmap);
975	PMAP_UNLOCK(pmap);
976	return (pa);
977}
978
979/*
980 *	Routine:	pmap_extract_and_hold
981 *	Function:
982 *		Atomically extract and hold the physical page
983 *		with the given pmap and virtual address pair
984 *		if that mapping permits the given protection.
985 */
986vm_page_t
987pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
988{
989	struct ia64_lpte *pte;
990	pmap_t oldpmap;
991	vm_page_t m;
992	vm_paddr_t pa;
993
994	pa = 0;
995	m = NULL;
996	PMAP_LOCK(pmap);
997	oldpmap = pmap_switch(pmap);
998retry:
999	pte = pmap_find_vhpt(va);
1000	if (pte != NULL && pmap_present(pte) &&
1001	    (pmap_prot(pte) & prot) == prot) {
1002		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1003		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1004			goto retry;
1005		vm_page_hold(m);
1006	}
1007	PA_UNLOCK_COND(pa);
1008	pmap_switch(oldpmap);
1009	PMAP_UNLOCK(pmap);
1010	return (m);
1011}
1012
1013/***************************************************
1014 * Low level mapping routines.....
1015 ***************************************************/
1016
1017/*
1018 * Find the kernel lpte for mapping the given virtual address, which
1019 * must be in the part of region 5 which we can cover with our kernel
1020 * 'page tables'.
1021 */
1022static struct ia64_lpte *
1023pmap_find_kpte(vm_offset_t va)
1024{
1025	struct ia64_lpte **dir1;
1026	struct ia64_lpte *leaf;
1027
1028	KASSERT((va >> 61) == 5,
1029		("kernel mapping 0x%lx not in region 5", va));
1030	KASSERT(va < kernel_vm_end,
1031		("kernel mapping 0x%lx out of range", va));
1032
1033	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1034	leaf = dir1[KPTE_DIR1_INDEX(va)];
1035	return (&leaf[KPTE_PTE_INDEX(va)]);
1036}
1037
1038/*
1039 * Find a pte suitable for mapping a user-space address. If one exists
 * in the VHPT, that one will be returned; otherwise a new pte is
 * allocated.  Returns NULL if the allocation fails.
1042 */
1043static struct ia64_lpte *
1044pmap_find_pte(vm_offset_t va)
1045{
1046	struct ia64_lpte *pte;
1047
	if (va >= VM_MAXUSER_ADDRESS)
		return (pmap_find_kpte(va));

	pte = pmap_find_vhpt(va);
	if (pte == NULL) {
		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
		if (pte != NULL)
			pte->tag = 1UL << 63;
	}
1056	return (pte);
1057}
1058
1059/*
1060 * Free a pte which is now unused. This simply returns it to the zone
1061 * allocator if it is a user mapping. For kernel mappings, clear the
1062 * valid bit to make it clear that the mapping is not currently used.
1063 */
1064static void
1065pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1066{
1067	if (va < VM_MAXUSER_ADDRESS)
1068		uma_zfree(ptezone, pte);
1069	else
1070		pmap_clear_present(pte);
1071}
1072
1073static PMAP_INLINE void
1074pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1075{
1076	static long prot2ar[4] = {
1077		PTE_AR_R,		/* VM_PROT_NONE */
1078		PTE_AR_RW,		/* VM_PROT_WRITE */
1079		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1080		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1081	};
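	/*
	 * The table is indexed by (prot & VM_PROT_ALL) >> 1, which drops
	 * the read bit: read permission is implied, so only the write and
	 * execute bits select the access rights.  The original protection
	 * is also stashed in the PTE starting at bit 56 (see pmap_prot()).
	 */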
1082
1083	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1084	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1085	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1086	    ? PTE_PL_KERN : PTE_PL_USER;
1087	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1088}
1089
1090static PMAP_INLINE void
1091pmap_pte_attr(struct ia64_lpte *pte, vm_memattr_t ma)
1092{
1093
1094	pte->pte &= ~PTE_MA_MASK;
1095	pte->pte |= (ma & PTE_MA_MASK);
1096}
1097
1098/*
1099 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it's assumed to already be in the
 * VHPT.
 * This function does not set the protection bits.  It's expected
1103 * that those have been set correctly prior to calling this function.
1104 */
1105static void
1106pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1107    boolean_t wired, boolean_t managed)
1108{
1109
1110	pte->pte &= PTE_PROT_MASK | PTE_MA_MASK | PTE_PL_MASK |
1111	    PTE_AR_MASK | PTE_ED;
1112	pte->pte |= PTE_PRESENT;
1113	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1114	pte->pte |= (wired) ? PTE_WIRED : 0;
1115	pte->pte |= pa & PTE_PPN_MASK;
1116
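	/*
	 * The ITIR holds the mapping's page size in bits 7:2; the tag is
	 * the per-va VHPT tag that lookups compare against.
	 */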
1117	pte->itir = PAGE_SHIFT << 2;
1118
1119	pte->tag = ia64_ttag(va);
1120}
1121
1122/*
1123 * Remove the (possibly managed) mapping represented by pte from the
1124 * given pmap.
1125 */
1126static int
1127pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1128		pv_entry_t pv, int freepte)
1129{
1130	int error;
1131	vm_page_t m;
1132
1133	/*
1134	 * First remove from the VHPT.
1135	 */
1136	error = pmap_remove_vhpt(va);
1137	if (error)
1138		return (error);
1139
1140	pmap_invalidate_page(va);
1141
1142	if (pmap_wired(pte))
1143		pmap->pm_stats.wired_count -= 1;
1144
1145	pmap->pm_stats.resident_count -= 1;
1146	if (pmap_managed(pte)) {
1147		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1148		if (pmap_dirty(pte))
1149			vm_page_dirty(m);
1150		if (pmap_accessed(pte))
1151			vm_page_aflag_set(m, PGA_REFERENCED);
1152
1153		error = pmap_remove_entry(pmap, m, va, pv);
1154	}
1155	if (freepte)
1156		pmap_free_pte(pte, va);
1157
1158	return (error);
1159}
1160
1161/*
1162 * Extract the physical page address associated with a kernel
1163 * virtual address.
1164 */
1165vm_paddr_t
1166pmap_kextract(vm_offset_t va)
1167{
1168	struct ia64_lpte *pte;
1169	uint64_t *pbvm_pgtbl;
1170	vm_paddr_t pa;
1171	u_int idx;
1172
1173	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));
1174
1175	/* Regions 6 and 7 are direct mapped. */
1176	if (va >= IA64_RR_BASE(6)) {
1177		pa = IA64_RR_MASK(va);
1178		goto out;
1179	}
1180
1181	/* Region 5 is our KVA. Bail out if the VA is beyond our limits. */
1182	if (va >= kernel_vm_end)
1183		goto err_out;
1184	if (va >= VM_MIN_KERNEL_ADDRESS) {
1185		pte = pmap_find_kpte(va);
1186		pa = pmap_present(pte) ? pmap_ppn(pte) | (va & PAGE_MASK) : 0;
1187		goto out;
1188	}
1189
1190	/* The PBVM page table. */
1191	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
1192		goto err_out;
1193	if (va >= IA64_PBVM_PGTBL) {
1194		pa = (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
1195		goto out;
1196	}
1197
1198	/* The PBVM itself. */
1199	if (va >= IA64_PBVM_BASE) {
1200		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
1201		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
1202		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
1203			goto err_out;
1204		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
1205			goto err_out;
1206		pa = (pbvm_pgtbl[idx] & PTE_PPN_MASK) +
1207		    (va & IA64_PBVM_PAGE_MASK);
1208		goto out;
1209	}
1210
1211 err_out:
1212	printf("XXX: %s: va=%#lx is invalid\n", __func__, va);
1213	pa = 0;
1214	/* FALLTHROUGH */
1215
1216 out:
1217	return (pa);
1218}
1219
1220/*
 * Add a list of wired pages to the kva.  This routine is only used for
1222 * temporary kernel mappings that do not need to have page modification
1223 * or references recorded.  Note that old mappings are simply written
1224 * over.  The page is effectively wired, but it's customary to not have
1225 * the PTE reflect that, nor update statistics.
1226 */
1227void
1228pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1229{
1230	struct ia64_lpte *pte;
1231	int i;
1232
1233	for (i = 0; i < count; i++) {
1234		pte = pmap_find_kpte(va);
1235		if (pmap_present(pte))
1236			pmap_invalidate_page(va);
1237		else
1238			pmap_enter_vhpt(pte, va);
1239		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1240		pmap_pte_attr(pte, m[i]->md.memattr);
1241		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1242		va += PAGE_SIZE;
1243	}
1244}
1245
1246/*
1247 * this routine jerks page mappings from the
1248 * kernel -- it is meant only for temporary mappings.
1249 */
1250void
1251pmap_qremove(vm_offset_t va, int count)
1252{
1253	struct ia64_lpte *pte;
1254	int i;
1255
1256	for (i = 0; i < count; i++) {
1257		pte = pmap_find_kpte(va);
1258		if (pmap_present(pte)) {
1259			pmap_remove_vhpt(va);
1260			pmap_invalidate_page(va);
1261			pmap_clear_present(pte);
1262		}
1263		va += PAGE_SIZE;
1264	}
1265}
1266
1267/*
1268 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1269 * to not have the PTE reflect that, nor update statistics.
1270 */
1271void
1272pmap_kenter(vm_offset_t va, vm_offset_t pa)
1273{
1274	struct ia64_lpte *pte;
1275
1276	pte = pmap_find_kpte(va);
1277	if (pmap_present(pte))
1278		pmap_invalidate_page(va);
1279	else
1280		pmap_enter_vhpt(pte, va);
1281	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1282	pmap_pte_attr(pte, VM_MEMATTR_DEFAULT);
1283	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1284}
1285
1286/*
1287 * Remove a page from the kva
1288 */
1289void
1290pmap_kremove(vm_offset_t va)
1291{
1292	struct ia64_lpte *pte;
1293
1294	pte = pmap_find_kpte(va);
1295	if (pmap_present(pte)) {
1296		pmap_remove_vhpt(va);
1297		pmap_invalidate_page(va);
1298		pmap_clear_present(pte);
1299	}
1300}
1301
1302/*
1303 *	Used to map a range of physical addresses into kernel
1304 *	virtual address space.
1305 *
1306 *	The value passed in '*virt' is a suggested virtual address for
1307 *	the mapping. Architectures which can support a direct-mapped
1308 *	physical to virtual region can return the appropriate address
1309 *	within that region, leaving '*virt' unchanged. Other
1310 *	architectures should map the pages starting at '*virt' and
1311 *	update '*virt' with the first usable address after the mapped
1312 *	region.
1313 */
1314vm_offset_t
1315pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1316{
1317	return IA64_PHYS_TO_RR7(start);
1318}
1319
1320/*
1321 *	Remove the given range of addresses from the specified map.
1322 *
1323 *	It is assumed that the start and end are properly
1324 *	rounded to the page size.
1325 */
1326void
1327pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1328{
1329	pmap_t oldpmap;
1330	vm_offset_t va;
1331	pv_entry_t npv, pv;
1332	struct ia64_lpte *pte;
1333
1334	if (pmap->pm_stats.resident_count == 0)
1335		return;
1336
1337	vm_page_lock_queues();
1338	PMAP_LOCK(pmap);
1339	oldpmap = pmap_switch(pmap);
1340
1341	/*
	 * Special handling for removing a single page: a very
	 * common operation for which we can short-circuit some
	 * code.
1345	 */
1346	if (sva + PAGE_SIZE == eva) {
1347		pte = pmap_find_vhpt(sva);
1348		if (pte != NULL)
1349			pmap_remove_pte(pmap, pte, sva, 0, 1);
1350		goto out;
1351	}
1352
1353	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1354		TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
1355			va = pv->pv_va;
1356			if (va >= sva && va < eva) {
1357				pte = pmap_find_vhpt(va);
1358				KASSERT(pte != NULL, ("pte"));
1359				pmap_remove_pte(pmap, pte, va, pv, 1);
1360			}
1361		}
1362	} else {
1363		for (va = sva; va < eva; va += PAGE_SIZE) {
1364			pte = pmap_find_vhpt(va);
1365			if (pte != NULL)
1366				pmap_remove_pte(pmap, pte, va, 0, 1);
1367		}
1368	}
1369
1370out:
1371	vm_page_unlock_queues();
1372	pmap_switch(oldpmap);
1373	PMAP_UNLOCK(pmap);
1374}
1375
1376/*
1377 *	Routine:	pmap_remove_all
1378 *	Function:
1379 *		Removes this physical page from
1380 *		all physical maps in which it resides.
1381 *		Reflects back modify bits to the pager.
1382 *
1383 *	Notes:
1384 *		Original versions of this routine were very
1385 *		inefficient because they iteratively called
1386 *		pmap_remove (slow...)
1387 */
1388
1389void
1390pmap_remove_all(vm_page_t m)
1391{
1392	pmap_t oldpmap;
1393	pv_entry_t pv;
1394
1395	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1396	    ("pmap_remove_all: page %p is not managed", m));
1397	vm_page_lock_queues();
1398	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1399		struct ia64_lpte *pte;
1400		pmap_t pmap = pv->pv_pmap;
1401		vm_offset_t va = pv->pv_va;
1402
1403		PMAP_LOCK(pmap);
1404		oldpmap = pmap_switch(pmap);
1405		pte = pmap_find_vhpt(va);
1406		KASSERT(pte != NULL, ("pte"));
1407		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
			panic("pmap_remove_all: pv_table for %lx is inconsistent",
			    VM_PAGE_TO_PHYS(m));
1409		pmap_remove_pte(pmap, pte, va, pv, 1);
1410		pmap_switch(oldpmap);
1411		PMAP_UNLOCK(pmap);
1412	}
1413	vm_page_aflag_clear(m, PGA_WRITEABLE);
1414	vm_page_unlock_queues();
1415}
1416
1417/*
1418 *	Set the physical protection on the
1419 *	specified range of this map as requested.
1420 */
1421void
1422pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1423{
1424	pmap_t oldpmap;
1425	struct ia64_lpte *pte;
1426
1427	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1428		pmap_remove(pmap, sva, eva);
1429		return;
1430	}
1431
1432	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1433	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1434		return;
1435
1436	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1437		panic("pmap_protect: unaligned addresses");
1438
1439	PMAP_LOCK(pmap);
1440	oldpmap = pmap_switch(pmap);
1441	for ( ; sva < eva; sva += PAGE_SIZE) {
1442		/* If page is invalid, skip this page */
1443		pte = pmap_find_vhpt(sva);
1444		if (pte == NULL)
1445			continue;
1446
1447		/* If there's no change, skip it too */
1448		if (pmap_prot(pte) == prot)
1449			continue;
1450
1451		if ((prot & VM_PROT_WRITE) == 0 &&
1452		    pmap_managed(pte) && pmap_dirty(pte)) {
1453			vm_paddr_t pa = pmap_ppn(pte);
1454			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1455
1456			vm_page_dirty(m);
1457			pmap_clear_dirty(pte);
1458		}
1459
1460		if (prot & VM_PROT_EXECUTE)
1461			ia64_sync_icache(sva, PAGE_SIZE);
1462
1463		pmap_pte_prot(pmap, pte, prot);
1464		pmap_invalidate_page(sva);
1465	}
1466	pmap_switch(oldpmap);
1467	PMAP_UNLOCK(pmap);
1468}
1469
1470/*
1471 *	Insert the given physical page (p) at
1472 *	the specified virtual address (v) in the
1473 *	target physical map with the protection requested.
1474 *
1475 *	If specified, the page will be wired down, meaning
1476 *	that the related pte can not be reclaimed.
1477 *
1478 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1479 *	or lose information.  That is, this routine must actually
1480 *	insert this page into the given map NOW.
1481 */
1482void
1483pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1484    vm_prot_t prot, boolean_t wired)
1485{
1486	pmap_t oldpmap;
1487	vm_offset_t pa;
1488	vm_offset_t opa;
1489	struct ia64_lpte origpte;
1490	struct ia64_lpte *pte;
1491	boolean_t icache_inval, managed;
1492
1493	vm_page_lock_queues();
1494	PMAP_LOCK(pmap);
1495	oldpmap = pmap_switch(pmap);
1496
1497	va &= ~PAGE_MASK;
1498 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1499	KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
1500	    ("pmap_enter: page %p is not busy", m));
1501
1502	/*
1503	 * Find (or create) a pte for the given mapping.
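	 * For user addresses pmap_find_pte() can fail when the PTE zone
	 * is out of memory (it allocates with M_NOWAIT); in that case
	 * drop the locks, wait for more free pages and retry.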
1504	 */
1505	while ((pte = pmap_find_pte(va)) == NULL) {
1506		pmap_switch(oldpmap);
1507		PMAP_UNLOCK(pmap);
1508		vm_page_unlock_queues();
1509		VM_WAIT;
1510		vm_page_lock_queues();
1511		PMAP_LOCK(pmap);
1512		oldpmap = pmap_switch(pmap);
1513	}
1514	origpte = *pte;
1515	if (!pmap_present(pte)) {
1516		opa = ~0UL;
1517		pmap_enter_vhpt(pte, va);
1518	} else
1519		opa = pmap_ppn(pte);
1520	managed = FALSE;
1521	pa = VM_PAGE_TO_PHYS(m);
1522
1523	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1524
1525	/*
1526	 * Mapping has not changed, must be protection or wiring change.
1527	 */
1528	if (opa == pa) {
1529		/*
1530		 * Wiring change, just update stats. We don't worry about
1531		 * wiring PT pages as they remain resident as long as there
1532		 * are valid mappings in them. Hence, if a user page is wired,
1533		 * the PT page will be also.
1534		 */
1535		if (wired && !pmap_wired(&origpte))
1536			pmap->pm_stats.wired_count++;
1537		else if (!wired && pmap_wired(&origpte))
1538			pmap->pm_stats.wired_count--;
1539
1540		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1541
1542		/*
1543		 * We might be turning off write access to the page,
1544		 * so we go ahead and sense modify status. Otherwise,
1545		 * we can avoid I-cache invalidation if the page
1546		 * already allowed execution.
1547		 */
1548		if (managed && pmap_dirty(&origpte))
1549			vm_page_dirty(m);
1550		else if (pmap_exec(&origpte))
1551			icache_inval = FALSE;
1552
1553		pmap_invalidate_page(va);
1554		goto validate;
1555	}
1556
1557	/*
1558	 * Mapping has changed, invalidate old range and fall
1559	 * through to handle validating new mapping.
1560	 */
1561	if (opa != ~0UL) {
1562		pmap_remove_pte(pmap, pte, va, 0, 0);
1563		pmap_enter_vhpt(pte, va);
1564	}
1565
1566	/*
1567	 * Enter on the PV list if part of our managed memory.
1568	 */
1569	if ((m->oflags & VPO_UNMANAGED) == 0) {
1570		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1571		    ("pmap_enter: managed mapping within the clean submap"));
1572		pmap_insert_entry(pmap, va, m);
1573		managed = TRUE;
1574	}
1575
1576	/*
1577	 * Increment counters
1578	 */
1579	pmap->pm_stats.resident_count++;
1580	if (wired)
1581		pmap->pm_stats.wired_count++;
1582
1583validate:
1584
1585	/*
1586	 * Now validate mapping with desired protection/wiring. This
1587	 * adds the pte to the VHPT if necessary.
1588	 */
1589	pmap_pte_prot(pmap, pte, prot);
1590	pmap_pte_attr(pte, m->md.memattr);
1591	pmap_set_pte(pte, va, pa, wired, managed);
1592
1593	/* Invalidate the I-cache when needed. */
1594	if (icache_inval)
1595		ia64_sync_icache(va, PAGE_SIZE);
1596
1597	if ((prot & VM_PROT_WRITE) != 0 && managed)
1598		vm_page_aflag_set(m, PGA_WRITEABLE);
1599	vm_page_unlock_queues();
1600	pmap_switch(oldpmap);
1601	PMAP_UNLOCK(pmap);
1602}
1603
1604/*
1605 * Maps a sequence of resident pages belonging to the same object.
1606 * The sequence begins with the given page m_start.  This page is
1607 * mapped at the given virtual address start.  Each subsequent page is
1608 * mapped at a virtual address that is offset from start by the same
1609 * amount as the page is offset from m_start within the object.  The
1610 * last page in the sequence is the page with the largest offset from
1611 * m_start that can be mapped at a virtual address less than the given
1612 * virtual address end.  Not every virtual page between start and end
1613 * is mapped; only those for which a resident page exists with the
1614 * corresponding offset from m_start are mapped.
1615 */
1616void
1617pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1618    vm_page_t m_start, vm_prot_t prot)
1619{
1620	pmap_t oldpmap;
1621	vm_page_t m;
1622	vm_pindex_t diff, psize;
1623
1624	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
1625	psize = atop(end - start);
1626	m = m_start;
1627	vm_page_lock_queues();
1628	PMAP_LOCK(pmap);
1629	oldpmap = pmap_switch(pmap);
1630	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1631		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1632		m = TAILQ_NEXT(m, listq);
1633	}
1634	vm_page_unlock_queues();
1635	pmap_switch(oldpmap);
1636 	PMAP_UNLOCK(pmap);
1637}
1638
1639/*
 * This code makes some *MAJOR* assumptions:
 * 1. The current pmap and the given pmap exist.
 * 2. The mapping is not wired.
 * 3. Read access only.
 * 4. No page table pages are needed.
 * It is *MUCH* faster than pmap_enter...
1646 */
1647
1648void
1649pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1650{
1651	pmap_t oldpmap;
1652
1653	vm_page_lock_queues();
1654	PMAP_LOCK(pmap);
1655	oldpmap = pmap_switch(pmap);
1656	pmap_enter_quick_locked(pmap, va, m, prot);
1657	vm_page_unlock_queues();
1658	pmap_switch(oldpmap);
1659	PMAP_UNLOCK(pmap);
1660}
1661
1662static void
1663pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1664    vm_prot_t prot)
1665{
1666	struct ia64_lpte *pte;
1667	boolean_t managed;
1668
1669	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1670	    (m->oflags & VPO_UNMANAGED) != 0,
1671	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1672	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1673	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1674
1675	if ((pte = pmap_find_pte(va)) == NULL)
1676		return;
1677
1678	if (!pmap_present(pte)) {
1679		/* Enter on the PV list if the page is managed. */
1680		if ((m->oflags & VPO_UNMANAGED) == 0) {
1681			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1682				pmap_free_pte(pte, va);
1683				return;
1684			}
1685			managed = TRUE;
1686		} else
1687			managed = FALSE;
1688
1689		/* Increment counters. */
1690		pmap->pm_stats.resident_count++;
1691
		/* Initialize with non-writable protection; enter into the VHPT. */
1693		pmap_enter_vhpt(pte, va);
1694		pmap_pte_prot(pmap, pte,
1695		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1696		pmap_pte_attr(pte, m->md.memattr);
1697		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1698
1699		if (prot & VM_PROT_EXECUTE)
1700			ia64_sync_icache(va, PAGE_SIZE);
1701	}
1702}
1703
1704/*
1705 * pmap_object_init_pt preloads the ptes for a given object
1706 * into the specified pmap.  This eliminates the blast of soft
1707 * faults on process startup and immediately after an mmap.
1708 */
1709void
1710pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1711		    vm_object_t object, vm_pindex_t pindex,
1712		    vm_size_t size)
1713{
1714
1715	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
1716	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1717	    ("pmap_object_init_pt: non-device object"));
1718}
1719
1720/*
1721 *	Routine:	pmap_change_wiring
1722 *	Function:	Change the wiring attribute for a map/virtual-address
1723 *			pair.
1724 *	In/out conditions:
1725 *			The mapping must already exist in the pmap.
1726 */
void
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
{
1733	pmap_t oldpmap;
1734	struct ia64_lpte *pte;
1735
1736	PMAP_LOCK(pmap);
1737	oldpmap = pmap_switch(pmap);
1738
1739	pte = pmap_find_vhpt(va);
1740	KASSERT(pte != NULL, ("pte"));
1741	if (wired && !pmap_wired(pte)) {
1742		pmap->pm_stats.wired_count++;
1743		pmap_set_wired(pte);
1744	} else if (!wired && pmap_wired(pte)) {
1745		pmap->pm_stats.wired_count--;
1746		pmap_clear_wired(pte);
1747	}
1748
1749	pmap_switch(oldpmap);
1750	PMAP_UNLOCK(pmap);
1751}
1752
1753
1754
1755/*
1756 *	Copy the range specified by src_addr/len
1757 *	from the source map to the range dst_addr/len
1758 *	in the destination map.
1759 *
1760 *	This routine is only advisory and need not do anything.
1761 */
1762
1763void
1764pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1765	  vm_offset_t src_addr)
1766{
1767}
1768
1769
1770/*
1771 *	pmap_zero_page zeros the specified hardware page by
1772 *	mapping it into virtual memory and using bzero to clear
1773 *	its contents.
1774 */
1775
1776void
1777pmap_zero_page(vm_page_t m)
1778{
1779	void *p;
1780
1781	p = (void *)pmap_page_to_va(m);
1782	bzero(p, PAGE_SIZE);
1783}
1784
1785
1786/*
1787 *	pmap_zero_page_area zeros the specified hardware page by
1788 *	mapping it into virtual memory and using bzero to clear
1789 *	its contents.
1790 *
1791 *	off and size must reside within a single page.
1792 */
1793
1794void
1795pmap_zero_page_area(vm_page_t m, int off, int size)
1796{
1797	char *p;
1798
1799	p = (void *)pmap_page_to_va(m);
1800	bzero(p + off, size);
1801}
1802
1803
1804/*
1805 *	pmap_zero_page_idle zeros the specified hardware page by
1806 *	mapping it into virtual memory and using bzero to clear
1807 *	its contents.  This is for the vm_idlezero process.
1808 */
1809
1810void
1811pmap_zero_page_idle(vm_page_t m)
1812{
1813	void *p;
1814
1815	p = (void *)pmap_page_to_va(m);
1816	bzero(p, PAGE_SIZE);
1817}
1818
1819
1820/*
1821 *	pmap_copy_page copies the specified (machine independent)
1822 *	page by mapping the page into virtual memory and using
1823 *	bcopy to copy the page, one machine dependent page at a
1824 *	time.
1825 */
1826void
1827pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
1828{
1829	void *dst, *src;
1830
1831	src = (void *)pmap_page_to_va(msrc);
1832	dst = (void *)pmap_page_to_va(mdst);
1833	bcopy(src, dst, PAGE_SIZE);
1834}
1835
1836int unmapped_buf_allowed;
1837
1838void
1839pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
1840    vm_offset_t b_offset, int xfersize)
1841{
1842	void *a_cp, *b_cp;
1843	vm_offset_t a_pg_offset, b_pg_offset;
1844	int cnt;
1845
1846	while (xfersize > 0) {
1847		a_pg_offset = a_offset & PAGE_MASK;
1848		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
1849		a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) +
1850		    a_pg_offset;
1851		b_pg_offset = b_offset & PAGE_MASK;
1852		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
1853		b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) +
1854		    b_pg_offset;
1855		bcopy(a_cp, b_cp, cnt);
1856		a_offset += cnt;
1857		b_offset += cnt;
1858		xfersize -= cnt;
1859	}
1860}
1861
1862/*
1863 * Returns true if the pmap's pv is one of the first
1864 * 16 pvs linked to from this page.  This count may
1865 * be changed upwards or downwards in the future; it
1866 * is only necessary that true be returned for a small
1867 * subset of pmaps for proper page aging.
1868 */
1869boolean_t
1870pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
1871{
1872	pv_entry_t pv;
1873	int loops = 0;
1874	boolean_t rv;
1875
1876	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1877	    ("pmap_page_exists_quick: page %p is not managed", m));
1878	rv = FALSE;
1879	vm_page_lock_queues();
1880	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1881		if (pv->pv_pmap == pmap) {
1882			rv = TRUE;
1883			break;
1884		}
1885		loops++;
1886		if (loops >= 16)
1887			break;
1888	}
1889	vm_page_unlock_queues();
1890	return (rv);
1891}
1892
1893/*
1894 *	pmap_page_wired_mappings:
1895 *
1896 *	Return the number of managed mappings to the given physical page
1897 *	that are wired.
1898 */
1899int
1900pmap_page_wired_mappings(vm_page_t m)
1901{
1902	struct ia64_lpte *pte;
1903	pmap_t oldpmap, pmap;
1904	pv_entry_t pv;
1905	int count;
1906
1907	count = 0;
1908	if ((m->oflags & VPO_UNMANAGED) != 0)
1909		return (count);
1910	vm_page_lock_queues();
1911	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1912		pmap = pv->pv_pmap;
1913		PMAP_LOCK(pmap);
1914		oldpmap = pmap_switch(pmap);
1915		pte = pmap_find_vhpt(pv->pv_va);
1916		KASSERT(pte != NULL, ("pte"));
1917		if (pmap_wired(pte))
1918			count++;
1919		pmap_switch(oldpmap);
1920		PMAP_UNLOCK(pmap);
1921	}
1922	vm_page_unlock_queues();
1923	return (count);
1924}
1925
1926/*
 * Remove all pages from the specified address space;
 * this aids process exit speeds.  Also, this code
 * is special-cased for the current process only, but
1930 * can have the more generic (and slightly slower)
1931 * mode enabled.  This is much faster than pmap_remove
1932 * in the case of running down an entire address space.
1933 */
1934void
1935pmap_remove_pages(pmap_t pmap)
1936{
1937	pmap_t oldpmap;
1938	pv_entry_t pv, npv;
1939
1940	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
1941		printf("warning: %s called with non-current pmap\n",
1942		    __func__);
1943		return;
1944	}
1945
1946	vm_page_lock_queues();
1947	PMAP_LOCK(pmap);
1948	oldpmap = pmap_switch(pmap);
1949
1950	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
1951		struct ia64_lpte *pte;
1952
1953		npv = TAILQ_NEXT(pv, pv_plist);
1954
1955		pte = pmap_find_vhpt(pv->pv_va);
1956		KASSERT(pte != NULL, ("pte"));
1957		if (!pmap_wired(pte))
1958			pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
1959	}
1960
1961	pmap_switch(oldpmap);
1962	PMAP_UNLOCK(pmap);
1963	vm_page_unlock_queues();
1964}
1965
1966/*
1967 *	pmap_ts_referenced:
1968 *
1969 *	Return a count of reference bits for a page, clearing those bits.
1970 *	It is not necessary for every reference bit to be cleared, but it
1971 *	is necessary that 0 only be returned when there are truly no
1972 *	reference bits set.
1973 *
1974 *	XXX: The exact number of bits to check and clear is a matter that
1975 *	should be tested and standardized at some point in the future for
1976 *	optimal aging of shared pages.
1977 */
1978int
1979pmap_ts_referenced(vm_page_t m)
1980{
1981	struct ia64_lpte *pte;
1982	pmap_t oldpmap;
1983	pv_entry_t pv;
1984	int count = 0;
1985
1986	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1987	    ("pmap_ts_referenced: page %p is not managed", m));
1988	vm_page_lock_queues();
1989	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1990		PMAP_LOCK(pv->pv_pmap);
1991		oldpmap = pmap_switch(pv->pv_pmap);
1992		pte = pmap_find_vhpt(pv->pv_va);
1993		KASSERT(pte != NULL, ("pte"));
1994		if (pmap_accessed(pte)) {
1995			count++;
1996			pmap_clear_accessed(pte);
1997			pmap_invalidate_page(pv->pv_va);
1998		}
1999		pmap_switch(oldpmap);
2000		PMAP_UNLOCK(pv->pv_pmap);
2001	}
2002	vm_page_unlock_queues();
2003	return (count);
2004}
2005
2006/*
2007 *	pmap_is_modified:
2008 *
2009 *	Return whether or not the specified physical page was modified
2010 *	in any physical maps.
2011 */
2012boolean_t
2013pmap_is_modified(vm_page_t m)
2014{
2015	struct ia64_lpte *pte;
2016	pmap_t oldpmap;
2017	pv_entry_t pv;
2018	boolean_t rv;
2019
2020	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2021	    ("pmap_is_modified: page %p is not managed", m));
2022	rv = FALSE;
2023
2024	/*
2025	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be
2026	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2027	 * is clear, no PTEs can be dirty.
2028	 */
2029	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2030	if ((m->oflags & VPO_BUSY) == 0 &&
2031	    (m->aflags & PGA_WRITEABLE) == 0)
2032		return (rv);
2033	vm_page_lock_queues();
2034	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2035		PMAP_LOCK(pv->pv_pmap);
2036		oldpmap = pmap_switch(pv->pv_pmap);
2037		pte = pmap_find_vhpt(pv->pv_va);
2038		pmap_switch(oldpmap);
2039		KASSERT(pte != NULL, ("pte"));
2040		rv = pmap_dirty(pte) ? TRUE : FALSE;
2041		PMAP_UNLOCK(pv->pv_pmap);
2042		if (rv)
2043			break;
2044	}
2045	vm_page_unlock_queues();
2046	return (rv);
2047}
2048
2049/*
2050 *	pmap_is_prefaultable:
2051 *
2052 *	Return whether or not the specified virtual address is eligible
2053 *	for prefault.
2054 */
2055boolean_t
2056pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2057{
2058	struct ia64_lpte *pte;
2059
2060	pte = pmap_find_vhpt(addr);
2061	if (pte != NULL && pmap_present(pte))
2062		return (FALSE);
2063	return (TRUE);
2064}
2065
2066/*
2067 *	pmap_is_referenced:
2068 *
2069 *	Return whether or not the specified physical page was referenced
2070 *	in any physical maps.
2071 */
2072boolean_t
2073pmap_is_referenced(vm_page_t m)
2074{
2075	struct ia64_lpte *pte;
2076	pmap_t oldpmap;
2077	pv_entry_t pv;
2078	boolean_t rv;
2079
2080	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2081	    ("pmap_is_referenced: page %p is not managed", m));
2082	rv = FALSE;
2083	vm_page_lock_queues();
2084	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2085		PMAP_LOCK(pv->pv_pmap);
2086		oldpmap = pmap_switch(pv->pv_pmap);
2087		pte = pmap_find_vhpt(pv->pv_va);
2088		pmap_switch(oldpmap);
2089		KASSERT(pte != NULL, ("pte"));
2090		rv = pmap_accessed(pte) ? TRUE : FALSE;
2091		PMAP_UNLOCK(pv->pv_pmap);
2092		if (rv)
2093			break;
2094	}
2095	vm_page_unlock_queues();
2096	return (rv);
2097}
2098
2099/*
2100 *	Clear the modify bits on the specified physical page.
2101 */
2102void
2103pmap_clear_modify(vm_page_t m)
2104{
2105	struct ia64_lpte *pte;
2106	pmap_t oldpmap;
2107	pv_entry_t pv;
2108
2109	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2110	    ("pmap_clear_modify: page %p is not managed", m));
2111	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2112	KASSERT((m->oflags & VPO_BUSY) == 0,
2113	    ("pmap_clear_modify: page %p is busy", m));
2114
2115	/*
2116	 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
2117	 * If the object containing the page is locked and the page is not
2118	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
2119	 */
2120	if ((m->aflags & PGA_WRITEABLE) == 0)
2121		return;
2122	vm_page_lock_queues();
2123	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2124		PMAP_LOCK(pv->pv_pmap);
2125		oldpmap = pmap_switch(pv->pv_pmap);
2126		pte = pmap_find_vhpt(pv->pv_va);
2127		KASSERT(pte != NULL, ("pte"));
2128		if (pmap_dirty(pte)) {
2129			pmap_clear_dirty(pte);
2130			pmap_invalidate_page(pv->pv_va);
2131		}
2132		pmap_switch(oldpmap);
2133		PMAP_UNLOCK(pv->pv_pmap);
2134	}
2135	vm_page_unlock_queues();
2136}
2137
2138/*
2139 *	pmap_clear_reference:
2140 *
2141 *	Clear the reference bit on the specified physical page.
2142 */
2143void
2144pmap_clear_reference(vm_page_t m)
2145{
2146	struct ia64_lpte *pte;
2147	pmap_t oldpmap;
2148	pv_entry_t pv;
2149
2150	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2151	    ("pmap_clear_reference: page %p is not managed", m));
2152	vm_page_lock_queues();
2153	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2154		PMAP_LOCK(pv->pv_pmap);
2155		oldpmap = pmap_switch(pv->pv_pmap);
2156		pte = pmap_find_vhpt(pv->pv_va);
2157		KASSERT(pte != NULL, ("pte"));
2158		if (pmap_accessed(pte)) {
2159			pmap_clear_accessed(pte);
2160			pmap_invalidate_page(pv->pv_va);
2161		}
2162		pmap_switch(oldpmap);
2163		PMAP_UNLOCK(pv->pv_pmap);
2164	}
2165	vm_page_unlock_queues();
2166}
2167
2168/*
2169 * Clear the write and modified bits in each of the given page's mappings.
2170 */
2171void
2172pmap_remove_write(vm_page_t m)
2173{
2174	struct ia64_lpte *pte;
2175	pmap_t oldpmap, pmap;
2176	pv_entry_t pv;
2177	vm_prot_t prot;
2178
2179	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2180	    ("pmap_remove_write: page %p is not managed", m));
2181
2182	/*
2183	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
2184	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
2185	 * is clear, no page table entries need updating.
2186	 */
2187	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
2188	if ((m->oflags & VPO_BUSY) == 0 &&
2189	    (m->aflags & PGA_WRITEABLE) == 0)
2190		return;
2191	vm_page_lock_queues();
2192	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2193		pmap = pv->pv_pmap;
2194		PMAP_LOCK(pmap);
2195		oldpmap = pmap_switch(pmap);
2196		pte = pmap_find_vhpt(pv->pv_va);
2197		KASSERT(pte != NULL, ("pte"));
2198		prot = pmap_prot(pte);
2199		if ((prot & VM_PROT_WRITE) != 0) {
2200			if (pmap_dirty(pte)) {
2201				vm_page_dirty(m);
2202				pmap_clear_dirty(pte);
2203			}
2204			prot &= ~VM_PROT_WRITE;
2205			pmap_pte_prot(pmap, pte, prot);
2206			pmap_pte_attr(pte, m->md.memattr);
2207			pmap_invalidate_page(pv->pv_va);
2208		}
2209		pmap_switch(oldpmap);
2210		PMAP_UNLOCK(pmap);
2211	}
2212	vm_page_aflag_clear(m, PGA_WRITEABLE);
2213	vm_page_unlock_queues();
2214}
2215
2216/*
2217 * Map a set of physical memory pages into the kernel virtual
2218 * address space. Return a pointer to where it is mapped. This
2219 * routine is intended to be used for mapping device memory,
2220 * NOT real memory.
2221 */
2222void *
2223pmap_mapdev(vm_paddr_t pa, vm_size_t sz)
2224{
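	/*
	 * Cache the most recent translation so that repeated calls for
	 * the same range do not need another memory descriptor lookup.
	 */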
2225	static void *last_va = NULL;
2226	static vm_paddr_t last_pa = 0;
2227	static vm_size_t last_sz = 0;
2228	struct efi_md *md;
2229	vm_offset_t va;
2230
2231	if (pa == last_pa && sz == last_sz)
2232		return (last_va);
2233
2234	md = efi_md_find(pa);
2235	if (md == NULL) {
2236		printf("%s: [%#lx..%#lx] not covered by memory descriptor\n",
2237		    __func__, pa, pa + sz - 1);
2238		return (NULL);
2239	}
2240
2241	if (md->md_type == EFI_MD_TYPE_FREE) {
2242		printf("%s: [%#lx..%#lx] is in DRAM\n", __func__, pa,
2243		    pa + sz - 1);
2244		return (NULL);
2245	}
2246
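	/*
	 * Write-back capable memory is handed out through the cacheable
	 * direct-mapped region 7; anything else through the uncacheable
	 * region 6.
	 */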
2247	va = (md->md_attr & EFI_MD_ATTR_WB) ? IA64_PHYS_TO_RR7(pa) :
2248	    IA64_PHYS_TO_RR6(pa);
2249
2250	last_va = (void *)va;
2251	last_pa = pa;
2252	last_sz = sz;
2253	return (last_va);
2254}
2255
2256/*
2257 * 'Unmap' a range mapped by pmap_mapdev().
2258 */
2259void
2260pmap_unmapdev(vm_offset_t va, vm_size_t size)
2261{
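	/*
	 * Nothing to do: pmap_mapdev() returns addresses within the
	 * direct-mapped regions, so there is no mapping to tear down.
	 */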
2262}
2263
2264/*
2265 * Sets the memory attribute for the specified page.
2266 */
2267static void
2268pmap_page_set_memattr_1(void *arg)
2269{
2270	struct ia64_pal_result res;
2271	register_t is;
2272	uintptr_t pp = (uintptr_t)arg;
2273
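	/*
	 * Issue the PAL call whose number was passed in 'arg' with
	 * interrupts disabled on this CPU.
	 */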
2274	is = intr_disable();
2275	res = ia64_call_pal_static(pp, 0, 0, 0);
2276	intr_restore(is);
2277}
2278
2279void
2280pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2281{
2282	struct ia64_lpte *pte;
2283	pmap_t oldpmap;
2284	pv_entry_t pv;
2285	void *va;
2286
2287	vm_page_lock_queues();
2288	m->md.memattr = ma;
2289	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2290		PMAP_LOCK(pv->pv_pmap);
2291		oldpmap = pmap_switch(pv->pv_pmap);
2292		pte = pmap_find_vhpt(pv->pv_va);
2293		KASSERT(pte != NULL, ("pte"));
2294		pmap_pte_attr(pte, ma);
2295		pmap_invalidate_page(pv->pv_va);
2296		pmap_switch(oldpmap);
2297		PMAP_UNLOCK(pv->pv_pmap);
2298	}
2299	vm_page_unlock_queues();
2300
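	/*
	 * When the page is made uncacheable, call PAL_PREFETCH_VISIBILITY
	 * on every CPU, flush the page from the data cache, and then call
	 * PAL_MC_DRAIN on every CPU.
	 */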
2301	if (ma == VM_MEMATTR_UNCACHEABLE) {
2302#ifdef SMP
2303		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2304		    (void *)PAL_PREFETCH_VISIBILITY);
2305#else
2306		pmap_page_set_memattr_1((void *)PAL_PREFETCH_VISIBILITY);
2307#endif
2308		va = (void *)pmap_page_to_va(m);
2309		critical_enter();
2310		cpu_flush_dcache(va, PAGE_SIZE);
2311		critical_exit();
2312#ifdef SMP
2313		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
2314		    (void *)PAL_MC_DRAIN);
2315#else
2316		pmap_page_set_memattr_1((void *)PAL_MC_DRAIN);
2317#endif
2318	}
2319}
2320
2321/*
2322 * Perform the pmap-specific work for the mincore(2) system call.
2323 */
2324int
2325pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2326{
2327	pmap_t oldpmap;
2328	struct ia64_lpte *pte, tpte;
2329	vm_paddr_t pa;
2330	int val;
2331
2332	PMAP_LOCK(pmap);
2333retry:
2334	oldpmap = pmap_switch(pmap);
2335	pte = pmap_find_vhpt(addr);
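	/* Copy the PTE so it can still be examined after switching back. */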
2336	if (pte != NULL) {
2337		tpte = *pte;
2338		pte = &tpte;
2339	}
2340	pmap_switch(oldpmap);
2341	if (pte == NULL || !pmap_present(pte)) {
2342		val = 0;
2343		goto out;
2344	}
2345	val = MINCORE_INCORE;
2346	if (pmap_dirty(pte))
2347		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2348	if (pmap_accessed(pte))
2349		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2350	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
2351	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
2352	    pmap_managed(pte)) {
2353		pa = pmap_ppn(pte);
2354		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
2355		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
2356			goto retry;
2357	} else
2358out:
2359		PA_UNLOCK_COND(*locked_pa);
2360	PMAP_UNLOCK(pmap);
2361	return (val);
2362}
2363
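/*
 * Make the pmap of the given thread the active one on this CPU.
 */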
2364void
2365pmap_activate(struct thread *td)
2366{
2367	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
2368}
2369
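/*
 * Make the given pmap the active translation context on this CPU by
 * loading its RIDs into the region registers below IA64_VM_MINKERN_REGION,
 * and return the previously active pmap.  A NULL pmap loads each region
 * with its region number as the RID.  Each region register value is built
 * as (rid << 8) | (PAGE_SHIFT << 2) | 1: the RID, the preferred page size
 * and the VHPT walker enable bit.  The switch runs in a critical section
 * so that md.current_pmap and the region registers cannot get out of sync.
 */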
2370pmap_t
2371pmap_switch(pmap_t pm)
2372{
2373	pmap_t prevpm;
2374	int i;
2375
2376	critical_enter();
2377	prevpm = PCPU_GET(md.current_pmap);
2378	if (prevpm == pm)
2379		goto out;
2380	if (pm == NULL) {
2381		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2382			ia64_set_rr(IA64_RR_BASE(i),
2383			    (i << 8)|(PAGE_SHIFT << 2)|1);
2384		}
2385	} else {
2386		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
2387			ia64_set_rr(IA64_RR_BASE(i),
2388			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2389		}
2390	}
2391	PCPU_SET(md.current_pmap, pm);
2392	ia64_srlz_d();
2393
2394out:
2395	critical_exit();
2396	return (prevpm);
2397}
2398
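/*
 * Synchronize the instruction cache with recently written data in the
 * given address range.  The range is rounded out to 32-byte boundaries
 * and walked one page at a time; only pages that are actually mapped
 * are synced.
 */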
2399void
2400pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2401{
2402	pmap_t oldpm;
2403	struct ia64_lpte *pte;
2404	vm_offset_t lim;
2405	vm_size_t len;
2406
2407	sz += va & 31;
2408	va &= ~31;
2409	sz = (sz + 31) & ~31;
2410
2411	PMAP_LOCK(pm);
2412	oldpm = pmap_switch(pm);
2413	while (sz > 0) {
2414		lim = round_page(va);
2415		len = MIN(lim - va, sz);
2416		pte = pmap_find_vhpt(va);
2417		if (pte != NULL && pmap_present(pte))
2418			ia64_sync_icache(va, len);
2419		va += len;
2420		sz -= len;
2421	}
2422	pmap_switch(oldpm);
2423	PMAP_UNLOCK(pm);
2424}
2425
2426/*
2427 *	Increase the starting virtual address of the given mapping if a
2428 *	different alignment might result in more superpage mappings.
2429 */
2430void
2431pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2432    vm_offset_t *addr, vm_size_t size)
2433{
2434}
2435
2436#include "opt_ddb.h"
2437
2438#ifdef DDB
2439
2440#include <ddb/ddb.h>
2441
2442static const char*	psnames[] = {
2443	"1B",	"2B",	"4B",	"8B",
2444	"16B",	"32B",	"64B",	"128B",
2445	"256B",	"512B",	"1K",	"2K",
2446	"4K",	"8K",	"16K",	"32K",
2447	"64K",	"128K",	"256K",	"512K",
2448	"1M",	"2M",	"4M",	"8M",
2449	"16M",	"32M",	"64M",	"128M",
2450	"256M",	"512M",	"1G",	"2G"
2451};
2452
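/*
 * Dump the instruction (type 0) or data (type 1) translation registers.
 * PAL_VM_SUMMARY provides the number of TRs and PAL_VM_TR_READ reads the
 * individual entries.
 */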
2453static void
2454print_trs(int type)
2455{
2456	struct ia64_pal_result res;
2457	int i, maxtr;
2458	struct {
2459		pt_entry_t	pte;
2460		uint64_t	itir;
2461		uint64_t	ifa;
2462		struct ia64_rr	rr;
2463	} buf;
2464	static const char *manames[] = {
2465		"WB",	"bad",	"bad",	"bad",
2466		"UC",	"UCE",	"WC",	"NaT",
2467	};
2468
2469	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2470	if (res.pal_status != 0) {
2471		db_printf("Can't get VM summary\n");
2472		return;
2473	}
2474
2475	if (type == 0)
2476		maxtr = (res.pal_result[0] >> 40) & 0xff;
2477	else
2478		maxtr = (res.pal_result[0] >> 32) & 0xff;
2479
2480	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2481	for (i = 0; i <= maxtr; i++) {
2482		bzero(&buf, sizeof(buf));
2483		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
2484		    ia64_tpa((uint64_t)&buf));
2485		if (!(res.pal_result[0] & 1))
2486			buf.pte &= ~PTE_AR_MASK;
2487		if (!(res.pal_result[0] & 2))
2488			buf.pte &= ~PTE_PL_MASK;
2489		if (!(res.pal_result[0] & 4))
2490			pmap_clear_dirty(&buf);
2491		if (!(res.pal_result[0] & 8))
2492			buf.pte &= ~PTE_MA_MASK;
2493		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
2494		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
2495		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
2496		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
2497		    (buf.pte & PTE_ED) ? 1 : 0,
2498		    (int)(buf.pte & PTE_AR_MASK) >> 9,
2499		    (int)(buf.pte & PTE_PL_MASK) >> 7,
2500		    (pmap_dirty(&buf)) ? 1 : 0,
2501		    (pmap_accessed(&buf)) ? 1 : 0,
2502		    manames[(buf.pte & PTE_MA_MASK) >> 2],
2503		    (pmap_present(&buf)) ? 1 : 0,
2504		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
2505	}
2506}
2507
2508DB_COMMAND(itr, db_itr)
2509{
2510	print_trs(0);
2511}
2512
2513DB_COMMAND(dtr, db_dtr)
2514{
2515	print_trs(1);
2516}
2517
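/* Dump the region registers of the current CPU. */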
2518DB_COMMAND(rr, db_rr)
2519{
2520	int i;
2521	uint64_t t;
2522	struct ia64_rr rr;
2523
2524	db_printf("RR RID    PgSz VE\n");
2525	for (i = 0; i < 8; i++) {
2526		__asm __volatile ("mov %0=rr[%1]"
2527				  : "=r"(t)
2528				  : "r"(IA64_RR_BASE(i)));
2529		*(uint64_t *) &rr = t;
2530		db_printf("%d  %06x %4s %d\n",
2531		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2532	}
2533}
2534
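/* Print the VHPT entry address (thash) of the given address. */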
2535DB_COMMAND(thash, db_thash)
2536{
2537	if (!have_addr)
2538		return;
2539
2540	db_printf("%p\n", (void *) ia64_thash(addr));
2541}
2542
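/* Print the VHPT tag (ttag) of the given address. */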
2543DB_COMMAND(ttag, db_ttag)
2544{
2545	if (!have_addr)
2546		return;
2547
2548	db_printf("0x%lx\n", ia64_ttag(addr));
2549}
2550
2551DB_COMMAND(kpte, db_kpte)
2552{
2553	struct ia64_lpte *pte;
2554
2555	if (!have_addr) {
2556		db_printf("usage: kpte <kva>\n");
2557		return;
2558	}
2559	if (addr < VM_MIN_KERNEL_ADDRESS) {
2560		db_printf("kpte: error: invalid <kva>\n");
2561		return;
2562	}
2563	pte = pmap_find_kpte(addr);
2564	db_printf("kpte at %p:\n", pte);
2565	db_printf("  pte  =%016lx\n", pte->pte);
2566	db_printf("  itir =%016lx\n", pte->itir);
2567	db_printf("  tag  =%016lx\n", pte->tag);
2568	db_printf("  chain=%016lx\n", pte->chain);
2569}
2570
2571#endif
2572