1/*-
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD$");
50
51#include "opt_pmap.h"
52
53#include <sys/param.h>
54#include <sys/kernel.h>
55#include <sys/lock.h>
56#include <sys/mman.h>
57#include <sys/mutex.h>
58#include <sys/proc.h>
59#include <sys/rwlock.h>
60#include <sys/smp.h>
61#include <sys/sysctl.h>
62#include <sys/systm.h>
63
64#include <vm/vm.h>
65#include <vm/vm_param.h>
66#include <vm/vm_page.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_pageout.h>
70#include <vm/uma.h>
71
72#include <machine/bootinfo.h>
73#include <machine/efi.h>
74#include <machine/md_var.h>
75#include <machine/pal.h>
76
77/*
78 *	Manages physical address maps.
79 *
80 *	Since the information managed by this module is
81 *	also stored by the logical address mapping module,
82 *	this module may throw away valid virtual-to-physical
83 *	mappings at almost any time.  However, invalidations
84 *	of virtual-to-physical mappings must be done as
85 *	requested.
86 *
87 *	In order to cope with hardware architectures which
88 *	make virtual-to-physical map invalidates expensive,
 * this module may delay invalidation or reduced-protection
 * operations until such time as they are actually
91 *	necessary.  This module is given full information as
92 *	to which processors are currently using which maps,
93 *	and to when physical maps must be made correct.
94 */
95
96/*
97 * Following the Linux model, region IDs are allocated in groups of
98 * eight so that a single region ID can be used for as many RRs as we
99 * want by encoding the RR number into the low bits of the ID.
100 *
101 * We reserve region ID 0 for the kernel and allocate the remaining
102 * IDs for user pmaps.
103 *
104 * Region 0-3:	User virtually mapped
105 * Region 4:	PBVM and special mappings
106 * Region 5:	Kernel virtual memory
107 * Region 6:	Direct-mapped uncacheable
108 * Region 7:	Direct-mapped cacheable
109 */
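
/*
 * A region is selected by the top three bits of a 64-bit virtual
 * address (va >> 61), so each of the eight regions spans 2^61 bytes
 * and is translated with the region ID currently programmed into the
 * corresponding region register.
 */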
110
111/* XXX move to a header. */
112extern uint64_t ia64_gateway_page[];
113
114#if !defined(DIAGNOSTIC)
115#define PMAP_INLINE __inline
116#else
117#define PMAP_INLINE
118#endif
119
120#ifdef PV_STATS
121#define PV_STAT(x)	do { x ; } while (0)
122#else
123#define PV_STAT(x)	do { } while (0)
124#endif
125
126#define	pmap_accessed(lpte)		((lpte)->pte & PTE_ACCESSED)
127#define	pmap_dirty(lpte)		((lpte)->pte & PTE_DIRTY)
128#define	pmap_exec(lpte)			((lpte)->pte & PTE_AR_RX)
129#define	pmap_managed(lpte)		((lpte)->pte & PTE_MANAGED)
130#define	pmap_ppn(lpte)			((lpte)->pte & PTE_PPN_MASK)
131#define	pmap_present(lpte)		((lpte)->pte & PTE_PRESENT)
132#define	pmap_prot(lpte)			(((lpte)->pte & PTE_PROT_MASK) >> 56)
133#define	pmap_wired(lpte)		((lpte)->pte & PTE_WIRED)
134
135#define	pmap_clear_accessed(lpte)	(lpte)->pte &= ~PTE_ACCESSED
136#define	pmap_clear_dirty(lpte)		(lpte)->pte &= ~PTE_DIRTY
137#define	pmap_clear_present(lpte)	(lpte)->pte &= ~PTE_PRESENT
138#define	pmap_clear_wired(lpte)		(lpte)->pte &= ~PTE_WIRED
139
140#define	pmap_set_wired(lpte)		(lpte)->pte |= PTE_WIRED
141
142/*
143 * Individual PV entries are stored in per-pmap chunks.  This saves
144 * space by eliminating the need to record the pmap within every PV
145 * entry.
146 */
147#if PAGE_SIZE == 8192
148#define	_NPCM	6
149#define	_NPCPV	337
150#define	_NPCS	2
151#elif PAGE_SIZE == 16384
152#define	_NPCM	11
153#define	_NPCPV	677
154#define	_NPCS	1
155#endif
156struct pv_chunk {
157	pmap_t			pc_pmap;
158	TAILQ_ENTRY(pv_chunk)	pc_list;
159	u_long			pc_map[_NPCM];	/* bitmap; 1 = free */
160	TAILQ_ENTRY(pv_chunk)	pc_lru;
161	u_long			pc_spare[_NPCS];
162	struct pv_entry		pc_pventry[_NPCPV];
163};
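
/*
 * Sizing note: _NPCPV is chosen so that a pv_chunk fills a page
 * exactly (see the CTASSERT further down), _NPCM is the number of
 * 64-bit words needed for a bitmap with one bit per pv entry
 * (337 entries -> 6 words, 677 entries -> 11 words), and the _NPCS
 * spare words pad the structure out to PAGE_SIZE.
 */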
164
165/*
166 * The VHPT bucket head structure.
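 *
 * "chain" holds the physical address of the first ia64_lpte on the
 * bucket's collision chain (0 when the chain is empty), "mutex"
 * serializes updates to that chain, and "length" counts its entries
 * (summed by the machdep.vhpt.population sysctl).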
167 */
168struct ia64_bucket {
169	uint64_t	chain;
170	struct mtx	mutex;
171	u_int		length;
172};
173
174/*
175 * Statically allocated kernel pmap
176 */
177struct pmap kernel_pmap_store;
178
179vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
180vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
181
182/*
183 * Kernel virtual memory management.
184 */
185static int nkpt;
186extern struct ia64_lpte ***ia64_kptdir;
187
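/*
 * The kernel 'page tables' form a 3-level radix tree rooted at
 * ia64_kptdir: one directory page of pointers to second-level
 * directory pages, which point to leaf pages of ia64_lpte entries.
 * As a worked example (assuming 8 KB pages, i.e. PAGE_SHIFT == 13,
 * and a 32-byte struct ia64_lpte): a leaf page holds 256 PTEs
 * (NKPTEPG) covering 2 MB, a directory page holds 1024 pointers, and
 * so KPTE_PTE_INDEX uses VA bits 13..20, KPTE_DIR1_INDEX bits 21..30
 * and KPTE_DIR0_INDEX bits 31..40.
 */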
188#define KPTE_DIR0_INDEX(va) \
189	(((va) >> (3*PAGE_SHIFT-8)) & ((1<<(PAGE_SHIFT-3))-1))
190#define KPTE_DIR1_INDEX(va) \
191	(((va) >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
192#define KPTE_PTE_INDEX(va) \
193	(((va) >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
194#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
195
196vm_offset_t kernel_vm_end;
197
198/* Values for ptc.e. XXX values for SKI. */
199static uint64_t pmap_ptc_e_base = 0x100000000;
200static uint64_t pmap_ptc_e_count1 = 3;
201static uint64_t pmap_ptc_e_count2 = 2;
202static uint64_t pmap_ptc_e_stride1 = 0x2000;
203static uint64_t pmap_ptc_e_stride2 = 0x100000000;
204
205struct mtx pmap_ptc_mutex;
206
207/*
208 * Data for the RID allocator
209 */
210static int pmap_ridcount;
211static int pmap_rididx;
212static int pmap_ridmapsz;
213static int pmap_ridmax;
214static uint64_t *pmap_ridmap;
215struct mtx pmap_ridmutex;
216
217static struct rwlock_padalign pvh_global_lock;
218
219/*
220 * Data for the pv entry allocation mechanism
221 */
222static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
223static int pv_entry_count;
224
225/*
226 * Data for allocating PTEs for user processes.
227 */
228static uma_zone_t ptezone;
229
230/*
231 * Virtual Hash Page Table (VHPT) data.
232 */
233/* SYSCTL_DECL(_machdep); */
234static SYSCTL_NODE(_machdep, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
235
236struct ia64_bucket *pmap_vhpt_bucket;
237
238int pmap_vhpt_nbuckets;
239SYSCTL_INT(_machdep_vhpt, OID_AUTO, nbuckets, CTLFLAG_RD,
240    &pmap_vhpt_nbuckets, 0, "");
241
242int pmap_vhpt_log2size = 0;
243TUNABLE_INT("machdep.vhpt.log2size", &pmap_vhpt_log2size);
244SYSCTL_INT(_machdep_vhpt, OID_AUTO, log2size, CTLFLAG_RD,
245    &pmap_vhpt_log2size, 0, "");
246
247static int pmap_vhpt_inserts;
248SYSCTL_INT(_machdep_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
249    &pmap_vhpt_inserts, 0, "");
250
251static int pmap_vhpt_population(SYSCTL_HANDLER_ARGS);
252SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
253    NULL, 0, pmap_vhpt_population, "I", "");
254
255static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
256
257static void free_pv_chunk(struct pv_chunk *pc);
258static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
259static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
260static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
261
262static void	pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
263		    vm_page_t m, vm_prot_t prot);
264static void	pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va);
265static void	pmap_invalidate_all(void);
266static int	pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte,
267		    vm_offset_t va, pv_entry_t pv, int freepte);
268static int	pmap_remove_vhpt(vm_offset_t va);
269static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
270		    vm_page_t m);
271
272static void
273pmap_initialize_vhpt(vm_offset_t vhpt)
274{
275	struct ia64_lpte *pte;
276	u_int i;
277
278	pte = (struct ia64_lpte *)vhpt;
279	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
280		pte[i].pte = 0;
281		pte[i].itir = 0;
282		pte[i].tag = 1UL << 63; /* Invalid tag */
283		pte[i].chain = (uintptr_t)(pmap_vhpt_bucket + i);
284	}
285}
286
287#ifdef SMP
288vm_offset_t
289pmap_alloc_vhpt(void)
290{
291	vm_offset_t vhpt;
292	vm_page_t m;
293	vm_size_t size;
294
295	size = 1UL << pmap_vhpt_log2size;
296	m = vm_page_alloc_contig(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
297	    VM_ALLOC_WIRED, atop(size), 0UL, ~0UL, size, 0UL,
298	    VM_MEMATTR_DEFAULT);
299	if (m != NULL) {
300		vhpt = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
301		pmap_initialize_vhpt(vhpt);
302		return (vhpt);
303	}
304	return (0);
305}
306#endif
307
308/*
309 *	Bootstrap the system enough to run with virtual memory.
310 */
311void
pmap_bootstrap(void)
313{
314	struct ia64_pal_result res;
315	vm_offset_t base;
316	size_t size;
317	int i, ridbits;
318
319	/*
320	 * Query the PAL Code to find the loop parameters for the
321	 * ptc.e instruction.
322	 */
323	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
324	if (res.pal_status != 0)
325		panic("Can't configure ptc.e parameters");
326	pmap_ptc_e_base = res.pal_result[0];
327	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
328	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
329	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
330	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
331	if (bootverbose)
332		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
333		       "stride1=0x%lx, stride2=0x%lx\n",
334		       pmap_ptc_e_base,
335		       pmap_ptc_e_count1,
336		       pmap_ptc_e_count2,
337		       pmap_ptc_e_stride1,
338		       pmap_ptc_e_stride2);
339
340	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
341
342	/*
343	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
344	 *
345	 * We currently need at least 19 bits in the RID because PID_MAX
346	 * can only be encoded in 17 bits and we need RIDs for 4 regions
 * per process. With PID_MAX equal to 99999 this means that we
 * need to be able to encode 399996 (= 4 * PID_MAX).
 * The Itanium processor only has 18 bits and the architected
 * minimum is exactly that, so we cannot use a PID-based scheme
 * in those cases. Enter pmap_ridmap...
352	 * We should avoid the map when running on a processor that has
353	 * implemented enough bits. This means that we should pass the
354	 * process/thread ID to pmap. This we currently don't do, so we
355	 * use the map anyway. However, we don't want to allocate a map
356	 * that is large enough to cover the range dictated by the number
357	 * of bits in the RID, because that may result in a RID map of
358	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
 * The bottom line: we create a 32KB map when the processor only
360	 * implements 18 bits (or when we can't figure it out). Otherwise
361	 * we create a 64KB map.
362	 */
363	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
364	if (res.pal_status != 0) {
365		if (bootverbose)
366			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
367		ridbits = 18; /* guaranteed minimum */
368	} else {
369		ridbits = (res.pal_result[1] >> 8) & 0xff;
370		if (bootverbose)
371			printf("Processor supports %d Region ID bits\n",
372			    ridbits);
373	}
374	if (ridbits > 19)
375		ridbits = 19;
376
377	pmap_ridmax = (1 << ridbits);
378	pmap_ridmapsz = pmap_ridmax / 64;
379	pmap_ridmap = ia64_physmem_alloc(pmap_ridmax / 8, PAGE_SIZE);
380	pmap_ridmap[0] |= 0xff;
381	pmap_rididx = 0;
382	pmap_ridcount = 8;
383	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
384
385	/*
386	 * Allocate some memory for initial kernel 'page tables'.
387	 */
388	ia64_kptdir = ia64_physmem_alloc(PAGE_SIZE, PAGE_SIZE);
389	nkpt = 0;
390	kernel_vm_end = VM_INIT_KERNEL_ADDRESS;
391
392	/*
393	 * Determine a valid (mappable) VHPT size.
394	 */
395	TUNABLE_INT_FETCH("machdep.vhpt.log2size", &pmap_vhpt_log2size);
396	if (pmap_vhpt_log2size == 0)
397		pmap_vhpt_log2size = 20;
398	else if (pmap_vhpt_log2size < 16)
399		pmap_vhpt_log2size = 16;
400	else if (pmap_vhpt_log2size > 28)
401		pmap_vhpt_log2size = 28;
402	if (pmap_vhpt_log2size & 1)
403		pmap_vhpt_log2size--;
404
405	size = 1UL << pmap_vhpt_log2size;
406	base = (uintptr_t)ia64_physmem_alloc(size, size);
407	if (base == 0)
408		panic("Unable to allocate VHPT");
409
410	PCPU_SET(md.vhpt, base);
411	if (bootverbose)
412		printf("VHPT: address=%#lx, size=%#lx\n", base, size);
413
414	pmap_vhpt_nbuckets = size / sizeof(struct ia64_lpte);
415	pmap_vhpt_bucket = ia64_physmem_alloc(pmap_vhpt_nbuckets *
416	    sizeof(struct ia64_bucket), PAGE_SIZE);
417	for (i = 0; i < pmap_vhpt_nbuckets; i++) {
418		/* Stolen memory is zeroed. */
419		mtx_init(&pmap_vhpt_bucket[i].mutex, "VHPT bucket lock", NULL,
420		    MTX_NOWITNESS | MTX_SPIN);
421	}
422
423	pmap_initialize_vhpt(base);
424	map_vhpt(base);
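	/*
	 * PTA layout (illustrative): bit 0 enables the VHPT walker,
	 * bits 7:2 encode log2 of the table size, bit 8 selects the
	 * long-format (32-byte) VHPT entries, and the remaining upper
	 * bits hold the table's base address.
	 */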
425	ia64_set_pta(base + (1 << 8) + (pmap_vhpt_log2size << 2) + 1);
426	ia64_srlz_i();
427
428	virtual_avail = VM_INIT_KERNEL_ADDRESS;
429	virtual_end = VM_MAX_KERNEL_ADDRESS;
430
431	/*
432	 * Initialize the kernel pmap (which is statically allocated).
433	 */
434	PMAP_LOCK_INIT(kernel_pmap);
435	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
436		kernel_pmap->pm_rid[i] = 0;
437	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
438	PCPU_SET(md.current_pmap, kernel_pmap);
439
440 	/*
441	 * Initialize the global pv list lock.
442	 */
443	rw_init(&pvh_global_lock, "pmap pv global");
444
445	/* Region 5 is mapped via the VHPT. */
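	/* (RID 5 in bits 31:8, page size in bits 7:2, VHPT enable in bit 0.) */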
446	ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
447
448	/*
449	 * Clear out any random TLB entries left over from booting.
450	 */
451	pmap_invalidate_all();
452
453	map_gateway_page();
454}
455
456static int
457pmap_vhpt_population(SYSCTL_HANDLER_ARGS)
458{
459	int count, error, i;
460
461	count = 0;
462	for (i = 0; i < pmap_vhpt_nbuckets; i++)
463		count += pmap_vhpt_bucket[i].length;
464
465	error = SYSCTL_OUT(req, &count, sizeof(count));
466	return (error);
467}
468
469vm_offset_t
470pmap_page_to_va(vm_page_t m)
471{
472	vm_paddr_t pa;
473	vm_offset_t va;
474
475	pa = VM_PAGE_TO_PHYS(m);
476	va = (m->md.memattr == VM_MEMATTR_UNCACHEABLE) ? IA64_PHYS_TO_RR6(pa) :
477	    IA64_PHYS_TO_RR7(pa);
478	return (va);
479}
480
481/*
482 *	Initialize a vm_page's machine-dependent fields.
483 */
484void
485pmap_page_init(vm_page_t m)
486{
487
488	TAILQ_INIT(&m->md.pv_list);
489	m->md.memattr = VM_MEMATTR_DEFAULT;
490}
491
492/*
493 *	Initialize the pmap module.
494 *	Called by vm_init, to initialize any structures that the pmap
495 *	system needs to map virtual memory.
496 */
497void
498pmap_init(void)
499{
500
501	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
502	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
503}
504
505
506/***************************************************
507 * Manipulate TLBs for a pmap
508 ***************************************************/
509
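/*
 * pmap_invalidate_page() removes a translation from every CPU's VHPT
 * by atomically overwriting the matching tag with an invalid value,
 * and then purges the TLBs with a global ptc.ga.  pmap_ptc_mutex
 * serializes the purge, since only one global purge may be
 * outstanding at a time.
 */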
510static void
511pmap_invalidate_page(vm_offset_t va)
512{
513	struct ia64_lpte *pte;
514	struct pcpu *pc;
515	uint64_t tag;
516	u_int vhpt_ofs;
517
518	critical_enter();
519
520	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
521	tag = ia64_ttag(va);
522	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
523		pte = (struct ia64_lpte *)(pc->pc_md.vhpt + vhpt_ofs);
524		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
525	}
526
527	mtx_lock_spin(&pmap_ptc_mutex);
528
529	ia64_ptc_ga(va, PAGE_SHIFT << 2);
530	ia64_mf();
531	ia64_srlz_i();
532
533	mtx_unlock_spin(&pmap_ptc_mutex);
534
535	ia64_invala();
536
537	critical_exit();
538}
539
540static void
541pmap_invalidate_all_1(void *arg)
542{
543	uint64_t addr;
544	int i, j;
545
546	critical_enter();
547	addr = pmap_ptc_e_base;
548	for (i = 0; i < pmap_ptc_e_count1; i++) {
549		for (j = 0; j < pmap_ptc_e_count2; j++) {
550			ia64_ptc_e(addr);
551			addr += pmap_ptc_e_stride2;
552		}
553		addr += pmap_ptc_e_stride1;
554	}
555	critical_exit();
556}
557
558static void
559pmap_invalidate_all(void)
560{
561
562#ifdef SMP
563	if (mp_ncpus > 1) {
564		smp_rendezvous(NULL, pmap_invalidate_all_1, NULL, NULL);
565		return;
566	}
567#endif
568	pmap_invalidate_all_1(NULL);
569}
570
571static uint32_t
572pmap_allocate_rid(void)
573{
574	uint64_t bit, bits;
575	int rid;
576
577	mtx_lock(&pmap_ridmutex);
578	if (pmap_ridcount == pmap_ridmax)
579		panic("pmap_allocate_rid: All Region IDs used");
580
581	/* Find an index with a free bit. */
582	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
583		pmap_rididx++;
584		if (pmap_rididx == pmap_ridmapsz)
585			pmap_rididx = 0;
586	}
587	rid = pmap_rididx * 64;
588
589	/* Find a free bit. */
590	bit = 1UL;
591	while (bits & bit) {
592		rid++;
593		bit <<= 1;
594	}
595
596	pmap_ridmap[pmap_rididx] |= bit;
597	pmap_ridcount++;
598	mtx_unlock(&pmap_ridmutex);
599
600	return rid;
601}
602
603static void
604pmap_free_rid(uint32_t rid)
605{
606	uint64_t bit;
607	int idx;
608
609	idx = rid / 64;
610	bit = ~(1UL << (rid & 63));
611
612	mtx_lock(&pmap_ridmutex);
613	pmap_ridmap[idx] &= bit;
614	pmap_ridcount--;
615	mtx_unlock(&pmap_ridmutex);
616}
617
618/***************************************************
619 * Page table page management routines.....
620 ***************************************************/
621
622void
623pmap_pinit0(struct pmap *pmap)
624{
625
626	PMAP_LOCK_INIT(pmap);
627	/* kernel_pmap is the same as any other pmap. */
628	pmap_pinit(pmap);
629}
630
631/*
632 * Initialize a preallocated and zeroed pmap structure,
633 * such as one in a vmspace structure.
634 */
635int
636pmap_pinit(struct pmap *pmap)
637{
638	int i;
639
640	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
641		pmap->pm_rid[i] = pmap_allocate_rid();
642	TAILQ_INIT(&pmap->pm_pvchunk);
643	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
644	return (1);
645}
646
647/***************************************************
648 * Pmap allocation/deallocation routines.
649 ***************************************************/
650
651/*
652 * Release any resources held by the given physical map.
653 * Called when a pmap initialized by pmap_pinit is being released.
654 * Should only be called if the map contains no valid mappings.
655 */
656void
657pmap_release(pmap_t pmap)
658{
659	int i;
660
661	for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
662		if (pmap->pm_rid[i])
663			pmap_free_rid(pmap->pm_rid[i]);
664}
665
666/*
667 * grow the number of kernel page table entries, if needed
668 */
669void
670pmap_growkernel(vm_offset_t addr)
671{
672	struct ia64_lpte **dir1;
673	struct ia64_lpte *leaf;
674	vm_page_t nkpg;
675
676	while (kernel_vm_end <= addr) {
677		if (nkpt == PAGE_SIZE/8 + PAGE_SIZE*PAGE_SIZE/64)
678			panic("%s: out of kernel address space", __func__);
679
680		dir1 = ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)];
681		if (dir1 == NULL) {
682			nkpg = vm_page_alloc(NULL, nkpt++,
683			    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
684			if (!nkpg)
685				panic("%s: cannot add dir. page", __func__);
686
687			dir1 = (struct ia64_lpte **)pmap_page_to_va(nkpg);
688			bzero(dir1, PAGE_SIZE);
689			ia64_kptdir[KPTE_DIR0_INDEX(kernel_vm_end)] = dir1;
690		}
691
692		nkpg = vm_page_alloc(NULL, nkpt++,
693		    VM_ALLOC_NOOBJ|VM_ALLOC_INTERRUPT|VM_ALLOC_WIRED);
694		if (!nkpg)
695			panic("%s: cannot add PTE page", __func__);
696
697		leaf = (struct ia64_lpte *)pmap_page_to_va(nkpg);
698		bzero(leaf, PAGE_SIZE);
699		dir1[KPTE_DIR1_INDEX(kernel_vm_end)] = leaf;
700
701		kernel_vm_end += PAGE_SIZE * NKPTEPG;
702	}
703}
704
705/***************************************************
706 * page management routines.
707 ***************************************************/
708
709CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
710
711static __inline struct pv_chunk *
712pv_to_chunk(pv_entry_t pv)
713{
714
715	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
716}
717
718#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
719
720#define	PC_FREE_FULL	0xfffffffffffffffful
721#define	PC_FREE_PARTIAL	\
722	((1UL << (_NPCPV - sizeof(u_long) * 8 * (_NPCM - 1))) - 1)
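/*
 * PC_FREE_PARTIAL keeps only the bits of the last pc_map word that
 * correspond to real pv entries: with 8 KB pages, for example, 337
 * entries occupy five full 64-bit words plus 17 bits of the sixth,
 * so PC_FREE_PARTIAL == (1UL << 17) - 1.
 */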
723
724#if PAGE_SIZE == 8192
725static const u_long pc_freemask[_NPCM] = {
726	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
727	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_PARTIAL
728};
729#elif PAGE_SIZE == 16384
730static const u_long pc_freemask[_NPCM] = {
731	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
732	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
733	PC_FREE_FULL, PC_FREE_FULL, PC_FREE_FULL,
734	PC_FREE_FULL, PC_FREE_PARTIAL
735};
736#endif
737
738static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
739
740SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
741    "Current number of pv entries");
742
743#ifdef PV_STATS
744static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
745
746SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
747    "Current number of pv entry chunks");
748SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
749    "Current number of pv entry chunks allocated");
750SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
751    "Current number of pv entry chunks frees");
752SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
753    "Number of times tried to get a chunk page but failed.");
754
755static long pv_entry_frees, pv_entry_allocs;
756static int pv_entry_spare;
757
758SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
759    "Current number of pv entry frees");
760SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
761    "Current number of pv entry allocs");
762SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
763    "Current number of spare pv entries");
764#endif
765
766/*
767 * We are in a serious low memory condition.  Resort to
768 * drastic measures to free some pages so we can allocate
769 * another pv entry chunk.
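 * The strategy: walk the global pv_chunks LRU list and tear down
 * every non-wired mapping in each chunk until either a whole chunk
 * (and thus its backing page) can be freed, or at least one pv entry
 * has been freed in locked_pmap.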
770 */
771static vm_page_t
772pmap_pv_reclaim(pmap_t locked_pmap)
773{
774	struct pch newtail;
775	struct pv_chunk *pc;
776	struct ia64_lpte *pte;
777	pmap_t pmap;
778	pv_entry_t pv;
779	vm_offset_t va;
780	vm_page_t m, m_pc;
781	u_long inuse;
782	int bit, field, freed, idx;
783
784	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
785	pmap = NULL;
786	m_pc = NULL;
787	TAILQ_INIT(&newtail);
788	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
789		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
790		if (pmap != pc->pc_pmap) {
791			if (pmap != NULL) {
792				if (pmap != locked_pmap) {
793					pmap_switch(locked_pmap);
794					PMAP_UNLOCK(pmap);
795				}
796			}
797			pmap = pc->pc_pmap;
798			/* Avoid deadlock and lock recursion. */
799			if (pmap > locked_pmap)
800				PMAP_LOCK(pmap);
801			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
802				pmap = NULL;
803				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
804				continue;
805			}
806			pmap_switch(pmap);
807		}
808
809		/*
810		 * Destroy every non-wired, 8 KB page mapping in the chunk.
811		 */
812		freed = 0;
813		for (field = 0; field < _NPCM; field++) {
814			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
815			    inuse != 0; inuse &= ~(1UL << bit)) {
816				bit = ffsl(inuse) - 1;
817				idx = field * sizeof(inuse) * NBBY + bit;
818				pv = &pc->pc_pventry[idx];
819				va = pv->pv_va;
820				pte = pmap_find_vhpt(va);
821				KASSERT(pte != NULL, ("pte"));
822				if (pmap_wired(pte))
823					continue;
824				pmap_remove_vhpt(va);
825				pmap_invalidate_page(va);
826				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
827				if (pmap_accessed(pte))
828					vm_page_aflag_set(m, PGA_REFERENCED);
829				if (pmap_dirty(pte))
830					vm_page_dirty(m);
831				pmap_free_pte(pte, va);
832				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
833				if (TAILQ_EMPTY(&m->md.pv_list))
834					vm_page_aflag_clear(m, PGA_WRITEABLE);
835				pc->pc_map[field] |= 1UL << bit;
836				freed++;
837			}
838		}
839		if (freed == 0) {
840			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
841			continue;
842		}
		/* Every freed mapping is for an 8 KB page. */
844		pmap->pm_stats.resident_count -= freed;
845		PV_STAT(pv_entry_frees += freed);
846		PV_STAT(pv_entry_spare += freed);
847		pv_entry_count -= freed;
848		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
849		for (field = 0; field < _NPCM; field++)
850			if (pc->pc_map[field] != pc_freemask[field]) {
851				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
852				    pc_list);
853				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
854
855				/*
856				 * One freed pv entry in locked_pmap is
857				 * sufficient.
858				 */
859				if (pmap == locked_pmap)
860					goto out;
861				break;
862			}
863		if (field == _NPCM) {
864			PV_STAT(pv_entry_spare -= _NPCPV);
865			PV_STAT(pc_chunk_count--);
866			PV_STAT(pc_chunk_frees++);
867			/* Entire chunk is free; return it. */
868			m_pc = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
869			break;
870		}
871	}
872out:
873	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
874	if (pmap != NULL) {
875		if (pmap != locked_pmap) {
876			pmap_switch(locked_pmap);
877			PMAP_UNLOCK(pmap);
878		}
879	}
880	return (m_pc);
881}
882
883/*
884 * free the pv_entry back to the free list
885 */
886static void
887free_pv_entry(pmap_t pmap, pv_entry_t pv)
888{
889	struct pv_chunk *pc;
890	int bit, field, idx;
891
892	rw_assert(&pvh_global_lock, RA_WLOCKED);
893	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
894	PV_STAT(pv_entry_frees++);
895	PV_STAT(pv_entry_spare++);
896	pv_entry_count--;
897	pc = pv_to_chunk(pv);
898	idx = pv - &pc->pc_pventry[0];
899	field = idx / (sizeof(u_long) * NBBY);
900	bit = idx % (sizeof(u_long) * NBBY);
901	pc->pc_map[field] |= 1ul << bit;
902	for (idx = 0; idx < _NPCM; idx++)
903		if (pc->pc_map[idx] != pc_freemask[idx]) {
904			/*
905			 * 98% of the time, pc is already at the head of the
906			 * list.  If it isn't already, move it to the head.
907			 */
908			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
909			    pc)) {
910				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
911				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
912				    pc_list);
913			}
914			return;
915		}
916	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
917	free_pv_chunk(pc);
918}
919
920static void
921free_pv_chunk(struct pv_chunk *pc)
922{
923	vm_page_t m;
924
925 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
926	PV_STAT(pv_entry_spare -= _NPCPV);
927	PV_STAT(pc_chunk_count--);
928	PV_STAT(pc_chunk_frees++);
929	/* entire chunk is free, return it */
930	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
931	vm_page_unwire(m, 0);
932	vm_page_free(m);
933}
934
935/*
936 * get a new pv_entry, allocating a block from the system
937 * when needed.
938 */
939static pv_entry_t
940get_pv_entry(pmap_t pmap, boolean_t try)
941{
942	struct pv_chunk *pc;
943	pv_entry_t pv;
944	vm_page_t m;
945	int bit, field, idx;
946
947	rw_assert(&pvh_global_lock, RA_WLOCKED);
948	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
949	PV_STAT(pv_entry_allocs++);
950	pv_entry_count++;
951retry:
952	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
953	if (pc != NULL) {
954		for (field = 0; field < _NPCM; field++) {
955			if (pc->pc_map[field]) {
956				bit = ffsl(pc->pc_map[field]) - 1;
957				break;
958			}
959		}
960		if (field < _NPCM) {
961			idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
962			pv = &pc->pc_pventry[idx];
963			pc->pc_map[field] &= ~(1ul << bit);
964			/* If this was the last item, move it to tail */
965			for (field = 0; field < _NPCM; field++)
966				if (pc->pc_map[field] != 0) {
967					PV_STAT(pv_entry_spare--);
968					return (pv);	/* not full, return */
969				}
970			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
971			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
972			PV_STAT(pv_entry_spare--);
973			return (pv);
974		}
975	}
976	/* No free items, allocate another chunk */
977	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
978	    VM_ALLOC_WIRED);
979	if (m == NULL) {
980		if (try) {
981			pv_entry_count--;
982			PV_STAT(pc_chunk_tryfail++);
983			return (NULL);
984		}
985		m = pmap_pv_reclaim(pmap);
986		if (m == NULL)
987			goto retry;
988	}
989	PV_STAT(pc_chunk_count++);
990	PV_STAT(pc_chunk_allocs++);
991	pc = (struct pv_chunk *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
992	pc->pc_pmap = pmap;
993	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
994	for (field = 1; field < _NPCM; field++)
995		pc->pc_map[field] = pc_freemask[field];
996	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
997	pv = &pc->pc_pventry[0];
998	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
999	PV_STAT(pv_entry_spare += _NPCPV - 1);
1000	return (pv);
1001}
1002
1003/*
1004 * Conditionally create a pv entry.
1005 */
1006static boolean_t
1007pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1008{
1009	pv_entry_t pv;
1010
1011	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1012	rw_assert(&pvh_global_lock, RA_WLOCKED);
1013	if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
1014		pv->pv_va = va;
1015		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1016		return (TRUE);
1017	} else
1018		return (FALSE);
1019}
1020
1021/*
1022 * Add an ia64_lpte to the VHPT.
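 *
 * The authoritative translation lives on the bucket's collision
 * chain: a singly linked list of physical addresses rooted in the
 * ia64_bucket that the hashed VHPT slot points at.  New entries are
 * pushed at the front of that chain.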
1023 */
1024static void
1025pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1026{
1027	struct ia64_bucket *bckt;
1028	struct ia64_lpte *vhpte;
1029	uint64_t pte_pa;
1030
1031	/* Can fault, so get it out of the way. */
1032	pte_pa = ia64_tpa((vm_offset_t)pte);
1033
1034	vhpte = (struct ia64_lpte *)ia64_thash(va);
1035	bckt = (struct ia64_bucket *)vhpte->chain;
1036
1037	mtx_lock_spin(&bckt->mutex);
1038	pte->chain = bckt->chain;
1039	ia64_mf();
1040	bckt->chain = pte_pa;
1041
1042	pmap_vhpt_inserts++;
1043	bckt->length++;
1044	mtx_unlock_spin(&bckt->mutex);
1045}
1046
1047/*
1048 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1049 * worked or an appropriate error code otherwise.
1050 */
1051static int
1052pmap_remove_vhpt(vm_offset_t va)
1053{
1054	struct ia64_bucket *bckt;
1055	struct ia64_lpte *pte;
1056	struct ia64_lpte *lpte;
1057	struct ia64_lpte *vhpte;
1058	uint64_t chain, tag;
1059
1060	tag = ia64_ttag(va);
1061	vhpte = (struct ia64_lpte *)ia64_thash(va);
1062	bckt = (struct ia64_bucket *)vhpte->chain;
1063
1064	lpte = NULL;
1065	mtx_lock_spin(&bckt->mutex);
1066	chain = bckt->chain;
1067	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1068	while (chain != 0 && pte->tag != tag) {
1069		lpte = pte;
1070		chain = pte->chain;
1071		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1072	}
1073	if (chain == 0) {
1074		mtx_unlock_spin(&bckt->mutex);
1075		return (ENOENT);
1076	}
1077
	/* Snip this pte out of the collision chain. */
1079	if (lpte == NULL)
1080		bckt->chain = pte->chain;
1081	else
1082		lpte->chain = pte->chain;
1083	ia64_mf();
1084
1085	bckt->length--;
1086	mtx_unlock_spin(&bckt->mutex);
1087	return (0);
1088}
1089
1090/*
1091 * Find the ia64_lpte for the given va, if any.
1092 */
1093static struct ia64_lpte *
1094pmap_find_vhpt(vm_offset_t va)
1095{
1096	struct ia64_bucket *bckt;
1097	struct ia64_lpte *pte;
1098	uint64_t chain, tag;
1099
1100	tag = ia64_ttag(va);
1101	pte = (struct ia64_lpte *)ia64_thash(va);
1102	bckt = (struct ia64_bucket *)pte->chain;
1103
1104	mtx_lock_spin(&bckt->mutex);
1105	chain = bckt->chain;
1106	pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1107	while (chain != 0 && pte->tag != tag) {
1108		chain = pte->chain;
1109		pte = (struct ia64_lpte *)IA64_PHYS_TO_RR7(chain);
1110	}
1111	mtx_unlock_spin(&bckt->mutex);
1112	return ((chain != 0) ? pte : NULL);
1113}
1114
1115/*
1116 * Remove an entry from the list of managed mappings.
1117 */
1118static int
1119pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1120{
1121
1122	rw_assert(&pvh_global_lock, RA_WLOCKED);
1123	if (!pv) {
1124		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1125			if (pmap == PV_PMAP(pv) && va == pv->pv_va)
1126				break;
1127		}
1128	}
1129
1130	if (pv) {
1131		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1132		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1133			vm_page_aflag_clear(m, PGA_WRITEABLE);
1134
1135		free_pv_entry(pmap, pv);
1136		return 0;
1137	} else {
1138		return ENOENT;
1139	}
1140}
1141
1142/*
 * Create a pv entry for the page at pa for the
 * given (pmap, va).
1145 */
1146static void
1147pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1148{
1149	pv_entry_t pv;
1150
1151	rw_assert(&pvh_global_lock, RA_WLOCKED);
1152	pv = get_pv_entry(pmap, FALSE);
1153	pv->pv_va = va;
1154	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1155}
1156
1157/*
1158 *	Routine:	pmap_extract
1159 *	Function:
1160 *		Extract the physical page address associated
1161 *		with the given map/virtual_address pair.
1162 */
1163vm_paddr_t
1164pmap_extract(pmap_t pmap, vm_offset_t va)
1165{
1166	struct ia64_lpte *pte;
1167	pmap_t oldpmap;
1168	vm_paddr_t pa;
1169
1170	pa = 0;
1171	PMAP_LOCK(pmap);
1172	oldpmap = pmap_switch(pmap);
1173	pte = pmap_find_vhpt(va);
1174	if (pte != NULL && pmap_present(pte))
1175		pa = pmap_ppn(pte);
1176	pmap_switch(oldpmap);
1177	PMAP_UNLOCK(pmap);
1178	return (pa);
1179}
1180
1181/*
1182 *	Routine:	pmap_extract_and_hold
1183 *	Function:
1184 *		Atomically extract and hold the physical page
1185 *		with the given pmap and virtual address pair
1186 *		if that mapping permits the given protection.
1187 */
1188vm_page_t
1189pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1190{
1191	struct ia64_lpte *pte;
1192	pmap_t oldpmap;
1193	vm_page_t m;
1194	vm_paddr_t pa;
1195
1196	pa = 0;
1197	m = NULL;
1198	PMAP_LOCK(pmap);
1199	oldpmap = pmap_switch(pmap);
1200retry:
1201	pte = pmap_find_vhpt(va);
1202	if (pte != NULL && pmap_present(pte) &&
1203	    (pmap_prot(pte) & prot) == prot) {
1204		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1205		if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa))
1206			goto retry;
1207		vm_page_hold(m);
1208	}
1209	PA_UNLOCK_COND(pa);
1210	pmap_switch(oldpmap);
1211	PMAP_UNLOCK(pmap);
1212	return (m);
1213}
1214
1215/***************************************************
1216 * Low level mapping routines.....
1217 ***************************************************/
1218
1219/*
1220 * Find the kernel lpte for mapping the given virtual address, which
1221 * must be in the part of region 5 which we can cover with our kernel
1222 * 'page tables'.
1223 */
1224static struct ia64_lpte *
1225pmap_find_kpte(vm_offset_t va)
1226{
1227	struct ia64_lpte **dir1;
1228	struct ia64_lpte *leaf;
1229
1230	KASSERT((va >> 61) == 5,
1231		("kernel mapping 0x%lx not in region 5", va));
1232	KASSERT(va < kernel_vm_end,
1233		("kernel mapping 0x%lx out of range", va));
1234
1235	dir1 = ia64_kptdir[KPTE_DIR0_INDEX(va)];
1236	leaf = dir1[KPTE_DIR1_INDEX(va)];
1237	return (&leaf[KPTE_PTE_INDEX(va)]);
1238}
1239
1240/*
1241 * Find a pte suitable for mapping a user-space address. If one exists
1242 * in the VHPT, that one will be returned, otherwise a new pte is
1243 * allocated.
1244 */
1245static struct ia64_lpte *
1246pmap_find_pte(vm_offset_t va)
1247{
1248	struct ia64_lpte *pte;
1249
1250	if (va >= VM_MAXUSER_ADDRESS)
1251		return pmap_find_kpte(va);
1252
1253	pte = pmap_find_vhpt(va);
1254	if (pte == NULL) {
		pte = uma_zalloc(ptezone, M_NOWAIT | M_ZERO);
		if (pte != NULL)
			pte->tag = 1UL << 63;
1257	}
1258	return (pte);
1259}
1260
1261/*
1262 * Free a pte which is now unused. This simply returns it to the zone
1263 * allocator if it is a user mapping. For kernel mappings, clear the
1264 * valid bit to make it clear that the mapping is not currently used.
1265 */
1266static void
1267pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1268{
1269	if (va < VM_MAXUSER_ADDRESS)
1270		uma_zfree(ptezone, pte);
1271	else
1272		pmap_clear_present(pte);
1273}
1274
1275static PMAP_INLINE void
1276pmap_pte_prot(pmap_t pm, struct ia64_lpte *pte, vm_prot_t prot)
1277{
1278	static long prot2ar[4] = {
1279		PTE_AR_R,		/* VM_PROT_NONE */
1280		PTE_AR_RW,		/* VM_PROT_WRITE */
1281		PTE_AR_RX|PTE_ED,	/* VM_PROT_EXECUTE */
1282		PTE_AR_RWX|PTE_ED	/* VM_PROT_WRITE|VM_PROT_EXECUTE */
1283	};
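	/*
	 * The table is indexed by (prot & VM_PROT_ALL) >> 1, which
	 * drops the read bit: all of the access-rights values used
	 * here permit reads, so only the write and execute bits
	 * select an entry.
	 */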
1284
1285	pte->pte &= ~(PTE_PROT_MASK | PTE_PL_MASK | PTE_AR_MASK | PTE_ED);
1286	pte->pte |= (uint64_t)(prot & VM_PROT_ALL) << 56;
1287	pte->pte |= (prot == VM_PROT_NONE || pm == kernel_pmap)
1288	    ? PTE_PL_KERN : PTE_PL_USER;
1289	pte->pte |= prot2ar[(prot & VM_PROT_ALL) >> 1];
1290}
1291
1292static PMAP_INLINE void
1293pmap_pte_attr(struct ia64_lpte *pte, vm_memattr_t ma)
1294{
1295
1296	pte->pte &= ~PTE_MA_MASK;
1297	pte->pte |= (ma & PTE_MA_MASK);
1298}
1299
1300/*
1301 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it is assumed to already be in
 * the VHPT.
 * This function does not set the protection bits.  It's expected
1305 * that those have been set correctly prior to calling this function.
1306 */
1307static void
1308pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1309    boolean_t wired, boolean_t managed)
1310{
1311
1312	pte->pte &= PTE_PROT_MASK | PTE_MA_MASK | PTE_PL_MASK |
1313	    PTE_AR_MASK | PTE_ED;
1314	pte->pte |= PTE_PRESENT;
1315	pte->pte |= (managed) ? PTE_MANAGED : (PTE_DIRTY | PTE_ACCESSED);
1316	pte->pte |= (wired) ? PTE_WIRED : 0;
1317	pte->pte |= pa & PTE_PPN_MASK;
1318
1319	pte->itir = PAGE_SHIFT << 2;
1320
1321	pte->tag = ia64_ttag(va);
1322}
1323
1324/*
1325 * Remove the (possibly managed) mapping represented by pte from the
1326 * given pmap.
1327 */
1328static int
1329pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1330		pv_entry_t pv, int freepte)
1331{
1332	int error;
1333	vm_page_t m;
1334
1335	/*
1336	 * First remove from the VHPT.
1337	 */
1338	error = pmap_remove_vhpt(va);
1339	if (error)
1340		return (error);
1341
1342	pmap_invalidate_page(va);
1343
1344	if (pmap_wired(pte))
1345		pmap->pm_stats.wired_count -= 1;
1346
1347	pmap->pm_stats.resident_count -= 1;
1348	if (pmap_managed(pte)) {
1349		m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
1350		if (pmap_dirty(pte))
1351			vm_page_dirty(m);
1352		if (pmap_accessed(pte))
1353			vm_page_aflag_set(m, PGA_REFERENCED);
1354
1355		error = pmap_remove_entry(pmap, m, va, pv);
1356	}
1357	if (freepte)
1358		pmap_free_pte(pte, va);
1359
1360	return (error);
1361}
1362
1363/*
1364 * Extract the physical page address associated with a kernel
1365 * virtual address.
1366 */
1367vm_paddr_t
1368pmap_kextract(vm_offset_t va)
1369{
1370	struct ia64_lpte *pte;
1371	uint64_t *pbvm_pgtbl;
1372	vm_paddr_t pa;
1373	u_int idx;
1374
1375	KASSERT(va >= VM_MAXUSER_ADDRESS, ("Must be kernel VA"));
1376
1377	/* Regions 6 and 7 are direct mapped. */
1378	if (va >= IA64_RR_BASE(6)) {
1379		pa = IA64_RR_MASK(va);
1380		goto out;
1381	}
1382
1383	/* Region 5 is our KVA. Bail out if the VA is beyond our limits. */
1384	if (va >= kernel_vm_end)
1385		goto err_out;
1386	if (va >= VM_INIT_KERNEL_ADDRESS) {
1387		pte = pmap_find_kpte(va);
1388		pa = pmap_present(pte) ? pmap_ppn(pte) | (va & PAGE_MASK) : 0;
1389		goto out;
1390	}
1391
1392	/* The PBVM page table. */
1393	if (va >= IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz)
1394		goto err_out;
1395	if (va >= IA64_PBVM_PGTBL) {
1396		pa = (va - IA64_PBVM_PGTBL) + bootinfo->bi_pbvm_pgtbl;
1397		goto out;
1398	}
1399
1400	/* The PBVM itself. */
1401	if (va >= IA64_PBVM_BASE) {
1402		pbvm_pgtbl = (void *)IA64_PBVM_PGTBL;
1403		idx = (va - IA64_PBVM_BASE) >> IA64_PBVM_PAGE_SHIFT;
1404		if (idx >= (bootinfo->bi_pbvm_pgtblsz >> 3))
1405			goto err_out;
1406		if ((pbvm_pgtbl[idx] & PTE_PRESENT) == 0)
1407			goto err_out;
1408		pa = (pbvm_pgtbl[idx] & PTE_PPN_MASK) +
1409		    (va & IA64_PBVM_PAGE_MASK);
1410		goto out;
1411	}
1412
1413 err_out:
1414	printf("XXX: %s: va=%#lx is invalid\n", __func__, va);
1415	pa = 0;
1416	/* FALLTHROUGH */
1417
1418 out:
1419	return (pa);
1420}
1421
1422/*
 * Add a list of wired pages to the kva.  This routine is only used for
1424 * temporary kernel mappings that do not need to have page modification
1425 * or references recorded.  Note that old mappings are simply written
1426 * over.  The page is effectively wired, but it's customary to not have
1427 * the PTE reflect that, nor update statistics.
1428 */
1429void
1430pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1431{
1432	struct ia64_lpte *pte;
1433	int i;
1434
1435	for (i = 0; i < count; i++) {
1436		pte = pmap_find_kpte(va);
1437		if (pmap_present(pte))
1438			pmap_invalidate_page(va);
1439		else
1440			pmap_enter_vhpt(pte, va);
1441		pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1442		pmap_pte_attr(pte, m[i]->md.memattr);
1443		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m[i]), FALSE, FALSE);
1444		va += PAGE_SIZE;
1445	}
1446}
1447
1448/*
1449 * this routine jerks page mappings from the
1450 * kernel -- it is meant only for temporary mappings.
1451 */
1452void
1453pmap_qremove(vm_offset_t va, int count)
1454{
1455	struct ia64_lpte *pte;
1456	int i;
1457
1458	for (i = 0; i < count; i++) {
1459		pte = pmap_find_kpte(va);
1460		if (pmap_present(pte)) {
1461			pmap_remove_vhpt(va);
1462			pmap_invalidate_page(va);
1463			pmap_clear_present(pte);
1464		}
1465		va += PAGE_SIZE;
1466	}
1467}
1468
1469/*
1470 * Add a wired page to the kva.  As for pmap_qenter(), it's customary
1471 * to not have the PTE reflect that, nor update statistics.
1472 */
1473void
1474pmap_kenter(vm_offset_t va, vm_offset_t pa)
1475{
1476	struct ia64_lpte *pte;
1477
1478	pte = pmap_find_kpte(va);
1479	if (pmap_present(pte))
1480		pmap_invalidate_page(va);
1481	else
1482		pmap_enter_vhpt(pte, va);
1483	pmap_pte_prot(kernel_pmap, pte, VM_PROT_ALL);
1484	pmap_pte_attr(pte, VM_MEMATTR_DEFAULT);
1485	pmap_set_pte(pte, va, pa, FALSE, FALSE);
1486}
1487
1488/*
1489 * Remove a page from the kva
1490 */
1491void
1492pmap_kremove(vm_offset_t va)
1493{
1494	struct ia64_lpte *pte;
1495
1496	pte = pmap_find_kpte(va);
1497	if (pmap_present(pte)) {
1498		pmap_remove_vhpt(va);
1499		pmap_invalidate_page(va);
1500		pmap_clear_present(pte);
1501	}
1502}
1503
1504/*
1505 *	Used to map a range of physical addresses into kernel
1506 *	virtual address space.
1507 *
1508 *	The value passed in '*virt' is a suggested virtual address for
1509 *	the mapping. Architectures which can support a direct-mapped
1510 *	physical to virtual region can return the appropriate address
1511 *	within that region, leaving '*virt' unchanged. Other
1512 *	architectures should map the pages starting at '*virt' and
1513 *	update '*virt' with the first usable address after the mapped
1514 *	region.
1515 */
1516vm_offset_t
1517pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1518{
1519	return IA64_PHYS_TO_RR7(start);
1520}
1521
1522/*
1523 *	Remove the given range of addresses from the specified map.
1524 *
1525 *	It is assumed that the start and end are properly
1526 *	rounded to the page size.
1527 *
1528 *	Sparsely used ranges are inefficiently removed.  The VHPT is
1529 *	probed for every page within the range.  XXX
1530 */
1531void
1532pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1533{
1534	pmap_t oldpmap;
1535	vm_offset_t va;
1536	struct ia64_lpte *pte;
1537
1538	/*
1539	 * Perform an unsynchronized read.  This is, however, safe.
1540	 */
1541	if (pmap->pm_stats.resident_count == 0)
1542		return;
1543
1544	rw_wlock(&pvh_global_lock);
1545	PMAP_LOCK(pmap);
1546	oldpmap = pmap_switch(pmap);
1547	for (va = sva; va < eva; va += PAGE_SIZE) {
1548		pte = pmap_find_vhpt(va);
1549		if (pte != NULL)
1550			pmap_remove_pte(pmap, pte, va, 0, 1);
1551	}
1552	rw_wunlock(&pvh_global_lock);
1553	pmap_switch(oldpmap);
1554	PMAP_UNLOCK(pmap);
1555}
1556
1557/*
1558 *	Routine:	pmap_remove_all
1559 *	Function:
1560 *		Removes this physical page from
1561 *		all physical maps in which it resides.
1562 *		Reflects back modify bits to the pager.
1563 *
1564 *	Notes:
1565 *		Original versions of this routine were very
1566 *		inefficient because they iteratively called
1567 *		pmap_remove (slow...)
1568 */
1569
1570void
1571pmap_remove_all(vm_page_t m)
1572{
1573	pmap_t oldpmap;
1574	pv_entry_t pv;
1575
1576	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1577	    ("pmap_remove_all: page %p is not managed", m));
1578	rw_wlock(&pvh_global_lock);
1579	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1580		struct ia64_lpte *pte;
1581		pmap_t pmap = PV_PMAP(pv);
1582		vm_offset_t va = pv->pv_va;
1583
1584		PMAP_LOCK(pmap);
1585		oldpmap = pmap_switch(pmap);
1586		pte = pmap_find_vhpt(va);
1587		KASSERT(pte != NULL, ("pte"));
1588		if (pmap_ppn(pte) != VM_PAGE_TO_PHYS(m))
1589			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1590		pmap_remove_pte(pmap, pte, va, pv, 1);
1591		pmap_switch(oldpmap);
1592		PMAP_UNLOCK(pmap);
1593	}
1594	vm_page_aflag_clear(m, PGA_WRITEABLE);
1595	rw_wunlock(&pvh_global_lock);
1596}
1597
1598/*
1599 *	Set the physical protection on the
1600 *	specified range of this map as requested.
1601 */
1602void
1603pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1604{
1605	pmap_t oldpmap;
1606	struct ia64_lpte *pte;
1607
1608	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1609		pmap_remove(pmap, sva, eva);
1610		return;
1611	}
1612
1613	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
1614	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
1615		return;
1616
1617	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1618		panic("pmap_protect: unaligned addresses");
1619
1620	PMAP_LOCK(pmap);
1621	oldpmap = pmap_switch(pmap);
1622	for ( ; sva < eva; sva += PAGE_SIZE) {
1623		/* If page is invalid, skip this page */
1624		pte = pmap_find_vhpt(sva);
1625		if (pte == NULL)
1626			continue;
1627
1628		/* If there's no change, skip it too */
1629		if (pmap_prot(pte) == prot)
1630			continue;
1631
1632		if ((prot & VM_PROT_WRITE) == 0 &&
1633		    pmap_managed(pte) && pmap_dirty(pte)) {
1634			vm_paddr_t pa = pmap_ppn(pte);
1635			vm_page_t m = PHYS_TO_VM_PAGE(pa);
1636
1637			vm_page_dirty(m);
1638			pmap_clear_dirty(pte);
1639		}
1640
1641		if (prot & VM_PROT_EXECUTE)
1642			ia64_sync_icache(sva, PAGE_SIZE);
1643
1644		pmap_pte_prot(pmap, pte, prot);
1645		pmap_invalidate_page(sva);
1646	}
1647	pmap_switch(oldpmap);
1648	PMAP_UNLOCK(pmap);
1649}
1650
1651/*
1652 *	Insert the given physical page (p) at
1653 *	the specified virtual address (v) in the
1654 *	target physical map with the protection requested.
1655 *
1656 *	If specified, the page will be wired down, meaning
1657 *	that the related pte can not be reclaimed.
1658 *
1659 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1660 *	or lose information.  That is, this routine must actually
1661 *	insert this page into the given map NOW.
1662 */
1663void
1664pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1665    vm_prot_t prot, boolean_t wired)
1666{
1667	pmap_t oldpmap;
1668	vm_offset_t pa;
1669	vm_offset_t opa;
1670	struct ia64_lpte origpte;
1671	struct ia64_lpte *pte;
1672	boolean_t icache_inval, managed;
1673
1674	rw_wlock(&pvh_global_lock);
1675	PMAP_LOCK(pmap);
1676	oldpmap = pmap_switch(pmap);
1677
1678	va &= ~PAGE_MASK;
1679 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1680	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m),
1681	    ("pmap_enter: page %p is not busy", m));
1682
1683	/*
1684	 * Find (or create) a pte for the given mapping.
1685	 */
1686	while ((pte = pmap_find_pte(va)) == NULL) {
1687		pmap_switch(oldpmap);
1688		PMAP_UNLOCK(pmap);
1689		rw_wunlock(&pvh_global_lock);
1690		VM_WAIT;
1691		rw_wlock(&pvh_global_lock);
1692		PMAP_LOCK(pmap);
1693		oldpmap = pmap_switch(pmap);
1694	}
1695	origpte = *pte;
1696	if (!pmap_present(pte)) {
1697		opa = ~0UL;
1698		pmap_enter_vhpt(pte, va);
1699	} else
1700		opa = pmap_ppn(pte);
1701	managed = FALSE;
1702	pa = VM_PAGE_TO_PHYS(m);
1703
1704	icache_inval = (prot & VM_PROT_EXECUTE) ? TRUE : FALSE;
1705
1706	/*
1707	 * Mapping has not changed, must be protection or wiring change.
1708	 */
1709	if (opa == pa) {
1710		/*
1711		 * Wiring change, just update stats. We don't worry about
1712		 * wiring PT pages as they remain resident as long as there
1713		 * are valid mappings in them. Hence, if a user page is wired,
1714		 * the PT page will be also.
1715		 */
1716		if (wired && !pmap_wired(&origpte))
1717			pmap->pm_stats.wired_count++;
1718		else if (!wired && pmap_wired(&origpte))
1719			pmap->pm_stats.wired_count--;
1720
1721		managed = (pmap_managed(&origpte)) ? TRUE : FALSE;
1722
1723		/*
1724		 * We might be turning off write access to the page,
1725		 * so we go ahead and sense modify status. Otherwise,
1726		 * we can avoid I-cache invalidation if the page
1727		 * already allowed execution.
1728		 */
1729		if (managed && pmap_dirty(&origpte))
1730			vm_page_dirty(m);
1731		else if (pmap_exec(&origpte))
1732			icache_inval = FALSE;
1733
1734		pmap_invalidate_page(va);
1735		goto validate;
1736	}
1737
1738	/*
1739	 * Mapping has changed, invalidate old range and fall
1740	 * through to handle validating new mapping.
1741	 */
1742	if (opa != ~0UL) {
1743		pmap_remove_pte(pmap, pte, va, 0, 0);
1744		pmap_enter_vhpt(pte, va);
1745	}
1746
1747	/*
1748	 * Enter on the PV list if part of our managed memory.
1749	 */
1750	if ((m->oflags & VPO_UNMANAGED) == 0) {
1751		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1752		    ("pmap_enter: managed mapping within the clean submap"));
1753		pmap_insert_entry(pmap, va, m);
1754		managed = TRUE;
1755	}
1756
1757	/*
1758	 * Increment counters
1759	 */
1760	pmap->pm_stats.resident_count++;
1761	if (wired)
1762		pmap->pm_stats.wired_count++;
1763
1764validate:
1765
1766	/*
1767	 * Now validate mapping with desired protection/wiring. This
1768	 * adds the pte to the VHPT if necessary.
1769	 */
1770	pmap_pte_prot(pmap, pte, prot);
1771	pmap_pte_attr(pte, m->md.memattr);
1772	pmap_set_pte(pte, va, pa, wired, managed);
1773
1774	/* Invalidate the I-cache when needed. */
1775	if (icache_inval)
1776		ia64_sync_icache(va, PAGE_SIZE);
1777
1778	if ((prot & VM_PROT_WRITE) != 0 && managed)
1779		vm_page_aflag_set(m, PGA_WRITEABLE);
1780	rw_wunlock(&pvh_global_lock);
1781	pmap_switch(oldpmap);
1782	PMAP_UNLOCK(pmap);
1783}
1784
1785/*
1786 * Maps a sequence of resident pages belonging to the same object.
1787 * The sequence begins with the given page m_start.  This page is
1788 * mapped at the given virtual address start.  Each subsequent page is
1789 * mapped at a virtual address that is offset from start by the same
1790 * amount as the page is offset from m_start within the object.  The
1791 * last page in the sequence is the page with the largest offset from
1792 * m_start that can be mapped at a virtual address less than the given
1793 * virtual address end.  Not every virtual page between start and end
1794 * is mapped; only those for which a resident page exists with the
1795 * corresponding offset from m_start are mapped.
1796 */
1797void
1798pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
1799    vm_page_t m_start, vm_prot_t prot)
1800{
1801	pmap_t oldpmap;
1802	vm_page_t m;
1803	vm_pindex_t diff, psize;
1804
1805	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1806
1807	psize = atop(end - start);
1808	m = m_start;
1809	rw_wlock(&pvh_global_lock);
1810	PMAP_LOCK(pmap);
1811	oldpmap = pmap_switch(pmap);
1812	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1813		pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
1814		m = TAILQ_NEXT(m, listq);
1815	}
1816	rw_wunlock(&pvh_global_lock);
1817	pmap_switch(oldpmap);
1818 	PMAP_UNLOCK(pmap);
1819}
1820
1821/*
1822 * this code makes some *MAJOR* assumptions:
1823 * 1. Current pmap & pmap exists.
1824 * 2. Not wired.
1825 * 3. Read access.
1826 * 4. No page table pages.
1827 * but is *MUCH* faster than pmap_enter...
1828 */
1829
1830void
1831pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
1832{
1833	pmap_t oldpmap;
1834
1835	rw_wlock(&pvh_global_lock);
1836	PMAP_LOCK(pmap);
1837	oldpmap = pmap_switch(pmap);
1838	pmap_enter_quick_locked(pmap, va, m, prot);
1839	rw_wunlock(&pvh_global_lock);
1840	pmap_switch(oldpmap);
1841	PMAP_UNLOCK(pmap);
1842}
1843
1844static void
1845pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
1846    vm_prot_t prot)
1847{
1848	struct ia64_lpte *pte;
1849	boolean_t managed;
1850
1851	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
1852	    (m->oflags & VPO_UNMANAGED) != 0,
1853	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
1854	rw_assert(&pvh_global_lock, RA_WLOCKED);
1855	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1856
1857	if ((pte = pmap_find_pte(va)) == NULL)
1858		return;
1859
1860	if (!pmap_present(pte)) {
1861		/* Enter on the PV list if the page is managed. */
1862		if ((m->oflags & VPO_UNMANAGED) == 0) {
1863			if (!pmap_try_insert_pv_entry(pmap, va, m)) {
1864				pmap_free_pte(pte, va);
1865				return;
1866			}
1867			managed = TRUE;
1868		} else
1869			managed = FALSE;
1870
1871		/* Increment counters. */
1872		pmap->pm_stats.resident_count++;
1873
1874		/* Initialise with R/O protection and enter into VHPT. */
1875		pmap_enter_vhpt(pte, va);
1876		pmap_pte_prot(pmap, pte,
1877		    prot & (VM_PROT_READ | VM_PROT_EXECUTE));
1878		pmap_pte_attr(pte, m->md.memattr);
1879		pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m), FALSE, managed);
1880
1881		if (prot & VM_PROT_EXECUTE)
1882			ia64_sync_icache(va, PAGE_SIZE);
1883	}
1884}
1885
1886/*
1887 * pmap_object_init_pt preloads the ptes for a given object
1888 * into the specified pmap.  This eliminates the blast of soft
1889 * faults on process startup and immediately after an mmap.
1890 */
1891void
1892pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1893		    vm_object_t object, vm_pindex_t pindex,
1894		    vm_size_t size)
1895{
1896
1897	VM_OBJECT_ASSERT_WLOCKED(object);
1898	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
1899	    ("pmap_object_init_pt: non-device object"));
1900}
1901
1902/*
1903 *	Routine:	pmap_change_wiring
1904 *	Function:	Change the wiring attribute for a map/virtual-address
1905 *			pair.
1906 *	In/out conditions:
1907 *			The mapping must already exist in the pmap.
1908 */
1909void
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1914{
1915	pmap_t oldpmap;
1916	struct ia64_lpte *pte;
1917
1918	PMAP_LOCK(pmap);
1919	oldpmap = pmap_switch(pmap);
1920
1921	pte = pmap_find_vhpt(va);
1922	KASSERT(pte != NULL, ("pte"));
1923	if (wired && !pmap_wired(pte)) {
1924		pmap->pm_stats.wired_count++;
1925		pmap_set_wired(pte);
1926	} else if (!wired && pmap_wired(pte)) {
1927		pmap->pm_stats.wired_count--;
1928		pmap_clear_wired(pte);
1929	}
1930
1931	pmap_switch(oldpmap);
1932	PMAP_UNLOCK(pmap);
1933}
1934
1935
1936
1937/*
1938 *	Copy the range specified by src_addr/len
1939 *	from the source map to the range dst_addr/len
1940 *	in the destination map.
1941 *
1942 *	This routine is only advisory and need not do anything.
1943 */
1944
1945void
1946pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
1947	  vm_offset_t src_addr)
1948{
1949}
1950
1951
1952/*
1953 *	pmap_zero_page zeros the specified hardware page by
1954 *	mapping it into virtual memory and using bzero to clear
1955 *	its contents.
1956 */

void
pmap_zero_page(vm_page_t m)
{
	void *p;

	p = (void *)pmap_page_to_va(m);
	bzero(p, PAGE_SIZE);
}

/*
 *	pmap_zero_page_area zeros the specified portion of a hardware page
 *	by clearing it through the page's direct-mapped address.
 *
 *	off and size must reside within a single page.
 */

void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	char *p;

	p = (void *)pmap_page_to_va(m);
	bzero(p + off, size);
}

/*
 *	pmap_zero_page_idle zeros the specified hardware page through its
 *	direct-mapped address.  This is for the vm_idlezero process.
 */

void
pmap_zero_page_idle(vm_page_t m)
{
	void *p;

	p = (void *)pmap_page_to_va(m);
	bzero(p, PAGE_SIZE);
}

/*
 *	pmap_copy_page copies the specified (machine independent) page by
 *	using bcopy() on the source and destination pages' direct-mapped
 *	addresses, one machine dependent page at a time.
 */
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
	void *dst, *src;

	src = (void *)pmap_page_to_va(msrc);
	dst = (void *)pmap_page_to_va(mdst);
	bcopy(src, dst, PAGE_SIZE);
}

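/*
 * Unmapped buffer I/O is not enabled on ia64: leaving this at its default
 * value of 0 tells the buffer cache to always provide fully mapped buffers.
 */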
int unmapped_buf_allowed;

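/*
 * Copy xfersize bytes from the pages ma[] starting at byte offset a_offset
 * to the pages mb[] starting at byte offset b_offset, using the pages'
 * direct-mapped addresses.  Each iteration copies the largest chunk that
 * stays within a single source page and a single destination page.  For
 * illustration (hypothetical offsets): with a_offset = 0x1f00, b_offset = 0,
 * an 8K page size and xfersize of at least 256 bytes, the first chunk is
 * 256 bytes (up to the end of ma[0]) and the next chunk starts at the
 * beginning of ma[1].
 */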
void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
    vm_offset_t b_offset, int xfersize)
{
	void *a_cp, *b_cp;
	vm_offset_t a_pg_offset, b_pg_offset;
	int cnt;

	while (xfersize > 0) {
		a_pg_offset = a_offset & PAGE_MASK;
		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
		a_cp = (char *)pmap_page_to_va(ma[a_offset >> PAGE_SHIFT]) +
		    a_pg_offset;
		b_pg_offset = b_offset & PAGE_MASK;
		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
		b_cp = (char *)pmap_page_to_va(mb[b_offset >> PAGE_SHIFT]) +
		    b_pg_offset;
		bcopy(a_cp, b_cp, cnt);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops = 0;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_page_exists_quick: page %p is not managed", m));
	rv = FALSE;
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (PV_PMAP(pv) == pmap) {
			rv = TRUE;
			break;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

/*
 *	pmap_page_wired_mappings:
 *
 *	Return the number of managed mappings to the given physical page
 *	that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	int count;

	count = 0;
	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (count);
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_wired(pte))
			count++;
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	rw_wunlock(&pvh_global_lock);
	return (count);
}

/*
 * Remove all pages from the specified address space; this aids process
 * exit speed.  The code only operates on the current process's pmap
 * (a warning is printed otherwise) and is much faster than pmap_remove()
 * when tearing down an entire address space.
 */
void
pmap_remove_pages(pmap_t pmap)
{
	struct pv_chunk *pc, *npc;
	struct ia64_lpte *pte;
	pv_entry_t pv;
	vm_offset_t va;
	vm_page_t m;
	u_long inuse, bitmask;
	int allfree, bit, field, idx;

	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
		printf("warning: %s called with non-current pmap\n",
		    __func__);
		return;
	}
	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
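	/*
	 * Walk the pmap's pv entry chunks.  In each chunk, pc_map[] has a
	 * bit set for every free slot, so inverting it (masked by
	 * pc_freemask[]) yields the allocated pv entries; ffsl() then
	 * picks them off one bit at a time.
	 */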
	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
		allfree = 1;
		for (field = 0; field < _NPCM; field++) {
			inuse = ~pc->pc_map[field] & pc_freemask[field];
			while (inuse != 0) {
				bit = ffsl(inuse) - 1;
				bitmask = 1UL << bit;
				idx = field * sizeof(inuse) * NBBY + bit;
				pv = &pc->pc_pventry[idx];
				inuse &= ~bitmask;
				va = pv->pv_va;
				pte = pmap_find_vhpt(va);
				KASSERT(pte != NULL, ("pte"));
				if (pmap_wired(pte)) {
					allfree = 0;
					continue;
				}
				pmap_remove_vhpt(va);
				pmap_invalidate_page(va);
				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
				if (pmap_dirty(pte))
					vm_page_dirty(m);
				pmap_free_pte(pte, va);
				/* Mark free */
				PV_STAT(pv_entry_frees++);
				PV_STAT(pv_entry_spare++);
				pv_entry_count--;
				pc->pc_map[field] |= bitmask;
				pmap->pm_stats.resident_count--;
				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
				if (TAILQ_EMPTY(&m->md.pv_list))
					vm_page_aflag_clear(m, PGA_WRITEABLE);
			}
		}
		if (allfree) {
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			free_pv_chunk(pc);
		}
	}
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
}

/*
 *	pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	int count = 0;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_ts_referenced: page %p is not managed", m));
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_accessed(pte)) {
			count++;
			pmap_clear_accessed(pte);
			pmap_invalidate_page(pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	rw_wunlock(&pvh_global_lock);
	return (count);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_modified: page %p is not managed", m));
	rv = FALSE;

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
	 * is clear, no PTEs can be dirty.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return (rv);
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		pmap_switch(oldpmap);
		KASSERT(pte != NULL, ("pte"));
		rv = pmap_dirty(pte) ? TRUE : FALSE;
		PMAP_UNLOCK(pmap);
		if (rv)
			break;
	}
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

/*
 *	pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	struct ia64_lpte *pte;

	pte = pmap_find_vhpt(addr);
	if (pte != NULL && pmap_present(pte))
		return (FALSE);
	return (TRUE);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	in any physical maps.
 */
boolean_t
pmap_is_referenced(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_referenced: page %p is not managed", m));
	rv = FALSE;
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		pmap_switch(oldpmap);
		KASSERT(pte != NULL, ("pte"));
		rv = pmap_accessed(pte) ? TRUE : FALSE;
		PMAP_UNLOCK(pmap);
		if (rv)
			break;
	}
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

/*
 *	Apply the given advice to the specified range of addresses within the
 *	given pmap.  Depending on the advice, clear the referenced and/or
 *	modified flags in each mapping and set the mapped page's dirty field.
 */
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap;
	vm_page_t m;

	PMAP_LOCK(pmap);
	oldpmap = pmap_switch(pmap);
	for (; sva < eva; sva += PAGE_SIZE) {
		/* If page is invalid, skip this page. */
		pte = pmap_find_vhpt(sva);
		if (pte == NULL)
			continue;

		/* If it isn't managed, skip it too. */
		if (!pmap_managed(pte))
			continue;

		/* Clear its modified and referenced bits. */
		if (pmap_dirty(pte)) {
			if (advice == MADV_DONTNEED) {
				/*
				 * Future calls to pmap_is_modified() can be
				 * avoided by making the page dirty now.
				 */
				m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
				vm_page_dirty(m);
			}
			pmap_clear_dirty(pte);
		} else if (!pmap_accessed(pte))
			continue;
		pmap_clear_accessed(pte);
		pmap_invalidate_page(sva);
	}
	pmap_switch(oldpmap);
	PMAP_UNLOCK(pmap);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_clear_modify: page %p is not managed", m));
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	KASSERT(!vm_page_xbusied(m),
	    ("pmap_clear_modify: page %p is exclusive busied", m));

	/*
	 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
	 * If the object containing the page is locked and the page is not
	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
	 */
	if ((m->aflags & PGA_WRITEABLE) == 0)
		return;
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		if (pmap_dirty(pte)) {
			pmap_clear_dirty(pte);
			pmap_invalidate_page(pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	rw_wunlock(&pvh_global_lock);
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	vm_prot_t prot;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_write: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * set by another thread while the object is locked.  Thus,
	 * if PGA_WRITEABLE is clear, no page table entries need updating.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return;
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		prot = pmap_prot(pte);
		if ((prot & VM_PROT_WRITE) != 0) {
			if (pmap_dirty(pte)) {
				vm_page_dirty(m);
				pmap_clear_dirty(pte);
			}
			prot &= ~VM_PROT_WRITE;
			pmap_pte_prot(pmap, pte, prot);
			pmap_pte_attr(pte, m->md.memattr);
			pmap_invalidate_page(pv->pv_va);
		}
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_wunlock(&pvh_global_lock);
}

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space. Return a pointer to where it is mapped. This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev(vm_paddr_t pa, vm_size_t sz)
{
	static void *last_va = NULL;
	static vm_paddr_t last_pa = 0;
	static vm_size_t last_sz = 0;
	struct efi_md *md;
	vm_offset_t va;

	if (pa == last_pa && sz == last_sz)
		return (last_va);

	md = efi_md_find(pa);
	if (md == NULL) {
		printf("%s: [%#lx..%#lx] not covered by memory descriptor\n",
		    __func__, pa, pa + sz - 1);
		return ((void *)IA64_PHYS_TO_RR6(pa));
	}

	if (md->md_type == EFI_MD_TYPE_FREE) {
		printf("%s: [%#lx..%#lx] is in DRAM\n", __func__, pa,
		    pa + sz - 1);
		return (NULL);
	}

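	/*
	 * Memory that EFI reports as write-back capable is accessed
	 * through the cached direct-mapped region 7; everything else goes
	 * through the uncached region 6.
	 */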
	va = (md->md_attr & EFI_MD_ATTR_WB) ? IA64_PHYS_TO_RR7(pa) :
	    IA64_PHYS_TO_RR6(pa);

	last_va = (void *)va;
	last_pa = pa;
	last_sz = sz;
	return (last_va);
}

/*
 * 'Unmap' a range mapped by pmap_mapdev().
 */
void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
}

/*
 * Sets the memory attribute for the specified page.
 */
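/*
 * Helper run on each CPU (via smp_rendezvous() in the SMP case): issue
 * the PAL call identified by the argument with interrupts disabled.
 */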
static void
pmap_page_set_memattr_1(void *arg)
{
	struct ia64_pal_result res;
	register_t is;
	uintptr_t pp = (uintptr_t)arg;

	is = intr_disable();
	res = ia64_call_pal_static(pp, 0, 0, 0);
	intr_restore(is);
}

void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
	struct ia64_lpte *pte;
	pmap_t oldpmap, pmap;
	pv_entry_t pv;
	void *va;

	rw_wlock(&pvh_global_lock);
	m->md.memattr = ma;
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		oldpmap = pmap_switch(pmap);
		pte = pmap_find_vhpt(pv->pv_va);
		KASSERT(pte != NULL, ("pte"));
		pmap_pte_attr(pte, ma);
		pmap_invalidate_page(pv->pv_va);
		pmap_switch(oldpmap);
		PMAP_UNLOCK(pmap);
	}
	rw_wunlock(&pvh_global_lock);

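	/*
	 * Transitioning a page to an uncacheable attribute follows the
	 * Itanium-prescribed sequence: make outstanding prefetches visible
	 * on all CPUs (PAL_PREFETCH_VISIBILITY), flush the page's cache
	 * lines, and finally drain pending memory transactions on all CPUs
	 * (PAL_MC_DRAIN).
	 */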
	if (ma == VM_MEMATTR_UNCACHEABLE) {
#ifdef SMP
		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
		    (void *)PAL_PREFETCH_VISIBILITY);
#else
		pmap_page_set_memattr_1((void *)PAL_PREFETCH_VISIBILITY);
#endif
		va = (void *)pmap_page_to_va(m);
		critical_enter();
		cpu_flush_dcache(va, PAGE_SIZE);
		critical_exit();
#ifdef SMP
		smp_rendezvous(NULL, pmap_page_set_memattr_1, NULL,
		    (void *)PAL_MC_DRAIN);
#else
		pmap_page_set_memattr_1((void *)PAL_MC_DRAIN);
#endif
	}
}

/*
 * Perform the pmap work for mincore(2): return MINCORE_* flags describing
 * the mapping at the given address, if any.
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
	pmap_t oldpmap;
	struct ia64_lpte *pte, tpte;
	vm_paddr_t pa;
	int val;

	PMAP_LOCK(pmap);
retry:
	oldpmap = pmap_switch(pmap);
	pte = pmap_find_vhpt(addr);
	if (pte != NULL) {
		tpte = *pte;
		pte = &tpte;
	}
	pmap_switch(oldpmap);
	if (pte == NULL || !pmap_present(pte)) {
		val = 0;
		goto out;
	}
	val = MINCORE_INCORE;
	if (pmap_dirty(pte))
		val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
	if (pmap_accessed(pte))
		val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
	    pmap_managed(pte)) {
		pa = pmap_ppn(pte);
		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
			goto retry;
	} else
out:
		PA_UNLOCK_COND(*locked_pa);
	PMAP_UNLOCK(pmap);
	return (val);
}

void
pmap_activate(struct thread *td)
{
	pmap_switch(vmspace_pmap(td->td_proc->p_vmspace));
}

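/*
 * Switch the CPU to the given pmap's address space by loading its region
 * IDs into region registers 0 through IA64_VM_MINKERN_REGION - 1 (a NULL
 * pmap installs the low reserved RIDs).  Each region register value packs
 * the region ID in bits 8 and up, the preferred page size in bits 2..7
 * and the VHPT walker enable in bit 0, which is what the
 * (rid << 8) | (PAGE_SHIFT << 2) | 1 expressions below construct.
 * Returns the previously active pmap so that callers can restore it.
 */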
pmap_t
pmap_switch(pmap_t pm)
{
	pmap_t prevpm;
	int i;

	critical_enter();
	prevpm = PCPU_GET(md.current_pmap);
	if (prevpm == pm)
		goto out;
	if (pm == NULL) {
		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (i << 8)|(PAGE_SHIFT << 2)|1);
		}
	} else {
		for (i = 0; i < IA64_VM_MINKERN_REGION; i++) {
			ia64_set_rr(IA64_RR_BASE(i),
			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
		}
	}
	PCPU_SET(md.current_pmap, pm);
	ia64_srlz_d();

out:
	critical_exit();
	return (prevpm);
}

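/*
 * Make the instruction cache coherent with recently written code in the
 * given pmap.  The requested range is widened to 32-byte aligned chunks
 * (the flush granularity assumed here) and processed one page at a time,
 * skipping addresses that have no valid mapping.
 */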
void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
	pmap_t oldpm;
	struct ia64_lpte *pte;
	vm_offset_t lim;
	vm_size_t len;

	sz += va & 31;
	va &= ~31;
	sz = (sz + 31) & ~31;

	PMAP_LOCK(pm);
	oldpm = pmap_switch(pm);
	while (sz > 0) {
		lim = round_page(va);
		len = MIN(lim - va, sz);
		pte = pmap_find_vhpt(va);
		if (pte != NULL && pmap_present(pte))
			ia64_sync_icache(va, len);
		va += len;
		sz -= len;
	}
	pmap_switch(oldpm);
	PMAP_UNLOCK(pm);
}

/*
 *	Increase the starting virtual address of the given mapping if a
 *	different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
}

#include "opt_ddb.h"

#ifdef DDB

#include <ddb/ddb.h>

static const char *psnames[] = {
	"1B",	"2B",	"4B",	"8B",
	"16B",	"32B",	"64B",	"128B",
	"256B",	"512B",	"1K",	"2K",
	"4K",	"8K",	"16K",	"32K",
	"64K",	"128K",	"256K",	"512K",
	"1M",	"2M",	"4M",	"8M",
	"16M",	"32M",	"64M",	"128M",
	"256M",	"512M",	"1G",	"2G"
};

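/*
 * Dump the CPU's instruction (type 0) or data (type 1) translation
 * registers.  The number of implemented TRs is obtained from
 * PAL_VM_SUMMARY and each entry is read with PAL_VM_TR_READ; fields whose
 * validity bit is clear are masked out before printing.
 */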
static void
print_trs(int type)
{
	struct ia64_pal_result res;
	int i, maxtr;
	struct {
		pt_entry_t	pte;
		uint64_t	itir;
		uint64_t	ifa;
		struct ia64_rr	rr;
	} buf;
	static const char *manames[] = {
		"WB",	"bad",	"bad",	"bad",
		"UC",	"UCE",	"WC",	"NaT",
	};

	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
	if (res.pal_status != 0) {
		db_printf("Can't get VM summary\n");
		return;
	}

	if (type == 0)
		maxtr = (res.pal_result[0] >> 40) & 0xff;
	else
		maxtr = (res.pal_result[0] >> 32) & 0xff;

	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
	for (i = 0; i <= maxtr; i++) {
		bzero(&buf, sizeof(buf));
		res = ia64_pal_physical(PAL_VM_TR_READ, i, type,
		    ia64_tpa((uint64_t)&buf));
		if (!(res.pal_result[0] & 1))
			buf.pte &= ~PTE_AR_MASK;
		if (!(res.pal_result[0] & 2))
			buf.pte &= ~PTE_PL_MASK;
		if (!(res.pal_result[0] & 4))
			pmap_clear_dirty(&buf);
		if (!(res.pal_result[0] & 8))
			buf.pte &= ~PTE_MA_MASK;
		db_printf("%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s "
		    "%d %06x\n", (int)buf.ifa & 1, buf.rr.rr_rid,
		    buf.ifa >> 12, (buf.pte & PTE_PPN_MASK) >> 12,
		    psnames[(buf.itir & ITIR_PS_MASK) >> 2],
		    (buf.pte & PTE_ED) ? 1 : 0,
		    (int)(buf.pte & PTE_AR_MASK) >> 9,
		    (int)(buf.pte & PTE_PL_MASK) >> 7,
		    (pmap_dirty(&buf)) ? 1 : 0,
		    (pmap_accessed(&buf)) ? 1 : 0,
		    manames[(buf.pte & PTE_MA_MASK) >> 2],
		    (pmap_present(&buf)) ? 1 : 0,
		    (int)((buf.itir & ITIR_KEY_MASK) >> 8));
	}
}

DB_COMMAND(itr, db_itr)
{
	print_trs(0);
}

DB_COMMAND(dtr, db_dtr)
{
	print_trs(1);
}

DB_COMMAND(rr, db_rr)
{
	int i;
	uint64_t t;
	struct ia64_rr rr;

	db_printf("RR RID    PgSz VE\n");
	for (i = 0; i < 8; i++) {
		__asm __volatile ("mov %0=rr[%1]"
				  : "=r"(t)
				  : "r"(IA64_RR_BASE(i)));
		*(uint64_t *)&rr = t;
		db_printf("%d  %06x %4s %d\n",
		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
	}
}

DB_COMMAND(thash, db_thash)
{
	if (!have_addr)
		return;

	db_printf("%p\n", (void *)ia64_thash(addr));
}

DB_COMMAND(ttag, db_ttag)
{
	if (!have_addr)
		return;

	db_printf("0x%lx\n", ia64_ttag(addr));
}

DB_COMMAND(kpte, db_kpte)
{
	struct ia64_lpte *pte;

	if (!have_addr) {
		db_printf("usage: kpte <kva>\n");
		return;
	}
	if (addr < VM_INIT_KERNEL_ADDRESS) {
		db_printf("kpte: error: invalid <kva>\n");
		return;
	}
	pte = pmap_find_kpte(addr);
	db_printf("kpte at %p:\n", pte);
	db_printf("  pte  =%016lx\n", pte->pte);
	db_printf("  itir =%016lx\n", pte->itir);
	db_printf("  tag  =%016lx\n", pte->tag);
	db_printf("  chain=%016lx\n", pte->chain);
}

#endif
