1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 106753 2002-11-11 05:17:34Z alc $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
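/*
 * With eight regions of 2^61 bytes each, the region (and hence the
 * region register) for an address is simply its top three bits, which
 * is why the code below computes it as "va >> 61".
 */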
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120#include <vm/uma_int.h>
121
122#include <sys/user.h>
123
124#include <machine/pal.h>
125#include <machine/md_var.h>
126
127MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
128
129#ifndef PMAP_SHPGPERPROC
130#define PMAP_SHPGPERPROC 200
131#endif
132
133#if defined(DIAGNOSTIC)
134#define PMAP_DIAGNOSTIC
135#endif
136
137#define MINPV 2048	/* Preallocate at least this many */
138#define MAXPV 20480	/* But no more than this */
139
140#if 0
141#define PMAP_DIAGNOSTIC
142#define PMAP_DEBUG
143#endif
144
145#if !defined(PMAP_DIAGNOSTIC)
146#define PMAP_INLINE __inline
147#else
148#define PMAP_INLINE
149#endif
150
151/*
152 * Get PDEs and PTEs for user/kernel address space
153 */
154#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
155#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
156#define pmap_pte_v(pte)		((pte)->pte_p)
157#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
158#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
159
160#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
161				:((pte)->pte_ig &= ~PTE_IG_WIRED))
162#define pmap_pte_set_prot(pte, v) do {		\
163    (pte)->pte_ar = v >> 2;			\
164    (pte)->pte_pl = v & 3;			\
165} while (0)
166
167/*
168 * Given a map and a machine independent protection code,
169 * convert to an ia64 protection code.
170 */
171#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
172#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
173#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
174int	protection_codes[2][8];
175
176/*
177 * Return non-zero if this pmap is currently active
178 */
179#define pmap_isactive(pmap)	(pmap->pm_active)
180
181/*
182 * Statically allocated kernel pmap
183 */
184struct pmap kernel_pmap_store;
185
186vm_offset_t avail_start;	/* PA of first available physical page */
187vm_offset_t avail_end;		/* PA of last available physical page */
188vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
189vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
190static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
191
192vm_offset_t vhpt_base, vhpt_size;
193
194/*
195 * We use an object to own the kernel's 'page tables'. For simplicity,
196 * we use one page directory to index a set of pages containing
197 * ia64_lptes. This gives us up to 2Gb of kernel virtual space.
198 */
199static vm_object_t kptobj;
200static int nkpt;
201static struct ia64_lpte **kptdir;
202#define KPTE_DIR_INDEX(va) \
203	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
204#define KPTE_PTE_INDEX(va) \
205	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
206#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
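/*
 * A sketch of the index math, assuming the default 8KB pages
 * (PAGE_SHIFT == 13) and the 32-byte ia64_lpte implied by the masks
 * above: KPTE_PTE_INDEX selects one of 1 << (13 - 5) == 256 lptes
 * within a page of lptes (== NKPTEPG), and KPTE_DIR_INDEX selects one
 * of 1 << (13 - 3) == 1024 kptdir slots.  Together that covers
 * 1024 * 256 * 8KB == 2GB of kernel virtual space, matching the limit
 * mentioned above.
 */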
207
208vm_offset_t kernel_vm_end;
209
210/*
211 * Values for ptc.e. XXX values for SKI.
212 */
213static u_int64_t pmap_ptc_e_base = 0x100000000;
214static u_int64_t pmap_ptc_e_count1 = 3;
215static u_int64_t pmap_ptc_e_count2 = 2;
216static u_int64_t pmap_ptc_e_stride1 = 0x2000;
217static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
218
219/*
220 * Data for the RID allocator
221 */
222static u_int64_t *pmap_ridbusy;
223static int pmap_ridmax, pmap_ridcount;
224struct mtx pmap_ridmutex;
225
226/*
227 * Data for the pv entry allocation mechanism
228 */
229static uma_zone_t pvzone;
230static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
231static int pmap_pagedaemon_waken = 0;
232static struct pv_entry *pvbootentries;
233static int pvbootnext, pvbootmax;
234
235/*
236 * Data for allocating PTEs for user processes.
237 */
238static uma_zone_t ptezone;
239
240/*
241 * VHPT instrumentation.
242 */
243static int pmap_vhpt_inserts;
244static int pmap_vhpt_collisions;
245static int pmap_vhpt_resident;
246SYSCTL_DECL(_vm_stats);
247SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
248SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
249	   &pmap_vhpt_inserts, 0, "");
250SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
251	   &pmap_vhpt_collisions, 0, "");
252SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
253	   &pmap_vhpt_resident, 0, "");
254
255static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
256static pv_entry_t get_pv_entry(void);
257static void	ia64_protection_init(void);
258
259static void	pmap_invalidate_all(pmap_t pmap);
260static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
261
262vm_offset_t
263pmap_steal_memory(vm_size_t size)
264{
265	vm_size_t bank_size;
266	vm_offset_t pa, va;
267
268	size = round_page(size);
269
270	bank_size = phys_avail[1] - phys_avail[0];
271	while (size > bank_size) {
272		int i;
273		for (i = 0; phys_avail[i+2]; i+= 2) {
274			phys_avail[i] = phys_avail[i+2];
275			phys_avail[i+1] = phys_avail[i+3];
276		}
277		phys_avail[i] = 0;
278		phys_avail[i+1] = 0;
279		if (!phys_avail[0])
280			panic("pmap_steal_memory: out of memory");
281		bank_size = phys_avail[1] - phys_avail[0];
282	}
283
284	pa = phys_avail[0];
285	phys_avail[0] += size;
286
287	va = IA64_PHYS_TO_RR7(pa);
288	bzero((caddr_t) va, size);
289	return va;
290}
291
292/*
293 *	Bootstrap the system enough to run with virtual memory.
294 */
295void
296pmap_bootstrap()
297{
298	int i, j, count, ridbits;
299	struct ia64_pal_result res;
300
301	/*
302	 * Query the PAL Code to find the loop parameters for the
303	 * ptc.e instruction.
304	 */
305	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
306	if (res.pal_status != 0)
307		panic("Can't configure ptc.e parameters");
308	pmap_ptc_e_base = res.pal_result[0];
309	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
310	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
311	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
312	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
313	if (bootverbose)
314		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
315		       "stride1=0x%lx, stride2=0x%lx\n",
316		       pmap_ptc_e_base,
317		       pmap_ptc_e_count1,
318		       pmap_ptc_e_count2,
319		       pmap_ptc_e_stride1,
320		       pmap_ptc_e_stride2);
321
322	/*
323	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
324	 */
325	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
326	if (res.pal_status != 0) {
327		if (bootverbose)
328			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
329		ridbits = 18; /* guaranteed minimum */
330	} else {
331		ridbits = (res.pal_result[1] >> 8) & 0xff;
332		if (bootverbose)
333			printf("Processor supports %d Region ID bits\n",
334			       ridbits);
335	}
336	pmap_ridmax = (1 << ridbits);
337	pmap_ridcount = 8;
338	pmap_ridbusy = (u_int64_t *)
339		pmap_steal_memory(pmap_ridmax / 8);
340	bzero(pmap_ridbusy, pmap_ridmax / 8);
341	pmap_ridbusy[0] |= 0xff;
342	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
343
344	/*
345	 * Allocate some memory for initial kernel 'page tables'.
346	 */
347	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
348	for (i = 0; i < NKPT; i++) {
349		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
350	}
351	nkpt = NKPT;
352
353	avail_start = phys_avail[0];
354	for (i = 0; phys_avail[i+2]; i+= 2) ;
355	avail_end = phys_avail[i+1];
356	count = i+2;
357
358	/*
359	 * Figure out a useful size for the VHPT, based on the size of
360	 * physical memory and try to locate a region which is large
361	 * enough to contain the VHPT (which must be a power of two in
362	 * size and aligned to a natural boundary).
363	 */
364	vhpt_size = 15;
365	while ((1L<<vhpt_size) < ia64_btop(avail_end - avail_start) * 32)
366		vhpt_size++;
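	/*
	 * The factor of 32 sizes the table at roughly one 32-byte
	 * long-format entry per physical page, rounded up to a power
	 * of two.
	 */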
367
368	vhpt_base = 0;
369	while (!vhpt_base) {
370		vm_offset_t mask;
371		if (bootverbose)
372			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
373		mask = (1L << vhpt_size) - 1;
374		for (i = 0; i < count; i += 2) {
375			vm_offset_t base, limit;
376			base = (phys_avail[i] + mask) & ~mask;
377			limit = base + (1L << vhpt_size);
378			if (limit <= phys_avail[i+1])
379				/*
380				 * VHPT can fit in this region
381				 */
382				break;
383		}
384		if (!phys_avail[i]) {
385			/*
386			 * Can't fit, try next smaller size.
387			 */
388			vhpt_size--;
389		} else {
390			vhpt_base = (phys_avail[i] + mask) & ~mask;
391		}
392	}
393	if (vhpt_size < 15)
394		panic("Can't find space for VHPT");
395
396	if (bootverbose)
397		printf("Putting VHPT at %p\n", (void *) vhpt_base);
398	if (vhpt_base != phys_avail[i]) {
399		/*
400		 * Split this region.
401		 */
402		if (bootverbose)
403			printf("Splitting [%p-%p]\n",
404			       (void *) phys_avail[i],
405			       (void *) phys_avail[i+1]);
406		for (j = count; j > i; j -= 2) {
407			phys_avail[j] = phys_avail[j-2];
408			phys_avail[j+1] = phys_avail[j-2+1];
409		}
410		phys_avail[count+2] = 0;
411		phys_avail[count+3] = 0;
412		phys_avail[i+1] = vhpt_base;
413		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
414	} else {
415		phys_avail[i] = vhpt_base + (1L << vhpt_size);
416	}
417
418	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
419	bzero((void *) vhpt_base, (1L << vhpt_size));
420	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
421			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
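	/*
	 * cr.pta fields as encoded above: bit 0 enables the VHPT
	 * walker, bits 7:2 hold log2 of the VHPT size, bit 8 selects
	 * the long format and the upper bits hold the (naturally
	 * aligned) VHPT base address.
	 */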
422
423	virtual_avail = IA64_RR_BASE(5);
424	virtual_end = IA64_RR_BASE(6)-1;
425
426	/*
427	 * Initialize protection array.
428	 */
429	ia64_protection_init();
430
431	/*
432	 * Initialize the kernel pmap (which is statically allocated).
433	 */
434	for (i = 0; i < 5; i++)
435		kernel_pmap->pm_rid[i] = 0;
436	kernel_pmap->pm_active = 1;
437	TAILQ_INIT(&kernel_pmap->pm_pvlist);
438	PCPU_SET(current_pmap, kernel_pmap);
439
440	/*
441	 * Region 5 is mapped via the vhpt.
442	 */
443	ia64_set_rr(IA64_RR_BASE(5),
444		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
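	/*
	 * Region register fields: bit 0 enables the VHPT walker for
	 * the region, bits 7:2 give the preferred page size (log2)
	 * and bits 31:8 the region ID.  Region 5 therefore runs with
	 * RID 5, PAGE_SIZE pages and the hardware walker enabled.
	 */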
445
446	/*
447	 * Region 6 is direct mapped UC and region 7 is direct mapped
448	 * WB (cacheable). The details of this are controlled by the Alt
449	 * {I,D}TLB handlers. Here we just make sure that they have the largest
450	 * possible page size to minimise TLB usage.
451	 */
452	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
453	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
454
455	/*
456	 * Set up proc0's PCB.
457	 */
458#if 0
459	thread0.td_pcb->pcb_hw.apcb_asn = 0;
460#endif
461
462	/*
463	 * Reserve some memory for allocating pvs while bootstrapping
464	 * the pv allocator. We need to have enough to cover mapping
465	 * the kmem_alloc region used to allocate the initial_pvs in
466	 * pmap_init. In general, the size of this region is
467	 * approximately (# physical pages) * (size of pv entry).
468	 */
469	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
470	pvbootentries = (struct pv_entry *)
471		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
472	pvbootnext = 0;
473
474	/*
475	 * Clear out any random TLB entries left over from booting.
476	 */
477	pmap_invalidate_all(kernel_pmap);
478}
479
480void *
481uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
482{
483	static vm_pindex_t color;
484	vm_page_t m;
485	int pflags;
486	void *va;
487
488	*flags = UMA_SLAB_PRIV;
489	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
490		pflags = VM_ALLOC_INTERRUPT;
491	else
492		pflags = VM_ALLOC_SYSTEM;
493	if (wait & M_ZERO)
494		pflags |= VM_ALLOC_ZERO;
495	m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
496	if (m) {
497		va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
498		if ((m->flags & PG_ZERO) == 0)
499			bzero(va, PAGE_SIZE);
500		return (va);
501	}
502	return (NULL);
503}
504
505void
506uma_small_free(void *mem, int size, u_int8_t flags)
507{
508	vm_page_t m;
509
510	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
511	vm_page_lock_queues();
512	vm_page_free(m);
513	vm_page_unlock_queues();
514}
515
516/*
517 *	Initialize the pmap module.
518 *	Called by vm_init, to initialize any structures that the pmap
519 *	system needs to map virtual memory.
520 *	pmap_init has been enhanced to support, in a fairly consistent
521 *	way, discontiguous physical memory.
522 */
523void
524pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
525{
526	int i;
527	int initial_pvs;
528
529	/*
530	 * Allocate memory for random pmap data structures.  Includes the
531	 * pv_head_table.
532	 */
533
534	for(i = 0; i < vm_page_array_size; i++) {
535		vm_page_t m;
536
537		m = &vm_page_array[i];
538		TAILQ_INIT(&m->md.pv_list);
539		m->md.pv_list_count = 0;
540 	}
541
542	/*
543	 * Init the pv free list and the PTE free list.
544	 */
545	initial_pvs = vm_page_array_size;
546	if (initial_pvs < MINPV)
547		initial_pvs = MINPV;
548	if (initial_pvs > MAXPV)
549		initial_pvs = MAXPV;
550	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
551	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
552	uma_prealloc(pvzone, initial_pvs);
553
554	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
555	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
556	uma_prealloc(ptezone, initial_pvs);
557
558	/*
559	 * Create the object for the kernel's page tables.
560	 */
561	kptobj = vm_object_allocate(OBJT_DEFAULT, MAXKPT);
562
563	/*
564	 * Now it is safe to enable pv_table recording.
565	 */
566	pmap_initialized = TRUE;
567}
568
569/*
570 * Initialize the address space (zone) for the pv_entries.  Set a
571 * high water mark so that the system can recover from excessive
572 * numbers of pv entries.
573 */
574void
575pmap_init2()
576{
577	int shpgperproc = PMAP_SHPGPERPROC;
578
579	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
580	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
581	pv_entry_high_water = 9 * (pv_entry_max / 10);
582}
583
584
585/***************************************************
586 * Manipulate TLBs for a pmap
587 ***************************************************/
588
589static void
590pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
591{
592	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
593		("invalidating TLB for non-current pmap"));
594	ia64_ptc_g(va, PAGE_SHIFT << 2);
595}
596
597static void
598pmap_invalidate_all_1(void *arg)
599{
600	u_int64_t addr;
601	int i, j;
602	register_t psr;
603
604	psr = intr_disable();
605	addr = pmap_ptc_e_base;
606	for (i = 0; i < pmap_ptc_e_count1; i++) {
607		for (j = 0; j < pmap_ptc_e_count2; j++) {
608			ia64_ptc_e(addr);
609			addr += pmap_ptc_e_stride2;
610		}
611		addr += pmap_ptc_e_stride1;
612	}
613	intr_restore(psr);
614}
615
616static void
617pmap_invalidate_all(pmap_t pmap)
618{
619	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
620		("invalidating TLB for non-current pmap"));
621
622
623#ifdef SMP
624	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
625#else
626	pmap_invalidate_all_1(0);
627#endif
628}
629
630static u_int32_t
631pmap_allocate_rid(void)
632{
633	int rid;
634
635	if (pmap_ridcount == pmap_ridmax)
636		panic("pmap_allocate_rid: All Region IDs used");
637
638	do {
639		rid = arc4random() & (pmap_ridmax - 1);
640	} while (pmap_ridbusy[rid / 64] & (1L << (rid & 63)));
641	pmap_ridbusy[rid / 64] |= (1L << (rid & 63));
642	pmap_ridcount++;
643
644	return rid;
645}
646
647static void
648pmap_free_rid(u_int32_t rid)
649{
650	mtx_lock(&pmap_ridmutex);
651	pmap_ridbusy[rid / 64] &= ~(1L << (rid & 63));
652	pmap_ridcount--;
653	mtx_unlock(&pmap_ridmutex);
654}
655
656static void
657pmap_ensure_rid(pmap_t pmap, vm_offset_t va)
658{
659	int rr;
660
661	rr = va >> 61;
662
663	/*
664	 * We get called for virtual addresses that may just as well be
665	 * kernel addresses (ie region 5, 6 or 7). Since the pm_rid field
666	 * only holds region IDs for user regions, we have to make sure
667	 * the region is within bounds.
668	 */
669	if (rr >= 5)
670		return;
671
672	if (pmap->pm_rid[rr])
673		return;
674
675	mtx_lock(&pmap_ridmutex);
676	pmap->pm_rid[rr] = pmap_allocate_rid();
677	if (pmap == PCPU_GET(current_pmap))
678		ia64_set_rr(IA64_RR_BASE(rr),
679			    (pmap->pm_rid[rr] << 8)|(PAGE_SHIFT << 2)|1);
680	mtx_unlock(&pmap_ridmutex);
681}
682
683/***************************************************
684 * Low level helper routines.....
685 ***************************************************/
686
687/*
688 * Install a pte into the VHPT
689 */
690static PMAP_INLINE void
691pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
692{
693	u_int64_t *vhp, *p;
694
695	/* invalidate the pte */
696	atomic_set_64(&vhpte->pte_tag, 1L << 63);
697	ia64_mf();			/* make sure everyone sees */
698
699	vhp = (u_int64_t *) vhpte;
700	p = (u_int64_t *) pte;
701
702	vhp[0] = p[0];
703	vhp[1] = p[1];
704	vhp[2] = p[2];			/* install the new tag last */
705
706	ia64_mf();
707}
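/*
 * The ordering above matters: the tag is first poisoned and fenced so
 * the hardware walker ignores the entry, the three words are then
 * copied with the tag last, and the collision chain word is left
 * untouched because the resident VHPT entry owns it.  A walker never
 * sees a half-updated translation this way.
 */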
708
709/*
710 * Compare essential parts of pte.
711 */
712static PMAP_INLINE int
713pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
714{
715	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
716}
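/*
 * Only the first quadword of each pte is compared; pmap_enter relies
 * on this to decide whether an existing translation needs to be
 * purged from the TLB.
 */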
717
718/*
719 * this routine defines the region(s) of memory that should
720 * not be tested for the modified bit.
721 */
722static PMAP_INLINE int
723pmap_track_modified(vm_offset_t va)
724{
725	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
726		return 1;
727	else
728		return 0;
729}
730
731#ifndef KSTACK_MAX_PAGES
732#define KSTACK_MAX_PAGES 32
733#endif
734
735/*
736 * Create the KSTACK for a new thread.
737 * This routine directly affects the fork perf for a process/thread.
738 */
739void
740pmap_new_thread(struct thread *td, int pages)
741{
742	vm_offset_t *ks;
743
744	/* Bounds check */
745	if (pages <= 1)
746		pages = KSTACK_PAGES;
747	else if (pages > KSTACK_MAX_PAGES)
748		pages = KSTACK_MAX_PAGES;
749
750	/*
751	 * Use contigmalloc for user area so that we can use a region
752	 * 7 address for it, which makes it impossible to accidentally
753	 * lose it when recording a trapframe.
754	 */
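	/*
	 * The 256MB boundary passed to contigmalloc below presumably
	 * matches the 2^28-byte page size programmed into regions 6
	 * and 7 in pmap_bootstrap, so the whole stack is covered by a
	 * single direct-mapped translation.
	 */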
755	ks = contigmalloc(pages * PAGE_SIZE, M_PMAP, M_WAITOK, 0ul,
756	    256*1024*1024 - 1, PAGE_SIZE, 256*1024*1024);
757	if (ks == NULL)
758		panic("pmap_new_thread: could not contigmalloc %d pages\n",
759		    pages);
760
761	td->td_md.md_kstackvirt = ks;
762	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
763	td->td_kstack_pages = pages;
764}
765
766/*
767 * Dispose the KSTACK for a thread that has exited.
768 * This routine directly impacts the exit perf of a process/thread.
769 */
770void
771pmap_dispose_thread(struct thread *td)
772{
773	int pages;
774
775	pages = td->td_kstack_pages;
776	contigfree(td->td_md.md_kstackvirt, pages * PAGE_SIZE, M_PMAP);
777	td->td_md.md_kstackvirt = NULL;
778	td->td_kstack = 0;
779}
780
781/*
782 * Set up a variable sized alternate kstack.  This appears to be MI.
783 */
784void
785pmap_new_altkstack(struct thread *td, int pages)
786{
787
788	/*
789	 * Shuffle the original stack. Save the virtual kstack address
790	 * instead of the physical address because 1) we can derive the
791	 * physical address from the virtual address and 2) we need the
792	 * virtual address in pmap_dispose_thread.
793	 */
794	td->td_altkstack_obj = td->td_kstack_obj;
795	td->td_altkstack = (vm_offset_t)td->td_md.md_kstackvirt;
796	td->td_altkstack_pages = td->td_kstack_pages;
797
798	pmap_new_thread(td, pages);
799}
800
801void
802pmap_dispose_altkstack(struct thread *td)
803{
804
805	pmap_dispose_thread(td);
806
807	/*
808	 * Restore the original kstack. Note that td_altkstack holds the
809	 * virtual kstack address of the previous kstack.
810	 */
811	td->td_md.md_kstackvirt = (void*)td->td_altkstack;
812	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa(td->td_altkstack));
813	td->td_kstack_obj = td->td_altkstack_obj;
814	td->td_kstack_pages = td->td_altkstack_pages;
815	td->td_altkstack = 0;
816	td->td_altkstack_obj = NULL;
817	td->td_altkstack_pages = 0;
818}
819
820/*
821 * Allow the KSTACK for a thread to be prejudicially paged out.
822 */
823void
824pmap_swapout_thread(struct thread *td)
825{
826}
827
828/*
829 * Bring the KSTACK for a specified thread back in.
830 */
831void
832pmap_swapin_thread(struct thread *td)
833{
834}
835
836/***************************************************
837 * Page table page management routines.....
838 ***************************************************/
839
840void
841pmap_pinit0(struct pmap *pmap)
842{
843	/* kernel_pmap is the same as any other pmap. */
844	pmap_pinit(pmap);
845}
846
847/*
848 * Initialize a preallocated and zeroed pmap structure,
849 * such as one in a vmspace structure.
850 */
851void
852pmap_pinit(struct pmap *pmap)
853{
854	int i;
855
856	pmap->pm_flags = 0;
857	for (i = 0; i < 5; i++)
858		pmap->pm_rid[i] = 0;
859	pmap->pm_ptphint = NULL;
860	pmap->pm_active = 0;
861	TAILQ_INIT(&pmap->pm_pvlist);
862	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
863}
864
865/*
866 * Wire in kernel global address entries.  To avoid a race condition
867 * between pmap initialization and pmap_growkernel, this procedure
868 * should be called after the vmspace is attached to the process
869 * but before this pmap is activated.
870 */
871void
872pmap_pinit2(struct pmap *pmap)
873{
874}
875
876/***************************************************
877 * Pmap allocation/deallocation routines.
878 ***************************************************/
879
880/*
881 * Release any resources held by the given physical map.
882 * Called when a pmap initialized by pmap_pinit is being released.
883 * Should only be called if the map contains no valid mappings.
884 */
885void
886pmap_release(pmap_t pmap)
887{
888	int i;
889
890	for (i = 0; i < 5; i++)
891		if (pmap->pm_rid[i])
892			pmap_free_rid(pmap->pm_rid[i]);
893}
894
895/*
896 * grow the number of kernel page table entries, if needed
897 */
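/*
 * Growth happens one page of lptes at a time, i.e. NKPTEPG pages of
 * KVA per step; with the default 8KB pages that is 256 * 8KB == 2MB,
 * which is the granularity kernel_vm_end is rounded to below.
 */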
898void
899pmap_growkernel(vm_offset_t addr)
900{
901	struct ia64_lpte *ptepage;
902	vm_page_t nkpg;
903
904	if (kernel_vm_end == 0) {
905		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
906			+ IA64_RR_BASE(5);
907	}
908	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
909	while (kernel_vm_end < addr) {
910		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
911			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
912				& ~(PAGE_SIZE * NKPTEPG - 1);
913			continue;
914		}
915
916		/*
917		 * We could handle more by increasing the size of kptdir.
918		 */
919		if (nkpt == MAXKPT)
920			panic("pmap_growkernel: out of kernel address space");
921
922		/*
923		 * This index is bogus, but out of the way
924		 */
925		nkpg = vm_page_alloc(kptobj, nkpt,
926		    VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
927		if (!nkpg)
928			panic("pmap_growkernel: no memory to grow kernel");
929
930		nkpt++;
931		ptepage = (struct ia64_lpte *)
932			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
933		bzero(ptepage, PAGE_SIZE);
934		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
935
936		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
937	}
938}
939
940/***************************************************
941 * page management routines.
942 ***************************************************/
943
944/*
945 * free the pv_entry back to the free list
946 */
947static PMAP_INLINE void
948free_pv_entry(pv_entry_t pv)
949{
950	pv_entry_count--;
951	uma_zfree(pvzone, pv);
952}
953
954/*
955 * get a new pv_entry, allocating a block from the system
956 * when needed.
957 * the memory allocation is performed bypassing the malloc code
958 * because of the possibility of allocations at interrupt time.
959 */
960static pv_entry_t
961get_pv_entry(void)
962{
963	pv_entry_count++;
964	if (pv_entry_high_water &&
965		(pv_entry_count > pv_entry_high_water) &&
966		(pmap_pagedaemon_waken == 0)) {
967		pmap_pagedaemon_waken = 1;
968		wakeup (&vm_pages_needed);
969	}
970	return uma_zalloc(pvzone, M_NOWAIT);
971}
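/*
 * Crossing pv_entry_high_water (set in pmap_init2) wakes the
 * pagedaemon; pmap_collect() below then reclaims pv entries by
 * tearing down all mappings of pages that are not wired, held or
 * busy.
 */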
972
973/*
974 * Add an ia64_lpte to the VHPT.
975 */
976static void
977pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
978{
979	struct ia64_lpte *vhpte;
980
981	pmap_vhpt_inserts++;
982	pmap_vhpt_resident++;
983
984	vhpte = (struct ia64_lpte *) ia64_thash(va);
985
986	if (vhpte->pte_chain)
987		pmap_vhpt_collisions++;
988
989	pte->pte_chain = vhpte->pte_chain;
990	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
991
992	if (!vhpte->pte_p && pte->pte_p)
993		pmap_install_pte(vhpte, pte);
994	else
995		ia64_mf();
996}
997
998/*
999 * Update VHPT after a pte has changed.
1000 */
1001static void
1002pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1003{
1004	struct ia64_lpte *vhpte;
1005
1006	vhpte = (struct ia64_lpte *) ia64_thash(va);
1007
1008	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
1009	    && pte->pte_p)
1010		pmap_install_pte(vhpte, pte);
1011}
1012
1013/*
1014 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1015 * worked or an appropriate error code otherwise.
1016 */
1017static int
1018pmap_remove_vhpt(vm_offset_t va)
1019{
1020	struct ia64_lpte *pte;
1021	struct ia64_lpte *lpte;
1022	struct ia64_lpte *vhpte;
1023	u_int64_t tag;
1024	int error = ENOENT;
1025
1026	vhpte = (struct ia64_lpte *) ia64_thash(va);
1027
1028	/*
1029	 * If the VHPTE is invalid, there can't be a collision chain.
1030	 */
1031	if (!vhpte->pte_p) {
1032		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1033		printf("can't remove vhpt entry for 0x%lx\n", va);
1034		goto done;
1035	}
1036
1037	lpte = vhpte;
1038	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1039	tag = ia64_ttag(va);
1040
1041	while (pte->pte_tag != tag) {
1042		lpte = pte;
1043		if (pte->pte_chain)
1044			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1045		else {
1046			printf("can't remove vhpt entry for 0x%lx\n", va);
1047			goto done;
1048		}
1049	}
1050
1051	/*
1052	 * Snip this pv_entry out of the collision chain.
1053	 */
1054	lpte->pte_chain = pte->pte_chain;
1055
1056	/*
1057	 * If the VHPTE matches as well, change it to map the first
1058	 * element from the chain if there is one.
1059	 */
1060	if (vhpte->pte_tag == tag) {
1061		if (vhpte->pte_chain) {
1062			pte = (struct ia64_lpte *)
1063				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1064			pmap_install_pte(vhpte, pte);
1065		} else {
1066			vhpte->pte_p = 0;
1067			ia64_mf();
1068		}
1069	}
1070
1071	pmap_vhpt_resident--;
1072	error = 0;
1073 done:
1074	return error;
1075}
1076
1077/*
1078 * Find the ia64_lpte for the given va, if any.
1079 */
1080static struct ia64_lpte *
1081pmap_find_vhpt(vm_offset_t va)
1082{
1083	struct ia64_lpte *pte;
1084	u_int64_t tag;
1085
1086	pte = (struct ia64_lpte *) ia64_thash(va);
1087	if (!pte->pte_chain) {
1088		pte = 0;
1089		goto done;
1090	}
1091
1092	tag = ia64_ttag(va);
1093	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1094
1095	while (pte->pte_tag != tag) {
1096		if (pte->pte_chain) {
1097			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1098		} else {
1099			pte = 0;
1100			break;
1101		}
1102	}
1103
1104 done:
1105	return pte;
1106}
1107
1108/*
1109 * Remove an entry from the list of managed mappings.
1110 */
1111static int
1112pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1113{
1114	if (!pv) {
1115		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1116			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1117				if (pmap == pv->pv_pmap && va == pv->pv_va)
1118					break;
1119			}
1120		} else {
1121			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1122				if (va == pv->pv_va)
1123					break;
1124			}
1125		}
1126	}
1127
1128	if (pv) {
1129		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1130		m->md.pv_list_count--;
1131		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1132			vm_page_flag_clear(m, PG_WRITEABLE);
1133
1134		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1135		free_pv_entry(pv);
1136		return 0;
1137	} else {
1138		return ENOENT;
1139	}
1140}
1141
1142/*
1143 * Create a pv entry for page at pa for
1144 * (pmap, va).
1145 */
1146static void
1147pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1148{
1149	pv_entry_t pv;
1150
1151	pv = get_pv_entry();
1152	pv->pv_pmap = pmap;
1153	pv->pv_va = va;
1154
1155	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1156	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1157	m->md.pv_list_count++;
1158}
1159
1160/*
1161 *	Routine:	pmap_extract
1162 *	Function:
1163 *		Extract the physical page address associated
1164 *		with the given map/virtual_address pair.
1165 */
1166vm_offset_t
1167pmap_extract(pmap, va)
1168	register pmap_t pmap;
1169	vm_offset_t va;
1170{
1171	pmap_t oldpmap;
1172	vm_offset_t pa;
1173
1174	oldpmap = pmap_install(pmap);
1175	pa = ia64_tpa(va);
1176	pmap_install(oldpmap);
1177	return pa;
1178}
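/*
 * ia64_tpa translates through the current TLB/VHPT, which is why the
 * target pmap has to be installed around the lookup above.
 */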
1179
1180/***************************************************
1181 * Low level mapping routines.....
1182 ***************************************************/
1183
1184/*
1185 * Find the kernel lpte for mapping the given virtual address, which
1186 * must be in the part of region 5 which we can cover with our kernel
1187 * 'page tables'.
1188 */
1189static struct ia64_lpte *
1190pmap_find_kpte(vm_offset_t va)
1191{
1192	KASSERT((va >> 61) == 5,
1193		("kernel mapping 0x%lx not in region 5", va));
1194	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1195		("kernel mapping 0x%lx out of range", va));
1196	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1197}
1198
1199/*
1200 * Find a pte suitable for mapping a user-space address. If one exists
1201 * in the VHPT, that one will be returned; otherwise a new pte is
1202 * allocated.
1203 */
1204static struct ia64_lpte *
1205pmap_find_pte(vm_offset_t va)
1206{
1207	struct ia64_lpte *pte;
1208
1209	if (va >= VM_MAXUSER_ADDRESS)
1210		return pmap_find_kpte(va);
1211
1212	pte = pmap_find_vhpt(va);
1213	if (!pte) {
1214		pte = uma_zalloc(ptezone, M_WAITOK);
1215		pte->pte_p = 0;
1216	}
1217	return pte;
1218}
1219
1220/*
1221 * Free a pte which is now unused. This simply returns it to the zone
1222 * allocator if it is a user mapping. For kernel mappings, clear the
1223 * valid bit to make it clear that the mapping is not currently used.
1224 */
1225static void
1226pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1227{
1228	if (va < VM_MAXUSER_ADDRESS)
1229		uma_zfree(ptezone, pte);
1230	else
1231		pte->pte_p = 0;
1232}
1233
1234/*
1235 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1236 * the pte was originally valid, then it's assumed to already be in the
1237 * VHPT.
1238 */
1239static void
1240pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1241	     int ig, int pl, int ar)
1242{
1243	int wasvalid = pte->pte_p;
1244
1245	pte->pte_p = 1;
1246	pte->pte_ma = PTE_MA_WB;
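	/*
	 * Managed mappings start with the access and dirty bits clear;
	 * the first reference or write then faults (ia64 never updates
	 * these bits in hardware), letting the pmap harvest referenced
	 * and modified state for the VM system.  Unmanaged mappings
	 * have both bits preset to avoid those faults entirely.
	 */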
1247	if (ig & PTE_IG_MANAGED) {
1248		pte->pte_a = 0;
1249		pte->pte_d = 0;
1250	} else {
1251		pte->pte_a = 1;
1252		pte->pte_d = 1;
1253	}
1254	pte->pte_pl = pl;
1255	pte->pte_ar = ar;
1256	pte->pte_ppn = pa >> 12;
1257	pte->pte_ed = 0;
1258	pte->pte_ig = ig;
1259
1260	pte->pte_ps = PAGE_SHIFT;
1261	pte->pte_key = 0;
1262
1263	pte->pte_tag = ia64_ttag(va);
1264
1265	if (wasvalid) {
1266		pmap_update_vhpt(pte, va);
1267	} else {
1268		pmap_enter_vhpt(pte, va);
1269	}
1270}
1271
1272/*
1273 * If a pte contains a valid mapping, clear it and update the VHPT.
1274 */
1275static void
1276pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1277{
1278	if (pte->pte_p) {
1279		pmap_remove_vhpt(va);
1280		ia64_ptc_g(va, PAGE_SHIFT << 2);
1281		pte->pte_p = 0;
1282	}
1283}
1284
1285/*
1286 * Remove the (possibly managed) mapping represented by pte from the
1287 * given pmap.
1288 */
1289static int
1290pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1291		pv_entry_t pv, int freepte)
1292{
1293	int error;
1294	vm_page_t m;
1295
1296	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1297		("removing pte for non-current pmap"));
1298
1299	/*
1300	 * First remove from the VHPT.
1301	 */
1302	error = pmap_remove_vhpt(va);
1303	if (error)
1304		return error;
1305
1306	/*
1307	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1308	 */
1309	pte->pte_p = 0;
1310
1311	if (pte->pte_ig & PTE_IG_WIRED)
1312		pmap->pm_stats.wired_count -= 1;
1313
1314	pmap->pm_stats.resident_count -= 1;
1315	if (pte->pte_ig & PTE_IG_MANAGED) {
1316		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1317		if (pte->pte_d)
1318			if (pmap_track_modified(va))
1319				vm_page_dirty(m);
1320		if (pte->pte_a)
1321			vm_page_flag_set(m, PG_REFERENCED);
1322
1323		if (freepte)
1324			pmap_free_pte(pte, va);
1325		return pmap_remove_entry(pmap, m, va, pv);
1326	} else {
1327		if (freepte)
1328			pmap_free_pte(pte, va);
1329		return 0;
1330	}
1331}
1332
1333/*
1334 * Add a list of wired pages to the kva.
1335 * This routine is only used for temporary
1336 * kernel mappings that do not need to have
1337 * page modification or references recorded.
1338 * Note that old mappings are simply written
1339 * over.  The pages *must* be wired.
1340 */
1341void
1342pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1343{
1344	int i;
1345	struct ia64_lpte *pte;
1346
1347	for (i = 0; i < count; i++) {
1348		vm_offset_t tva = va + i * PAGE_SIZE;
1349		int wasvalid;
1350		pte = pmap_find_kpte(tva);
1351		wasvalid = pte->pte_p;
1352		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1353			     0, PTE_PL_KERN, PTE_AR_RWX);
1354		if (wasvalid)
1355			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1356	}
1357}
1358
1359/*
1360 * this routine jerks page mappings from the
1361 * kernel -- it is meant only for temporary mappings.
1362 */
1363void
1364pmap_qremove(vm_offset_t va, int count)
1365{
1366	int i;
1367	struct ia64_lpte *pte;
1368
1369	for (i = 0; i < count; i++) {
1370		pte = pmap_find_kpte(va);
1371		pmap_clear_pte(pte, va);
1372		va += PAGE_SIZE;
1373	}
1374}
1375
1376/*
1377 * Add a wired page to the kva.
1378 */
1379void
1380pmap_kenter(vm_offset_t va, vm_offset_t pa)
1381{
1382	struct ia64_lpte *pte;
1383	int wasvalid;
1384
1385	pte = pmap_find_kpte(va);
1386	wasvalid = pte->pte_p;
1387	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1388	if (wasvalid)
1389		ia64_ptc_g(va, PAGE_SHIFT << 2);
1390}
1391
1392/*
1393 * Remove a page from the kva
1394 */
1395void
1396pmap_kremove(vm_offset_t va)
1397{
1398	struct ia64_lpte *pte;
1399
1400	pte = pmap_find_kpte(va);
1401	pmap_clear_pte(pte, va);
1402}
1403
1404/*
1405 *	Used to map a range of physical addresses into kernel
1406 *	virtual address space.
1407 *
1408 *	The value passed in '*virt' is a suggested virtual address for
1409 *	the mapping. Architectures which can support a direct-mapped
1410 *	physical to virtual region can return the appropriate address
1411 *	within that region, leaving '*virt' unchanged. Other
1412 *	architectures should map the pages starting at '*virt' and
1413 *	update '*virt' with the first usable address after the mapped
1414 *	region.
1415 */
1416vm_offset_t
1417pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1418{
1419	return IA64_PHYS_TO_RR7(start);
1420}
1421
1422/*
1423 * This routine is very drastic, but can save the system
1424 * in a pinch.
1425 */
1426void
1427pmap_collect()
1428{
1429	int i;
1430	vm_page_t m;
1431	static int warningdone = 0;
1432
1433	if (pmap_pagedaemon_waken == 0)
1434		return;
1435
1436	if (warningdone < 5) {
1437		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1438		warningdone++;
1439	}
1440
1441	for(i = 0; i < vm_page_array_size; i++) {
1442		m = &vm_page_array[i];
1443		if (m->wire_count || m->hold_count || m->busy ||
1444		    (m->flags & (PG_BUSY | PG_UNMANAGED)))
1445			continue;
1446		pmap_remove_all(m);
1447	}
1448	pmap_pagedaemon_waken = 0;
1449}
1450
1451/*
1452 * Remove a single page from a process address space
1453 */
1454static void
1455pmap_remove_page(pmap_t pmap, vm_offset_t va)
1456{
1457	struct ia64_lpte *pte;
1458
1459	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1460		("removing page for non-current pmap"));
1461
1462	pte = pmap_find_vhpt(va);
1463	if (pte) {
1464		pmap_remove_pte(pmap, pte, va, 0, 1);
1465		pmap_invalidate_page(pmap, va);
1466	}
1467	return;
1468}
1469
1470/*
1471 *	Remove the given range of addresses from the specified map.
1472 *
1473 *	It is assumed that the start and end are properly
1474 *	rounded to the page size.
1475 */
1476void
1477pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1478{
1479	pmap_t oldpmap;
1480	vm_offset_t va;
1481	pv_entry_t pv;
1482	struct ia64_lpte *pte;
1483
1484	if (pmap == NULL)
1485		return;
1486
1487	if (pmap->pm_stats.resident_count == 0)
1488		return;
1489
1490	oldpmap = pmap_install(pmap);
1491
1492	/*
1493	 * Special handling for removing a single page: it is a very
1494	 * common operation and it is easy to short-circuit some
1495	 * code for it.
1496	 */
1497	if (sva + PAGE_SIZE == eva) {
1498		pmap_remove_page(pmap, sva);
1499		pmap_install(oldpmap);
1500		return;
1501	}
1502
1503	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1504		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1505			va = pv->pv_va;
1506			if (va >= sva && va < eva) {
1507				pte = pmap_find_vhpt(va);
1508				pmap_remove_pte(pmap, pte, va, pv, 1);
1509				pmap_invalidate_page(pmap, va);
1510			}
1511		}
1512
1513	} else {
1514		for (va = sva; va < eva; va += PAGE_SIZE) {
1515			pte = pmap_find_vhpt(va);
1516			if (pte) {
1517				pmap_remove_pte(pmap, pte, va, 0, 1);
1518				pmap_invalidate_page(pmap, va);
1519			}
1520		}
1521	}
1522
1523	pmap_install(oldpmap);
1524}
1525
1526/*
1527 *	Routine:	pmap_remove_all
1528 *	Function:
1529 *		Removes this physical page from
1530 *		all physical maps in which it resides.
1531 *		Reflects back modify bits to the pager.
1532 *
1533 *	Notes:
1534 *		Original versions of this routine were very
1535 *		inefficient because they iteratively called
1536 *		pmap_remove (slow...)
1537 */
1538
1539void
1540pmap_remove_all(vm_page_t m)
1541{
1542	pmap_t oldpmap;
1543	pv_entry_t pv;
1544	int s;
1545
1546#if defined(PMAP_DIAGNOSTIC)
1547	/*
1548	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1549	 * pages!
1550	 */
1551	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1552		panic("pmap_page_protect: illegal for unmanaged page, pa: 0x%lx", VM_PAGE_TO_PHYS(m));
1553	}
1554#endif
1555
1556	s = splvm();
1557
1558	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1559		struct ia64_lpte *pte;
1560		pmap_t pmap = pv->pv_pmap;
1561		vm_offset_t va = pv->pv_va;
1562
1563		oldpmap = pmap_install(pmap);
1564		pte = pmap_find_vhpt(va);
1565		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1566			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1567		pmap_remove_pte(pmap, pte, va, pv, 1);
1568		pmap_invalidate_page(pmap, va);
1569		pmap_install(oldpmap);
1570	}
1571
1572	vm_page_flag_clear(m, PG_WRITEABLE);
1573
1574	splx(s);
1575	return;
1576}
1577
1578/*
1579 *	Set the physical protection on the
1580 *	specified range of this map as requested.
1581 */
1582void
1583pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1584{
1585	pmap_t oldpmap;
1586	struct ia64_lpte *pte;
1587	int newprot;
1588
1589	if (pmap == NULL)
1590		return;
1591
1592	oldpmap = pmap_install(pmap);
1593
1594	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1595		pmap_remove(pmap, sva, eva);
1596		pmap_install(oldpmap);
1597		return;
1598	}
1599
1600	if (prot & VM_PROT_WRITE) {
1601		pmap_install(oldpmap);
1602		return;
1603	}
1604
1605	newprot = pte_prot(pmap, prot);
1606
1607	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1608		panic("pmap_protect: unaligned addresses");
1609
1610	while (sva < eva) {
1611		/*
1612		 * If page is invalid, skip this page
1613		 */
1614		pte = pmap_find_vhpt(sva);
1615		if (!pte) {
1616			sva += PAGE_SIZE;
1617			continue;
1618		}
1619
1620		if (pmap_pte_prot(pte) != newprot) {
1621			if (pte->pte_ig & PTE_IG_MANAGED) {
1622				vm_offset_t pa = pmap_pte_pa(pte);
1623				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1624				if (pte->pte_d) {
1625					if (pmap_track_modified(sva))
1626						vm_page_dirty(m);
1627					pte->pte_d = 0;
1628				}
1629				if (pte->pte_a) {
1630					vm_page_flag_set(m, PG_REFERENCED);
1631					pte->pte_a = 0;
1632				}
1633			}
1634			pmap_pte_set_prot(pte, newprot);
1635			pmap_update_vhpt(pte, sva);
1636			pmap_invalidate_page(pmap, sva);
1637		}
1638
1639		sva += PAGE_SIZE;
1640	}
1641	pmap_install(oldpmap);
1642}
1643
1644/*
1645 *	Insert the given physical page (p) at
1646 *	the specified virtual address (v) in the
1647 *	target physical map with the protection requested.
1648 *
1649 *	If specified, the page will be wired down, meaning
1650 *	that the related pte can not be reclaimed.
1651 *
1652 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1653 *	or lose information.  That is, this routine must actually
1654 *	insert this page into the given map NOW.
1655 */
1656void
1657pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1658	   boolean_t wired)
1659{
1660	pmap_t oldpmap;
1661	vm_offset_t pa;
1662	vm_offset_t opa;
1663	struct ia64_lpte origpte;
1664	struct ia64_lpte *pte;
1665	int managed;
1666
1667	if (pmap == NULL)
1668		return;
1669
1670	pmap_ensure_rid(pmap, va);
1671
1672	oldpmap = pmap_install(pmap);
1673
1674	va &= ~PAGE_MASK;
1675#ifdef PMAP_DIAGNOSTIC
1676	if (va > VM_MAX_KERNEL_ADDRESS)
1677		panic("pmap_enter: toobig");
1678#endif
1679
1680	/*
1681	 * Find (or create) a pte for the given mapping.
1682	 */
1683	pte = pmap_find_pte(va);
1684	origpte = *pte;
1685
1686	if (origpte.pte_p)
1687		opa = pmap_pte_pa(&origpte);
1688	else
1689		opa = 0;
1690	managed = 0;
1691
1692	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1693
1694	/*
1695	 * Mapping has not changed, must be protection or wiring change.
1696	 */
1697	if (origpte.pte_p && (opa == pa)) {
1698		/*
1699		 * Wiring change, just update stats. We don't worry about
1700		 * wiring PT pages as they remain resident as long as there
1701		 * are valid mappings in them. Hence, if a user page is wired,
1702		 * the PT page will be also.
1703		 */
1704		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1705			pmap->pm_stats.wired_count++;
1706		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1707			pmap->pm_stats.wired_count--;
1708
1709		/*
1710		 * We might be turning off write access to the page,
1711		 * so we go ahead and sense modify status.
1712		 */
1713		if (origpte.pte_ig & PTE_IG_MANAGED) {
1714			if (origpte.pte_d && pmap_track_modified(va)) {
1715				vm_page_t om;
1716				om = PHYS_TO_VM_PAGE(opa);
1717				vm_page_dirty(om);
1718			}
1719		}
1720
1721		managed = origpte.pte_ig & PTE_IG_MANAGED;
1722		goto validate;
1723	}
1724	/*
1725	 * Mapping has changed, invalidate old range and fall
1726	 * through to handle validating new mapping.
1727	 */
1728	if (opa) {
1729		int error;
1730		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1731		if (error)
1732			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1733	}
1734
1735	/*
1736	 * Enter on the PV list if part of our managed memory.
1737	 */
1738	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1739		pmap_insert_entry(pmap, va, m);
1740		managed |= PTE_IG_MANAGED;
1741	}
1742
1743	/*
1744	 * Increment counters
1745	 */
1746	pmap->pm_stats.resident_count++;
1747	if (wired)
1748		pmap->pm_stats.wired_count++;
1749
1750validate:
1751
1752	/*
1753	 * Now validate mapping with desired protection/wiring. This
1754	 * adds the pte to the VHPT if necessary.
1755	 */
1756	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1757		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1758
1759	/*
1760	 * if the mapping or permission bits are different, we need
1761	 * to invalidate the page.
1762	 */
1763	if (!pmap_equal_pte(&origpte, pte))
1764		pmap_invalidate_page(pmap, va);
1765
1766	pmap_install(oldpmap);
1767}
1768
1769/*
1770 * this code makes some *MAJOR* assumptions:
1771 * 1. Current pmap & pmap exists.
1772 * 2. Not wired.
1773 * 3. Read access.
1774 * 4. No page table pages.
1775 * 5. Tlbflush is deferred to calling procedure.
1776 * 6. Page IS managed.
1777 * but is *MUCH* faster than pmap_enter...
1778 */
1779
1780static void
1781pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1782{
1783	struct ia64_lpte *pte;
1784	pmap_t oldpmap;
1785
1786	pmap_ensure_rid(pmap, va);
1787
1788	oldpmap = pmap_install(pmap);
1789
1790	pte = pmap_find_pte(va);
1791	if (pte->pte_p)
1792		return;
1793
1794	/*
1795	 * Enter on the PV list since it's part of our managed memory.
1796	 */
1797	pmap_insert_entry(pmap, va, m);
1798
1799	/*
1800	 * Increment counters
1801	 */
1802	pmap->pm_stats.resident_count++;
1803
1804	/*
1805	 * Initialise PTE with read-only protection and enter into VHPT.
1806	 */
1807	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1808		     PTE_IG_MANAGED,
1809		     PTE_PL_USER, PTE_AR_R);
1810
1811	pmap_install(oldpmap);
1812}
1813
1814/*
1815 * Make temporary mapping for a physical address. This is called
1816 * during dump.
1817 */
1818void *
1819pmap_kenter_temporary(vm_offset_t pa, int i)
1820{
1821	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1822}
1823
1824#define MAX_INIT_PT (96)
1825/*
1826 * pmap_object_init_pt preloads the ptes for a given object
1827 * into the specified pmap.  This eliminates the blast of soft
1828 * faults on process startup and immediately after an mmap.
1829 */
1830void
1831pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1832		    vm_object_t object, vm_pindex_t pindex,
1833		    vm_size_t size, int limit)
1834{
1835	pmap_t oldpmap;
1836	vm_offset_t tmpidx;
1837	int psize;
1838	vm_page_t p;
1839	int objpgs;
1840
1841	if (pmap == NULL || object == NULL)
1842		return;
1843
1844	oldpmap = pmap_install(pmap);
1845
1846	psize = ia64_btop(size);
1847
1848	if ((object->type != OBJT_VNODE) ||
1849		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1850			(object->resident_page_count > MAX_INIT_PT))) {
1851		pmap_install(oldpmap);
1852		return;
1853	}
1854
1855	if (psize + pindex > object->size) {
1856		if (object->size < pindex)
1857			return;
1858		psize = object->size - pindex;
1859	}
1860
1861	/*
1862	 * if we are processing a major portion of the object, then scan the
1863	 * entire thing.
1864	 */
1865	if (psize > (object->resident_page_count >> 2)) {
1866		objpgs = psize;
1867
1868		for (p = TAILQ_FIRST(&object->memq);
1869		    ((objpgs > 0) && (p != NULL));
1870		    p = TAILQ_NEXT(p, listq)) {
1871
1872			tmpidx = p->pindex;
1873			if (tmpidx < pindex) {
1874				continue;
1875			}
1876			tmpidx -= pindex;
1877			if (tmpidx >= psize) {
1878				continue;
1879			}
1880			/*
1881			 * don't allow an madvise to blow away our really
1882			 * free pages allocating pv entries.
1883			 * free pages by allocating pv entries.
1884			if ((limit & MAP_PREFAULT_MADVISE) &&
1885			    cnt.v_free_count < cnt.v_free_reserved) {
1886				break;
1887			}
1888			vm_page_lock_queues();
1889			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1890				(p->busy == 0) &&
1891			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1892				if ((p->queue - p->pc) == PQ_CACHE)
1893					vm_page_deactivate(p);
1894				vm_page_busy(p);
1895				vm_page_unlock_queues();
1896				pmap_enter_quick(pmap,
1897						 addr + ia64_ptob(tmpidx), p);
1898				vm_page_lock_queues();
1899				vm_page_wakeup(p);
1900			}
1901			vm_page_unlock_queues();
1902			objpgs -= 1;
1903		}
1904	} else {
1905		/*
1906		 * else lookup the pages one-by-one.
1907		 */
1908		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1909			/*
1910			 * don't allow an madvise to blow away our really
1911			 * free pages by allocating pv entries.
1912			 */
1913			if ((limit & MAP_PREFAULT_MADVISE) &&
1914			    cnt.v_free_count < cnt.v_free_reserved) {
1915				break;
1916			}
1917			p = vm_page_lookup(object, tmpidx + pindex);
1918			if (p == NULL)
1919				continue;
1920			vm_page_lock_queues();
1921			if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
1922				(p->busy == 0) &&
1923			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1924				if ((p->queue - p->pc) == PQ_CACHE)
1925					vm_page_deactivate(p);
1926				vm_page_busy(p);
1927				vm_page_unlock_queues();
1928				pmap_enter_quick(pmap,
1929						 addr + ia64_ptob(tmpidx), p);
1930				vm_page_lock_queues();
1931				vm_page_wakeup(p);
1932			}
1933			vm_page_unlock_queues();
1934		}
1935	}
1936	pmap_install(oldpmap);
1937	return;
1938}
1939
1940/*
1941 * pmap_prefault provides a quick way of clustering
1942 * pagefaults into a process's address space.  It is a "cousin"
1943 * of pmap_object_init_pt, except it runs at page fault time instead
1944 * of mmap time.
1945 */
1946#define PFBAK 4
1947#define PFFOR 4
1948#define PAGEORDER_SIZE (PFBAK+PFFOR)
1949
1950static int pmap_prefault_pageorder[] = {
1951	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
1952	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1953	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1954	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1955};
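/*
 * The table interleaves pages behind and ahead of the faulting
 * address, nearest first, so pmap_prefault pulls in up to four
 * resident pages on either side.
 */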
1956
1957void
1958pmap_prefault(pmap, addra, entry)
1959	pmap_t pmap;
1960	vm_offset_t addra;
1961	vm_map_entry_t entry;
1962{
1963	int i;
1964	vm_offset_t starta;
1965	vm_offset_t addr;
1966	vm_pindex_t pindex;
1967	vm_page_t m, mpte;
1968	vm_object_t object;
1969
1970	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1971		return;
1972
1973	object = entry->object.vm_object;
1974
1975	starta = addra - PFBAK * PAGE_SIZE;
1976	if (starta < entry->start) {
1977		starta = entry->start;
1978	} else if (starta > addra) {
1979		starta = 0;
1980	}
1981
1982	mpte = NULL;
1983	for (i = 0; i < PAGEORDER_SIZE; i++) {
1984		vm_object_t lobject;
1985		struct ia64_lpte *pte;
1986
1987		addr = addra + pmap_prefault_pageorder[i];
1988		if (addr > addra + (PFFOR * PAGE_SIZE))
1989			addr = 0;
1990
1991		if (addr < starta || addr >= entry->end)
1992			continue;
1993
1994		pte = pmap_find_vhpt(addr);
1995		if (pte && pte->pte_p)
1996			continue;
1997
1998		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
1999		lobject = object;
2000		for (m = vm_page_lookup(lobject, pindex);
2001		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2002		    lobject = lobject->backing_object) {
2003			if (lobject->backing_object_offset & PAGE_MASK)
2004				break;
2005			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2006			m = vm_page_lookup(lobject->backing_object, pindex);
2007		}
2008
2009		/*
2010		 * give up when a page is not in memory
2011		 */
2012		if (m == NULL)
2013			break;
2014		vm_page_lock_queues();
2015		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2016			(m->busy == 0) &&
2017		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2018
2019			if ((m->queue - m->pc) == PQ_CACHE) {
2020				vm_page_deactivate(m);
2021			}
2022			vm_page_busy(m);
2023			vm_page_unlock_queues();
2024			pmap_enter_quick(pmap, addr, m);
2025			vm_page_lock_queues();
2026			vm_page_wakeup(m);
2027		}
2028		vm_page_unlock_queues();
2029	}
2030}
2031
2032/*
2033 *	Routine:	pmap_change_wiring
2034 *	Function:	Change the wiring attribute for a map/virtual-address
2035 *			pair.
2036 *	In/out conditions:
2037 *			The mapping must already exist in the pmap.
2038 */
2039void
2040pmap_change_wiring(pmap, va, wired)
2041	register pmap_t pmap;
2042	vm_offset_t va;
2043	boolean_t wired;
2044{
2045	pmap_t oldpmap;
2046	struct ia64_lpte *pte;
2047
2048	if (pmap == NULL)
2049		return;
2050
2051	oldpmap = pmap_install(pmap);
2052
2053	pte = pmap_find_vhpt(va);
2054
2055	if (wired && !pmap_pte_w(pte))
2056		pmap->pm_stats.wired_count++;
2057	else if (!wired && pmap_pte_w(pte))
2058		pmap->pm_stats.wired_count--;
2059
2060	/*
2061	 * Wiring is not a hardware characteristic so there is no need to
2062	 * invalidate TLB.
2063	 */
2064	pmap_pte_set_w(pte, wired);
2065
2066	pmap_install(oldpmap);
2067}
2068
2069
2070
2071/*
2072 *	Copy the range specified by src_addr/len
2073 *	from the source map to the range dst_addr/len
2074 *	in the destination map.
2075 *
2076 *	This routine is only advisory and need not do anything.
2077 */
2078
2079void
2080pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2081	  vm_offset_t src_addr)
2082{
2083}
2084
2085
2086/*
2087 *	pmap_zero_page zeros the specified hardware page by
2088 *	mapping it into virtual memory and using bzero to clear
2089 *	its contents.
2090 */
2091
2092void
2093pmap_zero_page(vm_page_t m)
2094{
2095	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2096	bzero((caddr_t) va, PAGE_SIZE);
2097}
2098
2099
2100/*
2101 *	pmap_zero_page_area zeros the specified hardware page by
2102 *	mapping it into virtual memory and using bzero to clear
2103 *	its contents.
2104 *
2105 *	off and size must reside within a single page.
2106 */
2107
2108void
2109pmap_zero_page_area(vm_page_t m, int off, int size)
2110{
2111	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2112	bzero((char *)(caddr_t)va + off, size);
2113}
2114
2115
2116/*
2117 *	pmap_zero_page_idle zeros the specified hardware page by
2118 *	mapping it into virtual memory and using bzero to clear
2119 *	its contents.  This is for the vm_idlezero process.
2120 */
2121
2122void
2123pmap_zero_page_idle(vm_page_t m)
2124{
2125	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2126	bzero((caddr_t) va, PAGE_SIZE);
2127}
2128
2129
2130/*
2131 *	pmap_copy_page copies the specified (machine independent)
2132 *	page by mapping both the source and destination pages into
2133 *	virtual memory and using bcopy to copy their contents.
2135 */
2136void
2137pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2138{
2139	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2140	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2141	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2142}
2143
2144/*
2145 * Returns true if the pmap's pv is one of the first
2146 * 16 pvs linked to from this page.  This count may
2147 * be changed upwards or downwards in the future; it
2148 * is only necessary that true be returned for a small
2149 * subset of pmaps for proper page aging.
2150 */
2151boolean_t
2152pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2153{
2154	pv_entry_t pv;
2155	int loops = 0;
2156	int s;
2157
2158	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2159		return FALSE;
2160
2161	s = splvm();
2162
2163	/*
2164	 * Check current mappings, returning immediately if found.
2165	 */
2166	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2167		if (pv->pv_pmap == pmap) {
2168			splx(s);
2169			return TRUE;
2170		}
2171		loops++;
2172		if (loops >= 16)
2173			break;
2174	}
2175	splx(s);
2176	return (FALSE);
2177}
2178
2179#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2180/*
2181 * Remove all pages from the specified address space; this aids process
2182 * exit speeds.  Also, this code is special-cased for the current process
2183 * only, but can have the more generic (and slightly slower) mode
2184 * enabled.  This is much faster than pmap_remove in the case of running
2185 * down an entire address space.
2187 */
2188void
2189pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2190{
2191	pv_entry_t pv, npv;
2192	int s;
2193
2194#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2195	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2196		printf("warning: pmap_remove_pages called with non-current pmap\n");
2197		return;
2198	}
2199#endif
2200
2201	s = splvm();
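	/*
	 * Walk the pmap's own pv list instead of scanning the whole
	 * virtual address range; entries outside [sva, eva) are skipped.
	 */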
2202	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2203		pv;
2204		pv = npv) {
2205		struct ia64_lpte *pte;
2206
2207		npv = TAILQ_NEXT(pv, pv_plist);
2208
2209		if (pv->pv_va >= eva || pv->pv_va < sva) {
2210			continue;
2211		}
2212
2213		pte = pmap_find_vhpt(pv->pv_va);
2214		if (!pte)
2215			panic("pmap_remove_pages: page on pm_pvlist has no pte");
2216
2217		/*
2218		 * We cannot remove wired pages from a process' mapping at
2219		 * this time.
2220		 */
2221		if (pte->pte_ig & PTE_IG_WIRED) {
2222			continue;
2223		}
2224
2225		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2226	}
2227	splx(s);
2228
2229	pmap_invalidate_all(pmap);
2230}
2231
2232/*
2233 *      pmap_page_protect:
2234 *
2235 *      Lower the permission for all mappings to a given page.
2236 */
2237void
2238pmap_page_protect(vm_page_t m, vm_prot_t prot)
2239{
2240	pv_entry_t pv;
2241
2242	if ((prot & VM_PROT_WRITE) != 0)
2243		return;
2244	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
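		/*
		 * Downgrade each existing mapping of the page to the new
		 * protection and flush the stale translation.
		 */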
2245		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2246			int newprot = pte_prot(pv->pv_pmap, prot);
2247			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2248			struct ia64_lpte *pte;
2249			pte = pmap_find_vhpt(pv->pv_va);
2250			pmap_pte_set_prot(pte, newprot);
2251			pmap_update_vhpt(pte, pv->pv_va);
2252			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2253			pmap_install(oldpmap);
2254		}
2255	} else {
2256		pmap_remove_all(m);
2257	}
2258}
2259
2260vm_offset_t
2261pmap_phys_address(int ppn)
2262{
2263	return (ia64_ptob(ppn));
2264}
2265
2266/*
2267 *	pmap_ts_referenced:
2268 *
2269 *	Return a count of reference bits for a page, clearing those bits.
2270 *	It is not necessary for every reference bit to be cleared, but it
2271 *	is necessary that 0 only be returned when there are truly no
2272 *	reference bits set.
2273 *
2274 *	XXX: The exact number of bits to check and clear is a matter that
2275 *	should be tested and standardized at some point in the future for
2276 *	optimal aging of shared pages.
2277 */
2278int
2279pmap_ts_referenced(vm_page_t m)
2280{
2281	pv_entry_t pv;
2282	int count = 0;
2283
2284	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2285		return 0;
2286
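	/*
	 * Clear the accessed bit in each mapping's PTE, push the change
	 * into the VHPT, and purge the TLB entry so future references
	 * are noticed again.
	 */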
2287	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2288		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2289		struct ia64_lpte *pte;
2290		pte = pmap_find_vhpt(pv->pv_va);
2291		if (pte->pte_a) {
2292			count++;
2293			pte->pte_a = 0;
2294			pmap_update_vhpt(pte, pv->pv_va);
2295			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2296		}
2297		pmap_install(oldpmap);
2298	}
2299
2300	return count;
2301}
2302
2303#if 0
2304/*
2305 *	pmap_is_referenced:
2306 *
2307 *	Return whether or not the specified physical page was referenced
2308 *	in any physical maps.
2309 */
2310static boolean_t
2311pmap_is_referenced(vm_page_t m)
2312{
2313	pv_entry_t pv;
2314
2315	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2316		return FALSE;
2317
2318	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2319		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2320		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2321		pmap_install(oldpmap);
2322		if (pte->pte_a)
2323			return 1;
2324	}
2325
2326	return 0;
2327}
2328#endif
2329
2330/*
2331 *	pmap_is_modified:
2332 *
2333 *	Return whether or not the specified physical page was modified
2334 *	in any physical maps.
2335 */
2336boolean_t
2337pmap_is_modified(vm_page_t m)
2338{
2339	pv_entry_t pv;
2340
2341	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2342		return FALSE;
2343
2344	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2345		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2346		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2347		pmap_install(oldpmap);
2348		if (pte->pte_d)
2349			return 1;
2350	}
2351
2352	return 0;
2353}
2354
2355/*
2356 *	Clear the modify bits on the specified physical page.
2357 */
2358void
2359pmap_clear_modify(vm_page_t m)
2360{
2361	pv_entry_t pv;
2362
2363	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2364		return;
2365
2366	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2367		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2368		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2369		if (pte->pte_d) {
2370			pte->pte_d = 0;
2371			pmap_update_vhpt(pte, pv->pv_va);
2372			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2373		}
2374		pmap_install(oldpmap);
2375	}
2376}
2377
2378/*
2379 *	pmap_clear_reference:
2380 *
2381 *	Clear the reference bit on the specified physical page.
2382 */
2383void
2384pmap_clear_reference(vm_page_t m)
2385{
2386	pv_entry_t pv;
2387
2388	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2389		return;
2390
2391	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2392		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2393		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2394		if (pte->pte_a) {
2395			pte->pte_a = 0;
2396			pmap_update_vhpt(pte, pv->pv_va);
2397			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2398		}
2399		pmap_install(oldpmap);
2400	}
2401}
2402
2403/*
2404 * Miscellaneous support routines follow
2405 */
2406
2407static void
2408ia64_protection_init(void)
2409{
2410	int prot, *kp, *up;
2411
2412	kp = protection_codes[0];
2413	up = protection_codes[1];
2414
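	/*
	 * Each protection code packs an access-rights value (PTE_AR_*)
	 * in the upper bits with a privilege level (PTE_PL_*) in the low
	 * two bits.  protection_codes[0] serves the kernel pmap and [1]
	 * user pmaps; the index is the VM_PROT_* combination.
	 */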
2415	for (prot = 0; prot < 8; prot++) {
2416		switch (prot) {
2417		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2418			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2419			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2420			break;
2421
2422		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2423			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2424			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2425			break;
2426
2427		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2428			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2429			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2430			break;
2431
2432		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2433			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2434			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2435			break;
2436
2437		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2438			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2439			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2440			break;
2441
2442		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2443			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2444			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2445			break;
2446
2447		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2448			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2449			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2450			break;
2451
2452		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2453			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2454			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2455			break;
2456		}
2457	}
2458}
2459
2460/*
2461 * Map a set of physical memory pages into the kernel virtual
2462 * address space. Return a pointer to where it is mapped. This
2463 * routine is intended to be used for mapping device memory,
2464 * NOT real memory.
2465 */
2466void *
2467pmap_mapdev(vm_offset_t pa, vm_size_t size)
2468{
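	/*
	 * Region 6 is an uncacheable identity mapping of physical
	 * addresses, which is what device memory wants.
	 */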
2469	return (void*) IA64_PHYS_TO_RR6(pa);
2470}
2471
2472/*
2473 * 'Unmap' a range mapped by pmap_mapdev().
2474 */
2475void
2476pmap_unmapdev(vm_offset_t va, vm_size_t size)
2477{
2478	return;
2479}
2480
2481/*
2482 * perform the pmap work for mincore
2483 */
2484int
2485pmap_mincore(pmap_t pmap, vm_offset_t addr)
2486{
2487	pmap_t oldpmap;
2488	struct ia64_lpte *pte;
2489	int val = 0;
2490
2491	oldpmap = pmap_install(pmap);
2492	pte = pmap_find_vhpt(addr);
2493	pmap_install(oldpmap);
2494
2495	if (!pte)
2496		return 0;
2497
2498	if (pmap_pte_v(pte)) {
2499		vm_page_t m;
2500		vm_offset_t pa;
2501
2502		val = MINCORE_INCORE;
2503		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2504			return val;
2505
2506		pa = pmap_pte_pa(pte);
2507
2508		m = PHYS_TO_VM_PAGE(pa);
2509
2510		/*
2511		 * Modified by us
2512		 */
2513		if (pte->pte_d)
2514			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2515		/*
2516		 * Modified by someone
2517		 */
2518		else if (pmap_is_modified(m))
2519			val |= MINCORE_MODIFIED_OTHER;
2520		/*
2521		 * Referenced by us
2522		 */
2523		if (pte->pte_a)
2524			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2525
2526		/*
2527		 * Referenced by someone
2528		 */
2529		else if (pmap_ts_referenced(m)) {
2530			val |= MINCORE_REFERENCED_OTHER;
2531			vm_page_flag_set(m, PG_REFERENCED);
2532		}
2533	}
2534	return val;
2535}
2536
2537void
2538pmap_activate(struct thread *td)
2539{
2540	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2541}
2542
2543pmap_t
2544pmap_install(pmap_t pmap)
2545{
2546	pmap_t oldpmap;
2547	int i;
2548
2549	critical_enter();
2550
2551	oldpmap = PCPU_GET(current_pmap);
2552
2553	if (pmap == oldpmap || pmap == kernel_pmap) {
2554		critical_exit();
2555		return pmap;
2556	}
2557
2558	if (oldpmap) {
2559		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2560	}
2561
2562	PCPU_SET(current_pmap, pmap);
2563	if (!pmap) {
2564		/*
2565		 * RIDs 0..4 have no mappings, so any access through them
2566		 * generates a page fault.
2567		 */
2568		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2569		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2570		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2571		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2572		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2573		critical_exit();
2574		return oldpmap;
2575	}
2576
2577	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2578
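	/*
	 * Point region registers 0-4 at this pmap's RIDs.  The register
	 * value holds the RID starting at bit 8, the preferred page size
	 * in bits 7:2 and the VHPT walker enable bit in bit 0.
	 */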
2579	for (i = 0; i < 5; i++)
2580		ia64_set_rr(IA64_RR_BASE(i),
2581			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2582
2583	critical_exit();
2584	return oldpmap;
2585}
2586
2587vm_offset_t
2588pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2589{
2590
2591	return addr;
2592}
2593
2594#include "opt_ddb.h"
2595
2596#ifdef DDB
2597
2598#include <ddb/ddb.h>
2599
2600static const char*	psnames[] = {
2601	"1B",	"2B",	"4B",	"8B",
2602	"16B",	"32B",	"64B",	"128B",
2603	"256B",	"512B",	"1K",	"2K",
2604	"4K",	"8K",	"16K",	"32K",
2605	"64K",	"128K",	"256K",	"512K",
2606	"1M",	"2M",	"4M",	"8M",
2607	"16M",	"32M",	"64M",	"128M",
2608	"256M",	"512M",	"1G",	"2G"
2609};
2610
2611static void
2612print_trs(int type)
2613{
2614	struct ia64_pal_result	res;
2615	int			i, maxtr;
2616	struct {
2617		struct ia64_pte	pte;
2618		struct ia64_itir itir;
2619		struct ia64_ifa ifa;
2620		struct ia64_rr	rr;
2621	}			buf;
2622	static const char*	manames[] = {
2623		"WB",	"bad",	"bad",	"bad",
2624		"UC",	"UCE",	"WC",	"NaT",
2626	};
2627
2628	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2629	if (res.pal_status != 0) {
2630		db_printf("Can't get VM summary\n");
2631		return;
2632	}
2633
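	/*
	 * PAL_VM_SUMMARY reports the index of the highest implemented
	 * instruction (type 0) and data (type 1) translation register.
	 */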
2634	if (type == 0)
2635		maxtr = (res.pal_result[0] >> 40) & 0xff;
2636	else
2637		maxtr = (res.pal_result[0] >> 32) & 0xff;
2638
2639	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2640	for (i = 0; i <= maxtr; i++) {
2641		bzero(&buf, sizeof(buf));
2642		res = ia64_call_pal_stacked_physical
2643			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2644		if (!(res.pal_result[0] & 1))
2645			buf.pte.pte_ar = 0;
2646		if (!(res.pal_result[0] & 2))
2647			buf.pte.pte_pl = 0;
2648		if (!(res.pal_result[0] & 4))
2649			buf.pte.pte_d = 0;
2650		if (!(res.pal_result[0] & 8))
2651			buf.pte.pte_ma = 0;
2652		db_printf(
2653			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2654			buf.ifa.ifa_ig & 1,
2655			buf.rr.rr_rid,
2656			buf.ifa.ifa_vpn,
2657			buf.pte.pte_ppn,
2658			psnames[buf.itir.itir_ps],
2659			buf.pte.pte_ed,
2660			buf.pte.pte_ar,
2661			buf.pte.pte_pl,
2662			buf.pte.pte_d,
2663			buf.pte.pte_a,
2664			manames[buf.pte.pte_ma],
2665			buf.pte.pte_p,
2666			buf.itir.itir_key);
2667	}
2668}
2669
2670DB_COMMAND(itr, db_itr)
2671{
2672	print_trs(0);
2673}
2674
2675DB_COMMAND(dtr, db_dtr)
2676{
2677	print_trs(1);
2678}
2679
2680DB_COMMAND(rr, db_rr)
2681{
2682	int i;
2683	u_int64_t t;
2684	struct ia64_rr rr;
2685
2686	db_printf("RR RID    PgSz VE\n");
2687	for (i = 0; i < 8; i++) {
2688		__asm __volatile ("mov %0=rr[%1]"
2689				  : "=r"(t)
2690				  : "r"(IA64_RR_BASE(i)));
2691		*(u_int64_t *) &rr = t;
2692		db_printf("%d  %06x %4s %d\n",
2693		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2694	}
2695}
2696
2697DB_COMMAND(thash, db_thash)
2698{
2699	if (!have_addr)
2700		return;
2701
2702	db_printf("%p\n", (void *) ia64_thash(addr));
2703}
2704
2705DB_COMMAND(ttag, db_ttag)
2706{
2707	if (!have_addr)
2708		return;
2709
2710	db_printf("0x%lx\n", ia64_ttag(addr));
2711}
2712
2713#endif
2714