1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 109623 2003-01-21 08:56:16Z alfred $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
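/*
 * Illustrative sketch (hypothetical, not used elsewhere in this file):
 * under the group-of-eight scheme described above, the region ID to
 * program into region register 'rr' would be formed by placing the
 * region number in the low three bits of a per-pmap base ID.  Note
 * that this revision actually hands out user RIDs one at a time in
 * pmap_allocate_rid() rather than in groups of eight.
 */
#define	EXAMPLE_RID_FOR_RR(base_id, rr)	(((base_id) << 3) | (rr))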
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120#include <vm/uma_int.h>
121
122#include <sys/user.h>
123
124#include <machine/pal.h>
125#include <machine/md_var.h>
126
127MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
128
129#ifndef PMAP_SHPGPERPROC
130#define PMAP_SHPGPERPROC 200
131#endif
132
133#if defined(DIAGNOSTIC)
134#define PMAP_DIAGNOSTIC
135#endif
136
137#define MINPV 2048	/* Preallocate at least this many */
138#define MAXPV 20480	/* But no more than this */
139
140#if 0
141#define PMAP_DIAGNOSTIC
142#define PMAP_DEBUG
143#endif
144
145#if !defined(PMAP_DIAGNOSTIC)
146#define PMAP_INLINE __inline
147#else
148#define PMAP_INLINE
149#endif
150
151/*
152 * Macros to test and modify fields of an ia64 PTE.
153 */
154#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
155#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
156#define pmap_pte_v(pte)		((pte)->pte_p)
157#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
158#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
159
160#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
161				:((pte)->pte_ig &= ~PTE_IG_WIRED))
162#define pmap_pte_set_prot(pte, v) do {		\
163    (pte)->pte_ar = v >> 2;			\
164    (pte)->pte_pl = v & 3;			\
165} while (0)
166
167/*
168 * Given a map and a machine independent protection code,
169 * convert to an ia64 protection code.
170 */
171#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
172#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
173#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
174int	protection_codes[2][8];
175
176/*
177 * Return non-zero if this pmap is currently active
178 */
179#define pmap_isactive(pmap)	(pmap->pm_active)
180
181/*
182 * Statically allocated kernel pmap
183 */
184struct pmap kernel_pmap_store;
185
186vm_offset_t avail_start;	/* PA of first available physical page */
187vm_offset_t avail_end;		/* PA of last available physical page */
188vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
189vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
190static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
191
192vm_offset_t vhpt_base, vhpt_size;
193
194/*
195 * We use an object to own the kernel's 'page tables'. For simplicity,
196 * we use one page directory to index a set of pages containing
197 * ia64_lptes. This gives us up to 2Gb of kernel virtual space.
198 */
199static vm_object_t kptobj;
200static int nkpt;
201static struct ia64_lpte **kptdir;
202#define KPTE_DIR_INDEX(va) \
203	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
204#define KPTE_PTE_INDEX(va) \
205	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
206#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
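/*
 * Worked example (a sketch, assuming the default 8KB pages, i.e.
 * PAGE_SHIFT == 13, and the 32-byte ia64_lpte implied by the masks
 * above): the directory page holds 2^(13-3) = 1024 pointers, each
 * leaf page holds NKPTEPG = 2^(13-5) = 256 PTEs, and each PTE maps one
 * 8KB page, so the kernel 'page tables' cover 1024 * 256 * 8KB = 2GB
 * of region 5, as noted above.  The macro below is illustrative only
 * and is not used elsewhere in this file.
 */
#define	EXAMPLE_KVA_COVERAGE	((1L << (PAGE_SHIFT - 3)) * NKPTEPG * PAGE_SIZE)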
207
208vm_offset_t kernel_vm_end;
209
210/*
211 * Values for ptc.e. XXX values for SKI.
212 */
213static u_int64_t pmap_ptc_e_base = 0x100000000;
214static u_int64_t pmap_ptc_e_count1 = 3;
215static u_int64_t pmap_ptc_e_count2 = 2;
216static u_int64_t pmap_ptc_e_stride1 = 0x2000;
217static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
218
219/*
220 * Data for the RID allocator
221 */
222static u_int64_t *pmap_ridbusy;
223static int pmap_ridmax, pmap_ridcount;
224struct mtx pmap_ridmutex;
225
226/*
227 * Data for the pv entry allocation mechanism
228 */
229static uma_zone_t pvzone;
230static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
231int pmap_pagedaemon_waken;
232static struct pv_entry *pvbootentries;
233static int pvbootnext, pvbootmax;
234
235/*
236 * Data for allocating PTEs for user processes.
237 */
238static uma_zone_t ptezone;
239
240/*
241 * VHPT instrumentation.
242 */
243static int pmap_vhpt_inserts;
244static int pmap_vhpt_collisions;
245static int pmap_vhpt_resident;
246SYSCTL_DECL(_vm_stats);
247SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
248SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
249	   &pmap_vhpt_inserts, 0, "");
250SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
251	   &pmap_vhpt_collisions, 0, "");
252SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
253	   &pmap_vhpt_resident, 0, "");
254
255static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
256static pv_entry_t get_pv_entry(void);
257static void	ia64_protection_init(void);
258
259static void	pmap_invalidate_all(pmap_t pmap);
260static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
261
262vm_offset_t
263pmap_steal_memory(vm_size_t size)
264{
265	vm_size_t bank_size;
266	vm_offset_t pa, va;
267
268	size = round_page(size);
269
270	bank_size = phys_avail[1] - phys_avail[0];
271	while (size > bank_size) {
272		int i;
273		for (i = 0; phys_avail[i+2]; i+= 2) {
274			phys_avail[i] = phys_avail[i+2];
275			phys_avail[i+1] = phys_avail[i+3];
276		}
277		phys_avail[i] = 0;
278		phys_avail[i+1] = 0;
279		if (!phys_avail[0])
280			panic("pmap_steal_memory: out of memory");
281		bank_size = phys_avail[1] - phys_avail[0];
282	}
283
284	pa = phys_avail[0];
285	phys_avail[0] += size;
286
287	va = IA64_PHYS_TO_RR7(pa);
288	bzero((caddr_t) va, size);
289	return va;
290}
291
292/*
293 *	Bootstrap the system enough to run with virtual memory.
294 */
295void
296pmap_bootstrap()
297{
298	int i, j, count, ridbits;
299	struct ia64_pal_result res;
300
301	/*
302	 * Query the PAL Code to find the loop parameters for the
303	 * ptc.e instruction.
304	 */
305	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
306	if (res.pal_status != 0)
307		panic("Can't configure ptc.e parameters");
308	pmap_ptc_e_base = res.pal_result[0];
309	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
310	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
311	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
312	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
313	if (bootverbose)
314		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
315		       "stride1=0x%lx, stride2=0x%lx\n",
316		       pmap_ptc_e_base,
317		       pmap_ptc_e_count1,
318		       pmap_ptc_e_count2,
319		       pmap_ptc_e_stride1,
320		       pmap_ptc_e_stride2);
321
322	/*
323	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
324	 */
325	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
326	if (res.pal_status != 0) {
327		if (bootverbose)
328			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
329		ridbits = 18; /* guaranteed minimum */
330	} else {
331		ridbits = (res.pal_result[1] >> 8) & 0xff;
332		if (bootverbose)
333			printf("Processor supports %d Region ID bits\n",
334			       ridbits);
335	}
336	pmap_ridmax = (1 << ridbits);
337	pmap_ridcount = 8;
338	pmap_ridbusy = (u_int64_t *)
339		pmap_steal_memory(pmap_ridmax / 8);
340	bzero(pmap_ridbusy, pmap_ridmax / 8);
341	pmap_ridbusy[0] |= 0xff;
342	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
343
344	/*
345	 * Allocate some memory for initial kernel 'page tables'.
346	 */
347	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
348	for (i = 0; i < NKPT; i++) {
349		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
350	}
351	nkpt = NKPT;
352
353	avail_start = phys_avail[0];
354	for (i = 0; phys_avail[i+2]; i+= 2) ;
355	avail_end = phys_avail[i+1];
356	count = i+2;
357
358	/*
359	 * Figure out a useful size for the VHPT, based on the size of
360	 * physical memory, and try to locate a region which is large
361	 * enough to contain the VHPT (which must be a power of two in
362	 * size and aligned to a natural boundary).
363	 * Don't use the difference between avail_start and avail_end
364	 * as a measure of memory size. The address space is often
365	 * sparse enough that we would (try to) create a huge VHPT.
366	 */
367	vhpt_size = 15;
368	while ((1<<vhpt_size) < ia64_btop(Maxmem) * 32)
369		vhpt_size++;
370
371	vhpt_base = 0;
372	while (!vhpt_base) {
373		vm_offset_t mask;
374		if (bootverbose)
375			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
376		mask = (1L << vhpt_size) - 1;
377		for (i = 0; i < count; i += 2) {
378			vm_offset_t base, limit;
379			base = (phys_avail[i] + mask) & ~mask;
380			limit = base + (1L << vhpt_size);
381			if (limit <= phys_avail[i+1])
382				/*
383				 * VHPT can fit in this region
384				 */
385				break;
386		}
387		if (!phys_avail[i]) {
388			/*
389			 * Can't fit, try next smaller size.
390			 */
391			vhpt_size--;
392		} else {
393			vhpt_base = (phys_avail[i] + mask) & ~mask;
394		}
395	}
396	if (vhpt_size < 15)
397		panic("Can't find space for VHPT");
398
399	if (bootverbose)
400		printf("Putting VHPT at %p\n", (void *) vhpt_base);
401	if (vhpt_base != phys_avail[i]) {
402		/*
403		 * Split this region.
404		 */
405		if (bootverbose)
406			printf("Splitting [%p-%p]\n",
407			       (void *) phys_avail[i],
408			       (void *) phys_avail[i+1]);
409		for (j = count; j > i; j -= 2) {
410			phys_avail[j] = phys_avail[j-2];
411			phys_avail[j+1] = phys_avail[j-2+1];
412		}
413		phys_avail[count+2] = 0;
414		phys_avail[count+3] = 0;
415		phys_avail[i+1] = vhpt_base;
416		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
417	} else {
418		phys_avail[i] = vhpt_base + (1L << vhpt_size);
419	}
420
421	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
422	bzero((void *) vhpt_base, (1L << vhpt_size));
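	/*
	 * Point the PTA register at the VHPT: the (naturally aligned)
	 * base address in the high bits, bit 8 (vf) selecting the long
	 * format, the log2 of the size in bits 7:2 and bit 0 (ve)
	 * enabling the walker.
	 */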
423	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
424			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
425
426	virtual_avail = IA64_RR_BASE(5);
427	virtual_end = IA64_RR_BASE(6)-1;
428
429	/*
430	 * Initialize protection array.
431	 */
432	ia64_protection_init();
433
434	/*
435	 * Initialize the kernel pmap (which is statically allocated).
436	 */
437	for (i = 0; i < 5; i++)
438		kernel_pmap->pm_rid[i] = 0;
439	kernel_pmap->pm_active = 1;
440	TAILQ_INIT(&kernel_pmap->pm_pvlist);
441	PCPU_SET(current_pmap, kernel_pmap);
442
443	/*
444	 * Region 5 is mapped via the vhpt.
445	 */
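	/*
	 * A region register holds the region ID in bits 31:8, the
	 * preferred page size (log2) in bits 7:2 and the VHPT walker
	 * enable in bit 0; hence (rid << 8) | (PAGE_SHIFT << 2) | 1.
	 */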
446	ia64_set_rr(IA64_RR_BASE(5),
447		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
448
449	/*
450	 * Region 6 is direct mapped UC and region 7 is direct mapped
451	 * cacheable (WB). The details of this are controlled by the
452	 * Alt {I,D}TLB handlers. Here we just make sure that they have
453	 * the largest possible page size to minimise TLB usage.
454	 */
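	/*
	 * The value 28 selects a 256MB (2^28) preferred page size; the
	 * walker enable bit is left clear since these regions are
	 * handled entirely by the Alt TLB miss handlers.
	 */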
455	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
456	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
457
458	/*
459	 * Reserve some memory for allocating pvs while bootstrapping
460	 * the pv allocator. We need to have enough to cover mapping
461	 * the kmem_alloc region used to allocate the initial_pvs in
462	 * pmap_init. In general, the size of this region is
463	 * approximately (# physical pages) * (size of pv entry).
464	 */
465	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
466	pvbootentries = (struct pv_entry *)
467		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
468	pvbootnext = 0;
469
470	/*
471	 * Clear out any random TLB entries left over from booting.
472	 */
473	pmap_invalidate_all(kernel_pmap);
474}
475
476void *
477uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
478{
479	static vm_pindex_t color;
480	vm_page_t m;
481	int pflags;
482	void *va;
483
484	*flags = UMA_SLAB_PRIV;
485	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
486		pflags = VM_ALLOC_INTERRUPT;
487	else
488		pflags = VM_ALLOC_SYSTEM;
489	if (wait & M_ZERO)
490		pflags |= VM_ALLOC_ZERO;
491
492	for (;;) {
493		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
494		if (m == NULL) {
495			if (wait & M_NOWAIT)
496				return (NULL);
497			else
498				VM_WAIT;
499		} else
500			break;
501	}
502
503	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
504	if ((m->flags & PG_ZERO) == 0)
505		bzero(va, PAGE_SIZE);
506	return (va);
507}
508
509void
510uma_small_free(void *mem, int size, u_int8_t flags)
511{
512	vm_page_t m;
513
514	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
515	vm_page_lock_queues();
516	vm_page_free(m);
517	vm_page_unlock_queues();
518}
519
520/*
521 *	Initialize the pmap module.
522 *	Called by vm_init, to initialize any structures that the pmap
523 *	system needs to map virtual memory.
524 *	pmap_init has been enhanced to support discontiguous physical
525 *	memory in a fairly consistent way.
526 */
527void
528pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
529{
530	int i;
531	int initial_pvs;
532
533	/*
534	 * Initialize the pv list headers in the machine-dependent part
535	 * of each vm_page.
536	 */
537
538	for(i = 0; i < vm_page_array_size; i++) {
539		vm_page_t m;
540
541		m = &vm_page_array[i];
542		TAILQ_INIT(&m->md.pv_list);
543		m->md.pv_list_count = 0;
544 	}
545
546	/*
547	 * Init the pv free list and the PTE free list.
548	 */
549	initial_pvs = vm_page_array_size;
550	if (initial_pvs < MINPV)
551		initial_pvs = MINPV;
552	if (initial_pvs > MAXPV)
553		initial_pvs = MAXPV;
554	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
555	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
556	uma_prealloc(pvzone, initial_pvs);
557
558	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
559	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
560	uma_prealloc(ptezone, initial_pvs);
561
562	/*
563	 * Create the object for the kernel's page tables.
564	 */
565	kptobj = vm_object_allocate(OBJT_DEFAULT, MAXKPT);
566
567	/*
568	 * Now it is safe to enable pv_table recording.
569	 */
570	pmap_initialized = TRUE;
571}
572
573/*
574 * Initialize the address space (zone) for the pv_entries.  Set a
575 * high water mark so that the system can recover from excessive
576 * numbers of pv entries.
577 */
578void
579pmap_init2()
580{
581	int shpgperproc = PMAP_SHPGPERPROC;
582
583	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
584	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
585	pv_entry_high_water = 9 * (pv_entry_max / 10);
586}
587
588
589/***************************************************
590 * Manipulate TLBs for a pmap
591 ***************************************************/
592
593static void
594pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
595{
596	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
597		("invalidating TLB for non-current pmap"));
598	ia64_ptc_g(va, PAGE_SHIFT << 2);
599}
600
601static void
602pmap_invalidate_all_1(void *arg)
603{
604	u_int64_t addr;
605	int i, j;
606	register_t psr;
607
608	psr = intr_disable();
609	addr = pmap_ptc_e_base;
610	for (i = 0; i < pmap_ptc_e_count1; i++) {
611		for (j = 0; j < pmap_ptc_e_count2; j++) {
612			ia64_ptc_e(addr);
613			addr += pmap_ptc_e_stride2;
614		}
615		addr += pmap_ptc_e_stride1;
616	}
617	intr_restore(psr);
618}
619
620static void
621pmap_invalidate_all(pmap_t pmap)
622{
623	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
624		("invalidating TLB for non-current pmap"));
625
626
627#ifdef SMP
628	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
629#else
630	pmap_invalidate_all_1(0);
631#endif
632}
633
634static u_int32_t
635pmap_allocate_rid(void)
636{
637	int rid;
638
639	if (pmap_ridcount == pmap_ridmax)
640		panic("pmap_allocate_rid: All Region IDs used");
641
642	do {
643		rid = arc4random() & (pmap_ridmax - 1);
644	} while (pmap_ridbusy[rid / 64] & (1L << (rid & 63)));
645	pmap_ridbusy[rid / 64] |= (1L << (rid & 63));
646	pmap_ridcount++;
647
648	return rid;
649}
650
651static void
652pmap_free_rid(u_int32_t rid)
653{
654	mtx_lock(&pmap_ridmutex);
655	pmap_ridbusy[rid / 64] &= ~(1L << (rid & 63));
656	pmap_ridcount--;
657	mtx_unlock(&pmap_ridmutex);
658}
659
660static void
661pmap_ensure_rid(pmap_t pmap, vm_offset_t va)
662{
663	int rr;
664
665	rr = va >> 61;
666
667	/*
668	 * We get called for virtual addresses that may just as well be
669	 * kernel addresses (ie region 5, 6 or 7). Since the pm_rid field
670	 * only holds region IDs for user regions, we have to make sure
671	 * the region is within bounds.
672	 */
673	if (rr >= 5)
674		return;
675
676	if (pmap->pm_rid[rr])
677		return;
678
679	mtx_lock(&pmap_ridmutex);
680	pmap->pm_rid[rr] = pmap_allocate_rid();
681	if (pmap == PCPU_GET(current_pmap))
682		ia64_set_rr(IA64_RR_BASE(rr),
683			    (pmap->pm_rid[rr] << 8)|(PAGE_SHIFT << 2)|1);
684	mtx_unlock(&pmap_ridmutex);
685}
686
687/***************************************************
688 * Low level helper routines.....
689 ***************************************************/
690
691/*
692 * Install a pte into the VHPT
693 */
694static PMAP_INLINE void
695pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
696{
697	u_int64_t *vhp, *p;
698
699	/* invalidate the pte */
700	atomic_set_64(&vhpte->pte_tag, 1L << 63);
701	ia64_mf();			/* make sure everyone sees */
702
703	vhp = (u_int64_t *) vhpte;
704	p = (u_int64_t *) pte;
705
706	vhp[0] = p[0];
707	vhp[1] = p[1];
708	vhp[2] = p[2];			/* copy tag; clears ti, re-validating entry */
709
710	ia64_mf();
711}
712
713/*
714 * Compare essential parts of pte.
715 */
716static PMAP_INLINE int
717pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
718{
719	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
720}
721
722/*
723 * Return non-zero if modifications at the given virtual address
724 * should be tracked, i.e. it lies outside the clean submap.
725 */
726static PMAP_INLINE int
727pmap_track_modified(vm_offset_t va)
728{
729	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
730		return 1;
731	else
732		return 0;
733}
734
735#ifndef KSTACK_MAX_PAGES
736#define KSTACK_MAX_PAGES 32
737#endif
738
739/*
740 * Create the KSTACK for a new thread.
741 * This routine directly affects the fork perf for a process/thread.
742 */
743void
744pmap_new_thread(struct thread *td, int pages)
745{
746	vm_offset_t *ks;
747
748	/* Bounds check */
749	if (pages <= 1)
750		pages = KSTACK_PAGES;
751	else if (pages > KSTACK_MAX_PAGES)
752		pages = KSTACK_MAX_PAGES;
753
754	/*
755	 * Use contigmalloc for user area so that we can use a region
756	 * 7 address for it which makes it impossible to accidentally
757	 * lose when recording a trapframe.
758	 */
759	ks = contigmalloc(pages * PAGE_SIZE, M_PMAP, 0, 0ul,
760	    256*1024*1024 - 1, PAGE_SIZE, 256*1024*1024);
761	if (ks == NULL)
762		panic("pmap_new_thread: could not contigmalloc %d pages\n",
763		    pages);
764
765	td->td_md.md_kstackvirt = ks;
766	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
767	td->td_kstack_pages = pages;
768}
769
770/*
771 * Dispose the KSTACK for a thread that has exited.
772 * This routine directly impacts the exit perf of a process/thread.
773 */
774void
775pmap_dispose_thread(struct thread *td)
776{
777	int pages;
778
779	pages = td->td_kstack_pages;
780	contigfree(td->td_md.md_kstackvirt, pages * PAGE_SIZE, M_PMAP);
781	td->td_md.md_kstackvirt = NULL;
782	td->td_kstack = 0;
783}
784
785/*
786 * Set up a variable sized alternate kstack.  This appears to be MI.
787 */
788void
789pmap_new_altkstack(struct thread *td, int pages)
790{
791
792	/*
793	 * Shuffle the original stack. Save the virtual kstack address
794	 * instead of the physical address because 1) we can derive the
795	 * physical address from the virtual address and 2) we need the
796	 * virtual address in pmap_dispose_thread.
797	 */
798	td->td_altkstack_obj = td->td_kstack_obj;
799	td->td_altkstack = (vm_offset_t)td->td_md.md_kstackvirt;
800	td->td_altkstack_pages = td->td_kstack_pages;
801
802	pmap_new_thread(td, pages);
803}
804
805void
806pmap_dispose_altkstack(struct thread *td)
807{
808
809	pmap_dispose_thread(td);
810
811	/*
812	 * Restore the original kstack. Note that td_altkstack holds the
813	 * virtual kstack address of the previous kstack.
814	 */
815	td->td_md.md_kstackvirt = (void*)td->td_altkstack;
816	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa(td->td_altkstack));
817	td->td_kstack_obj = td->td_altkstack_obj;
818	td->td_kstack_pages = td->td_altkstack_pages;
819	td->td_altkstack = 0;
820	td->td_altkstack_obj = NULL;
821	td->td_altkstack_pages = 0;
822}
823
824/*
825 * Allow the KSTACK for a thread to be prejudicially paged out.
826 */
827void
828pmap_swapout_thread(struct thread *td)
829{
830}
831
832/*
833 * Bring the KSTACK for a specified thread back in.
834 */
835void
836pmap_swapin_thread(struct thread *td)
837{
838}
839
840/***************************************************
841 * Page table page management routines.....
842 ***************************************************/
843
844void
845pmap_pinit0(struct pmap *pmap)
846{
847	/* kernel_pmap is the same as any other pmap. */
848	pmap_pinit(pmap);
849}
850
851/*
852 * Initialize a preallocated and zeroed pmap structure,
853 * such as one in a vmspace structure.
854 */
855void
856pmap_pinit(struct pmap *pmap)
857{
858	int i;
859
860	pmap->pm_flags = 0;
861	for (i = 0; i < 5; i++)
862		pmap->pm_rid[i] = 0;
863	pmap->pm_ptphint = NULL;
864	pmap->pm_active = 0;
865	TAILQ_INIT(&pmap->pm_pvlist);
866	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
867}
868
869/*
870 * Wire in kernel global address entries.  To avoid a race condition
871 * between pmap initialization and pmap_growkernel, this procedure
872 * should be called after the vmspace is attached to the process
873 * but before this pmap is activated.
874 */
875void
876pmap_pinit2(struct pmap *pmap)
877{
878}
879
880/***************************************************
881 * Pmap allocation/deallocation routines.
882 ***************************************************/
883
884/*
885 * Release any resources held by the given physical map.
886 * Called when a pmap initialized by pmap_pinit is being released.
887 * Should only be called if the map contains no valid mappings.
888 */
889void
890pmap_release(pmap_t pmap)
891{
892	int i;
893
894	for (i = 0; i < 5; i++)
895		if (pmap->pm_rid[i])
896			pmap_free_rid(pmap->pm_rid[i]);
897}
898
899/*
900 * grow the number of kernel page table entries, if needed
901 */
902void
903pmap_growkernel(vm_offset_t addr)
904{
905	struct ia64_lpte *ptepage;
906	vm_page_t nkpg;
907
908	if (kernel_vm_end == 0) {
909		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
910			+ IA64_RR_BASE(5);
911	}
912	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
913	while (kernel_vm_end < addr) {
914		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
915			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
916				& ~(PAGE_SIZE * NKPTEPG - 1);
917			continue;
918		}
919
920		/*
921		 * We could handle more by increasing the size of kptdir.
922		 */
923		if (nkpt == MAXKPT)
924			panic("pmap_growkernel: out of kernel address space");
925
926		/*
927		 * This index is bogus, but out of the way
928		 */
929		nkpg = vm_page_alloc(kptobj, nkpt,
930		    VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
931		if (!nkpg)
932			panic("pmap_growkernel: no memory to grow kernel");
933
934		nkpt++;
935		ptepage = (struct ia64_lpte *)
936			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
937		bzero(ptepage, PAGE_SIZE);
938		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
939
940		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
941	}
942}
943
944/***************************************************
945 * page management routines.
946 ***************************************************/
947
948/*
949 * free the pv_entry back to the free list
950 */
951static PMAP_INLINE void
952free_pv_entry(pv_entry_t pv)
953{
954	pv_entry_count--;
955	uma_zfree(pvzone, pv);
956}
957
958/*
959 * get a new pv_entry, allocating a block from the system
960 * when needed.
961 * the memory allocation is performed bypassing the malloc code
962 * because of the possibility of allocations at interrupt time.
963 */
964static pv_entry_t
965get_pv_entry(void)
966{
967	pv_entry_count++;
968	if (pv_entry_high_water &&
969		(pv_entry_count > pv_entry_high_water) &&
970		(pmap_pagedaemon_waken == 0)) {
971		pmap_pagedaemon_waken = 1;
972		wakeup (&vm_pages_needed);
973	}
974	return uma_zalloc(pvzone, M_NOWAIT);
975}
976
977/*
978 * Add an ia64_lpte to the VHPT.
979 */
980static void
981pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
982{
983	struct ia64_lpte *vhpte;
984
985	pmap_vhpt_inserts++;
986	pmap_vhpt_resident++;
987
988	vhpte = (struct ia64_lpte *) ia64_thash(va);
989
990	if (vhpte->pte_chain)
991		pmap_vhpt_collisions++;
992
993	pte->pte_chain = vhpte->pte_chain;
994	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
995
996	if (!vhpte->pte_p && pte->pte_p)
997		pmap_install_pte(vhpte, pte);
998	else
999		ia64_mf();
1000}
1001
1002/*
1003 * Update VHPT after a pte has changed.
1004 */
1005static void
1006pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1007{
1008	struct ia64_lpte *vhpte;
1009
1010	vhpte = (struct ia64_lpte *) ia64_thash(va);
1011
1012	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
1013	    && pte->pte_p)
1014		pmap_install_pte(vhpte, pte);
1015}
1016
1017/*
1018 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1019 * worked or an appropriate error code otherwise.
1020 */
1021static int
1022pmap_remove_vhpt(vm_offset_t va)
1023{
1024	struct ia64_lpte *pte;
1025	struct ia64_lpte *lpte;
1026	struct ia64_lpte *vhpte;
1027	u_int64_t tag;
1028	int error = ENOENT;
1029
1030	vhpte = (struct ia64_lpte *) ia64_thash(va);
1031
1032	/*
1033	 * If the VHPTE is invalid, there can't be a collision chain.
1034	 */
1035	if (!vhpte->pte_p) {
1036		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1037		printf("can't remove vhpt entry for 0x%lx\n", va);
1038		goto done;
1039	}
1040
1041	lpte = vhpte;
1042	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1043	tag = ia64_ttag(va);
1044
1045	while (pte->pte_tag != tag) {
1046		lpte = pte;
1047		if (pte->pte_chain)
1048			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1049		else {
1050			printf("can't remove vhpt entry for 0x%lx\n", va);
1051			goto done;
1052		}
1053	}
1054
1055	/*
1056	 * Snip this pte out of the collision chain.
1057	 */
1058	lpte->pte_chain = pte->pte_chain;
1059
1060	/*
1061	 * If the VHPTE matches as well, change it to map the first
1062	 * element from the chain if there is one.
1063	 */
1064	if (vhpte->pte_tag == tag) {
1065		if (vhpte->pte_chain) {
1066			pte = (struct ia64_lpte *)
1067				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1068			pmap_install_pte(vhpte, pte);
1069		} else {
1070			vhpte->pte_p = 0;
1071			ia64_mf();
1072		}
1073	}
1074
1075	pmap_vhpt_resident--;
1076	error = 0;
1077 done:
1078	return error;
1079}
1080
1081/*
1082 * Find the ia64_lpte for the given va, if any.
1083 */
1084static struct ia64_lpte *
1085pmap_find_vhpt(vm_offset_t va)
1086{
1087	struct ia64_lpte *pte;
1088	u_int64_t tag;
1089
1090	pte = (struct ia64_lpte *) ia64_thash(va);
1091	if (!pte->pte_chain) {
1092		pte = 0;
1093		goto done;
1094	}
1095
1096	tag = ia64_ttag(va);
1097	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1098
1099	while (pte->pte_tag != tag) {
1100		if (pte->pte_chain) {
1101			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1102		} else {
1103			pte = 0;
1104			break;
1105		}
1106	}
1107
1108 done:
1109	return pte;
1110}
1111
1112/*
1113 * Remove an entry from the list of managed mappings.
1114 */
1115static int
1116pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1117{
1118	if (!pv) {
1119		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1120			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1121				if (pmap == pv->pv_pmap && va == pv->pv_va)
1122					break;
1123			}
1124		} else {
1125			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1126				if (va == pv->pv_va)
1127					break;
1128			}
1129		}
1130	}
1131
1132	if (pv) {
1133		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1134		m->md.pv_list_count--;
1135		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1136			vm_page_flag_clear(m, PG_WRITEABLE);
1137
1138		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1139		free_pv_entry(pv);
1140		return 0;
1141	} else {
1142		return ENOENT;
1143	}
1144}
1145
1146/*
1147 * Create a pv entry for the page (m) mapped at
1148 * virtual address va in the given pmap.
1149 */
1150static void
1151pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1152{
1153	pv_entry_t pv;
1154
1155	pv = get_pv_entry();
1156	pv->pv_pmap = pmap;
1157	pv->pv_va = va;
1158
1159	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1160	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1161	m->md.pv_list_count++;
1162}
1163
1164/*
1165 *	Routine:	pmap_extract
1166 *	Function:
1167 *		Extract the physical page address associated
1168 *		with the given map/virtual_address pair.
1169 */
1170vm_offset_t
1171pmap_extract(pmap, va)
1172	register pmap_t pmap;
1173	vm_offset_t va;
1174{
1175	pmap_t oldpmap;
1176	vm_offset_t pa;
1177
1178	oldpmap = pmap_install(pmap);
1179	pa = ia64_tpa(va);
1180	pmap_install(oldpmap);
1181	return pa;
1182}
1183
1184/***************************************************
1185 * Low level mapping routines.....
1186 ***************************************************/
1187
1188/*
1189 * Find the kernel lpte for mapping the given virtual address, which
1190 * must be in the part of region 5 which we can cover with our kernel
1191 * 'page tables'.
1192 */
1193static struct ia64_lpte *
1194pmap_find_kpte(vm_offset_t va)
1195{
1196	KASSERT((va >> 61) == 5,
1197		("kernel mapping 0x%lx not in region 5", va));
1198	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1199		("kernel mapping 0x%lx out of range", va));
1200	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1201}
1202
1203/*
1204 * Find a pte suitable for mapping a user-space address. If one exists
1205 * in the VHPT, that one will be returned, otherwise a new pte is
1206 * allocated.
1207 */
1208static struct ia64_lpte *
1209pmap_find_pte(vm_offset_t va)
1210{
1211	struct ia64_lpte *pte;
1212
1213	if (va >= VM_MAXUSER_ADDRESS)
1214		return pmap_find_kpte(va);
1215
1216	pte = pmap_find_vhpt(va);
1217	if (!pte) {
1218		pte = uma_zalloc(ptezone, 0);
1219		pte->pte_p = 0;
1220	}
1221	return pte;
1222}
1223
1224/*
1225 * Free a pte which is now unused. This simply returns it to the zone
1226 * allocator if it is a user mapping. For kernel mappings, clear the
1227 * valid bit to make it clear that the mapping is not currently used.
1228 */
1229static void
1230pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1231{
1232	if (va < VM_MAXUSER_ADDRESS)
1233		uma_zfree(ptezone, pte);
1234	else
1235		pte->pte_p = 0;
1236}
1237
1238/*
1239 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1240 * the pte was orginally valid, then its assumed to already be in the
1241 * VHPT.
1242 */
1243static void
1244pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1245	     int ig, int pl, int ar)
1246{
1247	int wasvalid = pte->pte_p;
1248
1249	pte->pte_p = 1;
1250	pte->pte_ma = PTE_MA_WB;
1251	if (ig & PTE_IG_MANAGED) {
1252		pte->pte_a = 0;
1253		pte->pte_d = 0;
1254	} else {
1255		pte->pte_a = 1;
1256		pte->pte_d = 1;
1257	}
1258	pte->pte_pl = pl;
1259	pte->pte_ar = ar;
1260	pte->pte_ppn = pa >> 12;
1261	pte->pte_ed = 0;
1262	pte->pte_ig = ig;
1263
1264	pte->pte_ps = PAGE_SHIFT;
1265	pte->pte_key = 0;
1266
1267	pte->pte_tag = ia64_ttag(va);
1268
1269	if (wasvalid) {
1270		pmap_update_vhpt(pte, va);
1271	} else {
1272		pmap_enter_vhpt(pte, va);
1273	}
1274}
1275
1276/*
1277 * If a pte contains a valid mapping, clear it and update the VHPT.
1278 */
1279static void
1280pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1281{
1282	if (pte->pte_p) {
1283		pmap_remove_vhpt(va);
1284		ia64_ptc_g(va, PAGE_SHIFT << 2);
1285		pte->pte_p = 0;
1286	}
1287}
1288
1289/*
1290 * Remove the (possibly managed) mapping represented by pte from the
1291 * given pmap.
1292 */
1293static int
1294pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1295		pv_entry_t pv, int freepte)
1296{
1297	int error;
1298	vm_page_t m;
1299
1300	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1301		("removing pte for non-current pmap"));
1302
1303	/*
1304	 * First remove from the VHPT.
1305	 */
1306	error = pmap_remove_vhpt(va);
1307	if (error)
1308		return error;
1309
1310	/*
1311	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1312	 */
1313	pte->pte_p = 0;
1314
1315	if (pte->pte_ig & PTE_IG_WIRED)
1316		pmap->pm_stats.wired_count -= 1;
1317
1318	pmap->pm_stats.resident_count -= 1;
1319	if (pte->pte_ig & PTE_IG_MANAGED) {
1320		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1321		if (pte->pte_d)
1322			if (pmap_track_modified(va))
1323				vm_page_dirty(m);
1324		if (pte->pte_a)
1325			vm_page_flag_set(m, PG_REFERENCED);
1326
1327		if (freepte)
1328			pmap_free_pte(pte, va);
1329		return pmap_remove_entry(pmap, m, va, pv);
1330	} else {
1331		if (freepte)
1332			pmap_free_pte(pte, va);
1333		return 0;
1334	}
1335}
1336
1337/*
1338 * Add a list of wired pages to the kva.
1339 * This routine is only used for temporary
1340 * kernel mappings that do not need to have
1341 * page modification or references recorded.
1342 * Note that old mappings are simply written
1343 * over.  The page *must* be wired.
1344 */
1345void
1346pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1347{
1348	int i;
1349	struct ia64_lpte *pte;
1350
1351	for (i = 0; i < count; i++) {
1352		vm_offset_t tva = va + i * PAGE_SIZE;
1353		int wasvalid;
1354		pte = pmap_find_kpte(tva);
1355		wasvalid = pte->pte_p;
1356		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1357			     0, PTE_PL_KERN, PTE_AR_RWX);
1358		if (wasvalid)
1359			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1360	}
1361}
1362
1363/*
1364 * this routine jerks page mappings from the
1365 * kernel -- it is meant only for temporary mappings.
1366 */
1367void
1368pmap_qremove(vm_offset_t va, int count)
1369{
1370	int i;
1371	struct ia64_lpte *pte;
1372
1373	for (i = 0; i < count; i++) {
1374		pte = pmap_find_kpte(va);
1375		pmap_clear_pte(pte, va);
1376		va += PAGE_SIZE;
1377	}
1378}
1379
1380/*
1381 * Add a wired page to the kva.
1382 */
1383void
1384pmap_kenter(vm_offset_t va, vm_offset_t pa)
1385{
1386	struct ia64_lpte *pte;
1387	int wasvalid;
1388
1389	pte = pmap_find_kpte(va);
1390	wasvalid = pte->pte_p;
1391	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1392	if (wasvalid)
1393		ia64_ptc_g(va, PAGE_SHIFT << 2);
1394}
1395
1396/*
1397 * Remove a page from the kva
1398 */
1399void
1400pmap_kremove(vm_offset_t va)
1401{
1402	struct ia64_lpte *pte;
1403
1404	pte = pmap_find_kpte(va);
1405	pmap_clear_pte(pte, va);
1406}
1407
1408/*
1409 *	Used to map a range of physical addresses into kernel
1410 *	virtual address space.
1411 *
1412 *	The value passed in '*virt' is a suggested virtual address for
1413 *	the mapping. Architectures which can support a direct-mapped
1414 *	physical to virtual region can return the appropriate address
1415 *	within that region, leaving '*virt' unchanged. Other
1416 *	architectures should map the pages starting at '*virt' and
1417 *	update '*virt' with the first usable address after the mapped
1418 *	region.
1419 */
1420vm_offset_t
1421pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1422{
1423	return IA64_PHYS_TO_RR7(start);
1424}
1425
1426/*
1427 * Remove a single page from a process address space
1428 */
1429static void
1430pmap_remove_page(pmap_t pmap, vm_offset_t va)
1431{
1432	struct ia64_lpte *pte;
1433
1434	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1435		("removing page for non-current pmap"));
1436
1437	pte = pmap_find_vhpt(va);
1438	if (pte) {
1439		pmap_remove_pte(pmap, pte, va, 0, 1);
1440		pmap_invalidate_page(pmap, va);
1441	}
1442	return;
1443}
1444
1445/*
1446 *	Remove the given range of addresses from the specified map.
1447 *
1448 *	It is assumed that the start and end are properly
1449 *	rounded to the page size.
1450 */
1451void
1452pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1453{
1454	pmap_t oldpmap;
1455	vm_offset_t va;
1456	pv_entry_t pv;
1457	struct ia64_lpte *pte;
1458
1459	if (pmap == NULL)
1460		return;
1461
1462	if (pmap->pm_stats.resident_count == 0)
1463		return;
1464
1465	oldpmap = pmap_install(pmap);
1466
1467	/*
1468	 * Special handling for removing a single page: a very
1469	 * common operation for which we can short-circuit some
1470	 * code.
1471	 */
1472	if (sva + PAGE_SIZE == eva) {
1473		pmap_remove_page(pmap, sva);
1474		pmap_install(oldpmap);
1475		return;
1476	}
1477
1478	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1479		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1480			va = pv->pv_va;
1481			if (va >= sva && va < eva) {
1482				pte = pmap_find_vhpt(va);
1483				pmap_remove_pte(pmap, pte, va, pv, 1);
1484				pmap_invalidate_page(pmap, va);
1485			}
1486		}
1487
1488	} else {
1489		for (va = sva; va < eva; va += PAGE_SIZE) {
1490			pte = pmap_find_vhpt(va);
1491			if (pte) {
1492				pmap_remove_pte(pmap, pte, va, 0, 1);
1493				pmap_invalidate_page(pmap, va);
1494			}
1495		}
1496	}
1497
1498	pmap_install(oldpmap);
1499}
1500
1501/*
1502 *	Routine:	pmap_remove_all
1503 *	Function:
1504 *		Removes this physical page from
1505 *		all physical maps in which it resides.
1506 *		Reflects back modify bits to the pager.
1507 *
1508 *	Notes:
1509 *		Original versions of this routine were very
1510 *		inefficient because they iteratively called
1511 *		pmap_remove (slow...)
1512 */
1513
1514void
1515pmap_remove_all(vm_page_t m)
1516{
1517	pmap_t oldpmap;
1518	pv_entry_t pv;
1519	int s;
1520
1521#if defined(PMAP_DIAGNOSTIC)
1522	/*
1523	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1524	 * pages!
1525	 */
1526	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1527		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1528	}
1529#endif
1530
1531	s = splvm();
1532
1533	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1534		struct ia64_lpte *pte;
1535		pmap_t pmap = pv->pv_pmap;
1536		vm_offset_t va = pv->pv_va;
1537
1538		oldpmap = pmap_install(pmap);
1539		pte = pmap_find_vhpt(va);
1540		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1541			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1542		pmap_remove_pte(pmap, pte, va, pv, 1);
1543		pmap_invalidate_page(pmap, va);
1544		pmap_install(oldpmap);
1545	}
1546
1547	vm_page_flag_clear(m, PG_WRITEABLE);
1548
1549	splx(s);
1550	return;
1551}
1552
1553/*
1554 *	Set the physical protection on the
1555 *	specified range of this map as requested.
1556 */
1557void
1558pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1559{
1560	pmap_t oldpmap;
1561	struct ia64_lpte *pte;
1562	int newprot;
1563
1564	if (pmap == NULL)
1565		return;
1566
1567	oldpmap = pmap_install(pmap);
1568
1569	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1570		pmap_remove(pmap, sva, eva);
1571		pmap_install(oldpmap);
1572		return;
1573	}
1574
1575	if (prot & VM_PROT_WRITE) {
1576		pmap_install(oldpmap);
1577		return;
1578	}
1579
1580	newprot = pte_prot(pmap, prot);
1581
1582	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1583		panic("pmap_protect: unaligned addresses");
1584
1585	while (sva < eva) {
1586		/*
1587		 * If page is invalid, skip this page
1588		 */
1589		pte = pmap_find_vhpt(sva);
1590		if (!pte) {
1591			sva += PAGE_SIZE;
1592			continue;
1593		}
1594
1595		if (pmap_pte_prot(pte) != newprot) {
1596			if (pte->pte_ig & PTE_IG_MANAGED) {
1597				vm_offset_t pa = pmap_pte_pa(pte);
1598				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1599				if (pte->pte_d) {
1600					if (pmap_track_modified(sva))
1601						vm_page_dirty(m);
1602					pte->pte_d = 0;
1603				}
1604				if (pte->pte_a) {
1605					vm_page_flag_set(m, PG_REFERENCED);
1606					pte->pte_a = 0;
1607				}
1608			}
1609			pmap_pte_set_prot(pte, newprot);
1610			pmap_update_vhpt(pte, sva);
1611			pmap_invalidate_page(pmap, sva);
1612		}
1613
1614		sva += PAGE_SIZE;
1615	}
1616	pmap_install(oldpmap);
1617}
1618
1619/*
1620 *	Insert the given physical page (p) at
1621 *	the specified virtual address (v) in the
1622 *	target physical map with the protection requested.
1623 *
1624 *	If specified, the page will be wired down, meaning
1625 *	that the related pte can not be reclaimed.
1626 *
1627 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1628 *	or lose information.  That is, this routine must actually
1629 *	insert this page into the given map NOW.
1630 */
1631void
1632pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1633	   boolean_t wired)
1634{
1635	pmap_t oldpmap;
1636	vm_offset_t pa;
1637	vm_offset_t opa;
1638	struct ia64_lpte origpte;
1639	struct ia64_lpte *pte;
1640	int managed;
1641
1642	if (pmap == NULL)
1643		return;
1644
1645	pmap_ensure_rid(pmap, va);
1646
1647	oldpmap = pmap_install(pmap);
1648
1649	va &= ~PAGE_MASK;
1650#ifdef PMAP_DIAGNOSTIC
1651	if (va > VM_MAX_KERNEL_ADDRESS)
1652		panic("pmap_enter: toobig");
1653#endif
1654
1655	/*
1656	 * Find (or create) a pte for the given mapping.
1657	 */
1658	pte = pmap_find_pte(va);
1659	origpte = *pte;
1660
1661	if (origpte.pte_p)
1662		opa = pmap_pte_pa(&origpte);
1663	else
1664		opa = 0;
1665	managed = 0;
1666
1667	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1668
1669	/*
1670	 * Mapping has not changed, must be protection or wiring change.
1671	 */
1672	if (origpte.pte_p && (opa == pa)) {
1673		/*
1674		 * Wiring change, just update stats. We don't worry about
1675		 * wiring PT pages as they remain resident as long as there
1676		 * are valid mappings in them. Hence, if a user page is wired,
1677		 * the PT page will be also.
1678		 */
1679		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1680			pmap->pm_stats.wired_count++;
1681		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1682			pmap->pm_stats.wired_count--;
1683
1684		/*
1685		 * We might be turning off write access to the page,
1686		 * so we go ahead and sense modify status.
1687		 */
1688		if (origpte.pte_ig & PTE_IG_MANAGED) {
1689			if (origpte.pte_d && pmap_track_modified(va)) {
1690				vm_page_t om;
1691				om = PHYS_TO_VM_PAGE(opa);
1692				vm_page_dirty(om);
1693			}
1694		}
1695
1696		managed = origpte.pte_ig & PTE_IG_MANAGED;
1697		goto validate;
1698	}
1699	/*
1700	 * Mapping has changed, invalidate old range and fall
1701	 * through to handle validating new mapping.
1702	 */
1703	if (opa) {
1704		int error;
1705		vm_page_lock_queues();
1706		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1707		vm_page_unlock_queues();
1708		if (error)
1709			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1710	}
1711
1712	/*
1713	 * Enter on the PV list if part of our managed memory.
1714	 */
1715	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1716		pmap_insert_entry(pmap, va, m);
1717		managed |= PTE_IG_MANAGED;
1718	}
1719
1720	/*
1721	 * Increment counters
1722	 */
1723	pmap->pm_stats.resident_count++;
1724	if (wired)
1725		pmap->pm_stats.wired_count++;
1726
1727validate:
1728
1729	/*
1730	 * Now validate mapping with desired protection/wiring. This
1731	 * adds the pte to the VHPT if necessary.
1732	 */
1733	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1734		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1735
1736	/*
1737	 * if the mapping or permission bits are different, we need
1738	 * to invalidate the page.
1739	 */
1740	if (!pmap_equal_pte(&origpte, pte))
1741		pmap_invalidate_page(pmap, va);
1742
1743	pmap_install(oldpmap);
1744}
1745
1746/*
1747 * this code makes some *MAJOR* assumptions:
1748 * 1. Current pmap & pmap exist.
1749 * 2. Not wired.
1750 * 3. Read access.
1751 * 4. No page table pages.
1752 * 5. TLB flush is deferred to the calling procedure.
1753 * 6. Page IS managed.
1754 * but is *MUCH* faster than pmap_enter...
1755 */
1756
1757static void
1758pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1759{
1760	struct ia64_lpte *pte;
1761	pmap_t oldpmap;
1762
1763	pmap_ensure_rid(pmap, va);
1764
1765	oldpmap = pmap_install(pmap);
1766
1767	pte = pmap_find_pte(va);
1768	if (pte->pte_p)
	if (pte->pte_p) {
		pmap_install(oldpmap);
		return;
	}
1771	/*
1772	 * Enter on the PV list since it's part of our managed memory.
1773	 */
1774	pmap_insert_entry(pmap, va, m);
1775
1776	/*
1777	 * Increment counters
1778	 */
1779	pmap->pm_stats.resident_count++;
1780
1781	/*
1782	 * Initialise PTE with read-only protection and enter into VHPT.
1783	 */
1784	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1785		     PTE_IG_MANAGED,
1786		     PTE_PL_USER, PTE_AR_R);
1787
1788	pmap_install(oldpmap);
1789}
1790
1791/*
1792 * Make temporary mapping for a physical address. This is called
1793 * during dump.
1794 */
1795void *
1796pmap_kenter_temporary(vm_offset_t pa, int i)
1797{
1798	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1799}
1800
1801#define MAX_INIT_PT (96)
1802/*
1803 * pmap_object_init_pt preloads the ptes for a given object
1804 * into the specified pmap.  This eliminates the blast of soft
1805 * faults on process startup and immediately after an mmap.
1806 */
1807void
1808pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1809		    vm_object_t object, vm_pindex_t pindex,
1810		    vm_size_t size, int limit)
1811{
1812	pmap_t oldpmap;
1813	vm_offset_t tmpidx;
1814	int psize;
1815	vm_page_t p;
1816	int objpgs;
1817
1818	if (pmap == NULL || object == NULL)
1819		return;
1820
1821	oldpmap = pmap_install(pmap);
1822
1823	psize = ia64_btop(size);
1824
1825	if ((object->type != OBJT_VNODE) ||
1826		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1827			(object->resident_page_count > MAX_INIT_PT))) {
1828		pmap_install(oldpmap);
1829		return;
1830	}
1831
1832	if (psize + pindex > object->size) {
		if (object->size < pindex) {
			pmap_install(oldpmap);
			return;
		}
1835		psize = object->size - pindex;
1836	}
1837
1838	/*
1839	 * if we are processing a major portion of the object, then scan the
1840	 * entire thing.
1841	 */
1842	if (psize > (object->resident_page_count >> 2)) {
1843		objpgs = psize;
1844
1845		for (p = TAILQ_FIRST(&object->memq);
1846		    ((objpgs > 0) && (p != NULL));
1847		    p = TAILQ_NEXT(p, listq)) {
1848
1849			tmpidx = p->pindex;
1850			if (tmpidx < pindex) {
1851				continue;
1852			}
1853			tmpidx -= pindex;
1854			if (tmpidx >= psize) {
1855				continue;
1856			}
1857			/*
1858			 * don't allow an madvise to blow away our really
1859			 * free pages by allocating pv entries.
1860			 */
1861			if ((limit & MAP_PREFAULT_MADVISE) &&
1862			    cnt.v_free_count < cnt.v_free_reserved) {
1863				break;
1864			}
1865			vm_page_lock_queues();
1866			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1867				(p->busy == 0) &&
1868			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1869				if ((p->queue - p->pc) == PQ_CACHE)
1870					vm_page_deactivate(p);
1871				vm_page_busy(p);
1872				vm_page_unlock_queues();
1873				pmap_enter_quick(pmap,
1874						 addr + ia64_ptob(tmpidx), p);
1875				vm_page_lock_queues();
1876				vm_page_wakeup(p);
1877			}
1878			vm_page_unlock_queues();
1879			objpgs -= 1;
1880		}
1881	} else {
1882		/*
1883		 * else lookup the pages one-by-one.
1884		 */
1885		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1886			/*
1887			 * don't allow an madvise to blow away our really
1888			 * free pages by allocating pv entries.
1889			 */
1890			if ((limit & MAP_PREFAULT_MADVISE) &&
1891			    cnt.v_free_count < cnt.v_free_reserved) {
1892				break;
1893			}
1894			p = vm_page_lookup(object, tmpidx + pindex);
1895			if (p == NULL)
1896				continue;
1897			vm_page_lock_queues();
1898			if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
1899				(p->busy == 0) &&
1900			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1901				if ((p->queue - p->pc) == PQ_CACHE)
1902					vm_page_deactivate(p);
1903				vm_page_busy(p);
1904				vm_page_unlock_queues();
1905				pmap_enter_quick(pmap,
1906						 addr + ia64_ptob(tmpidx), p);
1907				vm_page_lock_queues();
1908				vm_page_wakeup(p);
1909			}
1910			vm_page_unlock_queues();
1911		}
1912	}
1913	pmap_install(oldpmap);
1914	return;
1915}
1916
1917/*
1918 * pmap_prefault provides a quick way of clustering
1919 * pagefaults into a process's address space.  It is a "cousin"
1920 * of pmap_object_init_pt, except it runs at page fault time instead
1921 * of mmap time.
1922 */
1923#define PFBAK 4
1924#define PFFOR 4
1925#define PAGEORDER_SIZE (PFBAK+PFFOR)
1926
1927static int pmap_prefault_pageorder[] = {
1928	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
1929	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1930	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1931	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1932};
1933
1934void
1935pmap_prefault(pmap, addra, entry)
1936	pmap_t pmap;
1937	vm_offset_t addra;
1938	vm_map_entry_t entry;
1939{
1940	int i;
1941	vm_offset_t starta;
1942	vm_offset_t addr;
1943	vm_pindex_t pindex;
1944	vm_page_t m, mpte;
1945	vm_object_t object;
1946
1947	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1948		return;
1949
1950	object = entry->object.vm_object;
1951
1952	starta = addra - PFBAK * PAGE_SIZE;
1953	if (starta < entry->start) {
1954		starta = entry->start;
1955	} else if (starta > addra) {
1956		starta = 0;
1957	}
1958
1959	mpte = NULL;
1960	for (i = 0; i < PAGEORDER_SIZE; i++) {
1961		vm_object_t lobject;
1962		struct ia64_lpte *pte;
1963
1964		addr = addra + pmap_prefault_pageorder[i];
1965		if (addr > addra + (PFFOR * PAGE_SIZE))
1966			addr = 0;
1967
1968		if (addr < starta || addr >= entry->end)
1969			continue;
1970
1971		pte = pmap_find_vhpt(addr);
1972		if (pte && pte->pte_p)
1973			continue;
1974
1975		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
1976		lobject = object;
1977		for (m = vm_page_lookup(lobject, pindex);
1978		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
1979		    lobject = lobject->backing_object) {
1980			if (lobject->backing_object_offset & PAGE_MASK)
1981				break;
1982			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
1983			m = vm_page_lookup(lobject->backing_object, pindex);
1984		}
1985
1986		/*
1987		 * give up when a page is not in memory
1988		 */
1989		if (m == NULL)
1990			break;
1991		vm_page_lock_queues();
1992		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1993			(m->busy == 0) &&
1994		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1995
1996			if ((m->queue - m->pc) == PQ_CACHE) {
1997				vm_page_deactivate(m);
1998			}
1999			vm_page_busy(m);
2000			vm_page_unlock_queues();
2001			pmap_enter_quick(pmap, addr, m);
2002			vm_page_lock_queues();
2003			vm_page_wakeup(m);
2004		}
2005		vm_page_unlock_queues();
2006	}
2007}
2008
2009/*
2010 *	Routine:	pmap_change_wiring
2011 *	Function:	Change the wiring attribute for a map/virtual-address
2012 *			pair.
2013 *	In/out conditions:
2014 *			The mapping must already exist in the pmap.
2015 */
2016void
2017pmap_change_wiring(pmap, va, wired)
2018	register pmap_t pmap;
2019	vm_offset_t va;
2020	boolean_t wired;
2021{
2022	pmap_t oldpmap;
2023	struct ia64_lpte *pte;
2024
2025	if (pmap == NULL)
2026		return;
2027
2028	oldpmap = pmap_install(pmap);
2029
2030	pte = pmap_find_vhpt(va);
2031
2032	if (wired && !pmap_pte_w(pte))
2033		pmap->pm_stats.wired_count++;
2034	else if (!wired && pmap_pte_w(pte))
2035		pmap->pm_stats.wired_count--;
2036
2037	/*
2038	 * Wiring is not a hardware characteristic so there is no need to
2039	 * invalidate TLB.
2040	 */
2041	pmap_pte_set_w(pte, wired);
2042
2043	pmap_install(oldpmap);
2044}
2045
2046
2047
2048/*
2049 *	Copy the range specified by src_addr/len
2050 *	from the source map to the range dst_addr/len
2051 *	in the destination map.
2052 *
2053 *	This routine is only advisory and need not do anything.
2054 */
2055
2056void
2057pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2058	  vm_offset_t src_addr)
2059{
2060}
2061
2062
2063/*
2064 *	pmap_zero_page zeros the specified hardware page by
2065 *	mapping it into virtual memory and using bzero to clear
2066 *	its contents.
2067 */
2068
2069void
2070pmap_zero_page(vm_page_t m)
2071{
2072	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2073	bzero((caddr_t) va, PAGE_SIZE);
2074}
2075
2076
2077/*
2078 *	pmap_zero_page_area zeros the specified hardware page by
2079 *	mapping it into virtual memory and using bzero to clear
2080 *	its contents.
2081 *
2082 *	off and size must reside within a single page.
2083 */
2084
2085void
2086pmap_zero_page_area(vm_page_t m, int off, int size)
2087{
2088	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2089	bzero((char *)(caddr_t)va + off, size);
2090}
2091
2092
2093/*
2094 *	pmap_zero_page_idle zeros the specified hardware page through
2095 *	its region 7 (direct-mapped) virtual address, using bzero to
2096 *	clear its contents.  This is for the vm_idlezero process.
2097 */
2098
2099void
2100pmap_zero_page_idle(vm_page_t m)
2101{
2102	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2103	bzero((caddr_t) va, PAGE_SIZE);
2104}
2105
2106
2107/*
2108 *	pmap_copy_page copies the specified (machine independent)
2109 *	page through the region 7 (direct-mapped) virtual addresses
2110 *	of the source and destination pages, using bcopy to copy
2111 *	the contents.
2112 */
2113void
2114pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2115{
2116	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2117	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2118	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2119}
2120
2121/*
2122 * Returns true if the pmap's pv is one of the first
2123 * 16 pvs linked to from this page.  This count may
2124 * be changed upwards or downwards in the future; it
2125 * is only necessary that true be returned for a small
2126 * subset of pmaps for proper page aging.
2127 */
2128boolean_t
2129pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2130{
2131	pv_entry_t pv;
2132	int loops = 0;
2133	int s;
2134
2135	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2136		return FALSE;
2137
2138	s = splvm();
2139
2140	/*
2141	 * Check the page's current mappings; return TRUE if one belongs to this pmap.
2142	 */
2143	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2144		if (pv->pv_pmap == pmap) {
2145			splx(s);
2146			return TRUE;
2147		}
2148		loops++;
2149		if (loops >= 16)
2150			break;
2151	}
2152	splx(s);
2153	return (FALSE);
2154}
2155
2156#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2157/*
2158 * Remove all pages from the specified address space; this aids
2159 * process exit speeds.  Also, this code is special-cased for the
2160 * current process only, but the more generic (and slightly
2161 * slower) mode can be enabled.  This is much faster than
2162 * pmap_remove in the case of running down an entire address
2163 * space.
2164 */
2165void
2166pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2167{
2168	pv_entry_t pv, npv;
2169	int s;
2170
2171#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2172	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2173		printf("warning: pmap_remove_pages called with non-current pmap\n");
2174		return;
2175	}
2176#endif
2177
2178	s = splvm();
2179	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2182		struct ia64_lpte *pte;
2183
2184		npv = TAILQ_NEXT(pv, pv_plist);
2185
2186		if (pv->pv_va >= eva || pv->pv_va < sva) {
2187			continue;
2188		}
2189
2190		pte = pmap_find_vhpt(pv->pv_va);
2191		if (!pte)
2192			panic("pmap_remove_pages: page on pm_pvlist has no pte");
2193
2194		/*
2195		 * We cannot remove wired pages from a process' mapping
2196		 * at this time.
2197		 */
2198		if (pte->pte_ig & PTE_IG_WIRED) {
2199			continue;
2200		}
2201
2202		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2203	}
2204	splx(s);
2205
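	/* Flush any remaining stale translations for this pmap. */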
2206	pmap_invalidate_all(pmap);
2207}
2208
2209/*
2210 *      pmap_page_protect:
2211 *
2212 *      Lower the permission for all mappings to a given page.
2213 */
2214void
2215pmap_page_protect(vm_page_t m, vm_prot_t prot)
2216{
2217	pv_entry_t pv;
2218
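	/*
	 * If write permission is retained, there is nothing to do.  For
	 * read and/or execute, downgrade each existing mapping; for any
	 * other request, remove the mappings entirely.
	 */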
2219	if ((prot & VM_PROT_WRITE) != 0)
2220		return;
2221	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2222		if ((m->flags & PG_WRITEABLE) == 0)
2223			return;
2224		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2225			int newprot = pte_prot(pv->pv_pmap, prot);
2226			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2227			struct ia64_lpte *pte;
2228			pte = pmap_find_vhpt(pv->pv_va);
2229			pmap_pte_set_prot(pte, newprot);
2230			pmap_update_vhpt(pte, pv->pv_va);
2231			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2232			pmap_install(oldpmap);
2233		}
2234		vm_page_flag_clear(m, PG_WRITEABLE);
2235	} else {
2236		pmap_remove_all(m);
2237	}
2238}
2239
2240vm_offset_t
2241pmap_phys_address(int ppn)
2242{
2243	return (ia64_ptob(ppn));
2244}
2245
2246/*
2247 *	pmap_ts_referenced:
2248 *
2249 *	Return a count of reference bits for a page, clearing those bits.
2250 *	It is not necessary for every reference bit to be cleared, but it
2251 *	is necessary that 0 only be returned when there are truly no
2252 *	reference bits set.
2253 *
2254 *	XXX: The exact number of bits to check and clear is a matter that
2255 *	should be tested and standardized at some point in the future for
2256 *	optimal aging of shared pages.
2257 */
2258int
2259pmap_ts_referenced(vm_page_t m)
2260{
2261	pv_entry_t pv;
2262	int count = 0;
2263
2264	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2265		return 0;
2266
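	/* Count and clear the accessed bit in each mapping of the page. */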
2267	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2268		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2269		struct ia64_lpte *pte;
2270		pte = pmap_find_vhpt(pv->pv_va);
2271		if (pte->pte_a) {
2272			count++;
2273			pte->pte_a = 0;
2274			pmap_update_vhpt(pte, pv->pv_va);
2275			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2276		}
2277		pmap_install(oldpmap);
2278	}
2279
2280	return count;
2281}
2282
2283#if 0
2284/*
2285 *	pmap_is_referenced:
2286 *
2287 *	Return whether or not the specified physical page was referenced
2288 *	in any physical maps.
2289 */
2290static boolean_t
2291pmap_is_referenced(vm_page_t m)
2292{
2293	pv_entry_t pv;
2294
2295	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2296		return FALSE;
2297
2298	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2299		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2300		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2301		pmap_install(oldpmap);
2302		if (pte->pte_a)
2303			return 1;
2304	}
2305
2306	return 0;
2307}
2308#endif
2309
2310/*
2311 *	pmap_is_modified:
2312 *
2313 *	Return whether or not the specified physical page was modified
2314 *	in any physical maps.
2315 */
2316boolean_t
2317pmap_is_modified(vm_page_t m)
2318{
2319	pv_entry_t pv;
2320
2321	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2322		return FALSE;
2323
2324	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2325		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2326		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2327		pmap_install(oldpmap);
2328		if (pte->pte_d)
2329			return 1;
2330	}
2331
2332	return 0;
2333}
2334
2335/*
2336 *	Clear the modify bits on the specified physical page.
2337 */
2338void
2339pmap_clear_modify(vm_page_t m)
2340{
2341	pv_entry_t pv;
2342
2343	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2344		return;
2345
2346	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2347		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2348		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2349		if (pte->pte_d) {
2350			pte->pte_d = 0;
2351			pmap_update_vhpt(pte, pv->pv_va);
2352			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2353		}
2354		pmap_install(oldpmap);
2355	}
2356}
2357
2358/*
2359 *	pmap_clear_reference:
2360 *
2361 *	Clear the reference bit on the specified physical page.
2362 */
2363void
2364pmap_clear_reference(vm_page_t m)
2365{
2366	pv_entry_t pv;
2367
2368	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2369		return;
2370
2371	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2372		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2373		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2374		if (pte->pte_a) {
2375			pte->pte_a = 0;
2376			pmap_update_vhpt(pte, pv->pv_va);
2377			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2378		}
2379		pmap_install(oldpmap);
2380	}
2381}
2382
2383/*
2384 * Miscellaneous support routines follow
2385 */
2386
2387static void
2388ia64_protection_init()
2389{
2390	int prot, *kp, *up;
2391
2392	kp = protection_codes[0];
2393	up = protection_codes[1];
2394
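	/*
	 * For each of the eight VM_PROT_{READ,WRITE,EXECUTE}
	 * combinations, compute the ia64 access-rights and privilege
	 * level bits for kernel (kp) and user (up) mappings.
	 */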
2395	for (prot = 0; prot < 8; prot++) {
2396		switch (prot) {
2397		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2398			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2399			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2400			break;
2401
2402		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2403			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2404			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2405			break;
2406
2407		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2408			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2409			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2410			break;
2411
2412		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2413			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2414			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2415			break;
2416
2417		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2418			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2419			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2420			break;
2421
2422		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2423			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2424			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2425			break;
2426
2427		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2428			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2429			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2430			break;
2431
2432		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2433			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2434			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2435			break;
2436		}
2437	}
2438}
2439
2440/*
2441 * Map a set of physical memory pages into the kernel virtual
2442 * address space. Return a pointer to where it is mapped. This
2443 * routine is intended to be used for mapping device memory,
2444 * NOT real memory.
2445 */
2446void *
2447pmap_mapdev(vm_offset_t pa, vm_size_t size)
2448{
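	/*
	 * Region 6 is the uncacheable, direct-mapped physical region,
	 * so device memory can be addressed without creating any
	 * page table entries.
	 */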
2449	return (void *)IA64_PHYS_TO_RR6(pa);
2450}
2451
2452/*
2453 * 'Unmap' a range mapped by pmap_mapdev().
2454 */
2455void
2456pmap_unmapdev(vm_offset_t va, vm_size_t size)
2457{
2458	return;
2459}
2460
2461/*
2462 * perform the pmap work for mincore
2463 */
2464int
2465pmap_mincore(pmap_t pmap, vm_offset_t addr)
2466{
2467	pmap_t oldpmap;
2468	struct ia64_lpte *pte;
2469	int val = 0;
2470
2471	oldpmap = pmap_install(pmap);
2472	pte = pmap_find_vhpt(addr);
2473	pmap_install(oldpmap);
2474
2475	if (!pte)
2476		return 0;
2477
2478	if (pmap_pte_v(pte)) {
2479		vm_page_t m;
2480		vm_offset_t pa;
2481
2482		val = MINCORE_INCORE;
2483		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2484			return val;
2485
2486		pa = pmap_pte_pa(pte);
2487
2488		m = PHYS_TO_VM_PAGE(pa);
2489
2490		/*
2491		 * Modified by us
2492		 */
2493		if (pte->pte_d)
2494			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2495		/*
2496		 * Modified by someone
2497		 */
2498		else if (pmap_is_modified(m))
2499			val |= MINCORE_MODIFIED_OTHER;
2500		/*
2501		 * Referenced by us
2502		 */
2503		if (pte->pte_a)
2504			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2505
2506		/*
2507		 * Referenced by someone
2508		 */
2509		else if (pmap_ts_referenced(m)) {
2510			val |= MINCORE_REFERENCED_OTHER;
2511			vm_page_flag_set(m, PG_REFERENCED);
2512		}
2513	}
2514	return val;
2515}
2516
2517void
2518pmap_activate(struct thread *td)
2519{
2520	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2521}
2522
2523pmap_t
2524pmap_install(pmap_t pmap)
2525{
2526	pmap_t oldpmap;
2527	int i;
2528
2529	critical_enter();
2530
2531	oldpmap = PCPU_GET(current_pmap);
2532
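	/*
	 * The kernel's mappings live in the fixed kernel regions, so
	 * installing the kernel pmap (or a pmap that is already
	 * current) requires no region register changes.
	 */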
2533	if (pmap == oldpmap || pmap == kernel_pmap) {
2534		critical_exit();
2535		return pmap;
2536	}
2537
2538	if (oldpmap) {
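		/* This CPU no longer has the old pmap active. */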
2539		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2540	}
2541
2542	PCPU_SET(current_pmap, pmap);
2543	if (!pmap) {
2544		/*
2545		 * Use region IDs 0..4, which have no mappings, to make
2546		 * sure we generate page faults on accesses.
2547		 */
2548		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2549		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2550		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2551		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2552		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2553		critical_exit();
2554		return oldpmap;
2555	}
2556
2557	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2558
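	/*
	 * Load this pmap's region IDs into region registers 0..4;
	 * regions 5..7 belong to the kernel and are left untouched.
	 */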
2559	for (i = 0; i < 5; i++)
2560		ia64_set_rr(IA64_RR_BASE(i),
2561			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2562
2563	critical_exit();
2564	return oldpmap;
2565}
2566
2567vm_offset_t
2568pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2569{
2570
2571	return addr;
2572}
2573
2574#include "opt_ddb.h"
2575
2576#ifdef DDB
2577
2578#include <ddb/ddb.h>
2579
2580static const char*	psnames[] = {
2581	"1B",	"2B",	"4B",	"8B",
2582	"16B",	"32B",	"64B",	"128B",
2583	"256B",	"512B",	"1K",	"2K",
2584	"4K",	"8K",	"16K",	"32K",
2585	"64K",	"128K",	"256K",	"512K",
2586	"1M",	"2M",	"4M",	"8M",
2587	"16M",	"32M",	"64M",	"128M",
2588	"256M",	"512M",	"1G",	"2G"
2589};
2590
2591static void
2592print_trs(int type)
2593{
2594	struct ia64_pal_result	res;
2595	int			i, maxtr;
2596	struct {
2597		struct ia64_pte	pte;
2598		struct ia64_itir itir;
2599		struct ia64_ifa ifa;
2600		struct ia64_rr	rr;
2601	}			buf;
2602	static const char*	manames[] = {
2603		"WB",	"bad",	"bad",	"bad",
2604		"UC",	"UCE",	"WC",	"NaT",
2606	};
2607
2608	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2609	if (res.pal_status != 0) {
2610		db_printf("Can't get VM summary\n");
2611		return;
2612	}
2613
2614	if (type == 0)
2615		maxtr = (res.pal_result[0] >> 40) & 0xff;
2616	else
2617		maxtr = (res.pal_result[0] >> 32) & 0xff;
2618
2619	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2620	for (i = 0; i <= maxtr; i++) {
2621		bzero(&buf, sizeof(buf));
2622		res = ia64_call_pal_stacked_physical
2623			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
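		/*
		 * The low bits of pal_result[0] indicate which of the
		 * returned fields are valid; zero any field that PAL
		 * did not supply.
		 */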
2624		if (!(res.pal_result[0] & 1))
2625			buf.pte.pte_ar = 0;
2626		if (!(res.pal_result[0] & 2))
2627			buf.pte.pte_pl = 0;
2628		if (!(res.pal_result[0] & 4))
2629			buf.pte.pte_d = 0;
2630		if (!(res.pal_result[0] & 8))
2631			buf.pte.pte_ma = 0;
2632		db_printf(
2633			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2634			buf.ifa.ifa_ig & 1,
2635			buf.rr.rr_rid,
2636			buf.ifa.ifa_vpn,
2637			buf.pte.pte_ppn,
2638			psnames[buf.itir.itir_ps],
2639			buf.pte.pte_ed,
2640			buf.pte.pte_ar,
2641			buf.pte.pte_pl,
2642			buf.pte.pte_d,
2643			buf.pte.pte_a,
2644			manames[buf.pte.pte_ma],
2645			buf.pte.pte_p,
2646			buf.itir.itir_key);
2647	}
2648}
2649
2650DB_COMMAND(itr, db_itr)
2651{
2652	print_trs(0);
2653}
2654
2655DB_COMMAND(dtr, db_dtr)
2656{
2657	print_trs(1);
2658}
2659
2660DB_COMMAND(rr, db_rr)
2661{
2662	int i;
2663	u_int64_t t;
2664	struct ia64_rr rr;
2665
2666	db_printf("RR RID    PgSz VE\n");
2667	for (i = 0; i < 8; i++) {
2668		__asm __volatile ("mov %0=rr[%1]"
2669				  : "=r"(t)
2670				  : "r"(IA64_RR_BASE(i)));
2671		*(u_int64_t *) &rr = t;
2672		db_printf("%d  %06x %4s %d\n",
2673		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2674	}
2675}
2676
2677DB_COMMAND(thash, db_thash)
2678{
2679	if (!have_addr)
2680		return;
2681
2682	db_printf("%p\n", (void *) ia64_thash(addr));
2683}
2684
2685DB_COMMAND(ttag, db_ttag)
2686{
2687	if (!have_addr)
2688		return;
2689
2690	db_printf("0x%lx\n", ia64_ttag(addr));
2691}
2692
2693#endif
2694