/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 1998,2000 Doug Rabson
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
 *		with some ideas from NetBSD's alpha pmap
 * $FreeBSD: head/sys/ia64/ia64/pmap.c 88088 2001-12-18 00:27:18Z jhb $
 */

49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
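
/*
 * For example, the kernel owns ID group 0 (region IDs 0..7), so region
 * register r can simply be loaded with region ID r; pmap_bootstrap()
 * does exactly that for regions 5, 6 and 7.
 */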
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/sysctl.h>
108
109#include <vm/vm.h>
110#include <vm/vm_param.h>
111#include <vm/vm_kern.h>
112#include <vm/vm_page.h>
113#include <vm/vm_map.h>
114#include <vm/vm_object.h>
115#include <vm/vm_extern.h>
116#include <vm/vm_pageout.h>
117#include <vm/vm_pager.h>
118#include <vm/vm_zone.h>
119
120#include <sys/user.h>
121
122#include <machine/pal.h>
123#include <machine/md_var.h>
124
125MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
126
127#ifndef PMAP_SHPGPERPROC
128#define PMAP_SHPGPERPROC 200
129#endif
130
131#if defined(DIAGNOSTIC)
132#define PMAP_DIAGNOSTIC
133#endif
134
135#define MINPV 2048
136
137#if 0
138#define PMAP_DIAGNOSTIC
139#define PMAP_DEBUG
140#endif
141
142#if !defined(PMAP_DIAGNOSTIC)
143#define PMAP_INLINE __inline
144#else
145#define PMAP_INLINE
146#endif
147
148/*
149 * Get PDEs and PTEs for user/kernel address space
150 */
151#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
152#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
153#define pmap_pte_v(pte)		((pte)->pte_p)
154#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
155#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
156
157#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
158				:((pte)->pte_ig &= ~PTE_IG_WIRED))
159#define pmap_pte_set_prot(pte, v) do {		\
160    (pte)->pte_ar = v >> 2;			\
161    (pte)->pte_pl = v & 3;			\
162} while (0)
163
164/*
165 * Given a map and a machine independent protection code,
166 * convert to an ia64 protection code.
167 */
168#define pte_prot(m, p)		(protection_codes[m == pmap_kernel() ? 0 : 1][p])
169#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
170#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
171int	protection_codes[2][8];
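
/*
 * protection_codes[][] is indexed by [kernel(0)/user(1)][VM_PROT_* mask]
 * and is filled in by ia64_protection_init(); each entry is a packed
 * (ar << 2) | pl value, which the macros above unpack into the access
 * rights and privilege level fields of a pte.
 */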
172
173/*
174 * Return non-zero if this pmap is currently active
175 */
176#define pmap_isactive(pmap)	(pmap->pm_active)
177
178/*
179 * Statically allocated kernel pmap
180 */
181static struct pmap kernel_pmap_store;
182pmap_t kernel_pmap;
183
184vm_offset_t avail_start;	/* PA of first available physical page */
185vm_offset_t avail_end;		/* PA of last available physical page */
186vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
187vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
188static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
189
190/*
191 * We use an object to own the kernel's 'page tables'. For simplicity,
192 * we use one page directory to index a set of pages containing
193 * ia64_lptes. This gives us up to 2Gb of kernel virtual space.
194 */
195static vm_object_t kptobj;
196static int nkpt;
197static struct ia64_lpte **kptdir;
198#define KPTE_DIR_INDEX(va) \
199	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
200#define KPTE_PTE_INDEX(va) \
201	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
202#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
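
/*
 * For example, with 8KB pages (PAGE_SHIFT == 13) and a 32-byte
 * struct ia64_lpte, each leaf page holds NKPTEPG == 256 PTEs and the
 * directory page holds 1024 pointers, giving the 2Gb figure quoted
 * above (1024 * 256 * 8KB).  The 5 and 3 in the index macros are
 * log2(sizeof(struct ia64_lpte)) and log2(sizeof(a pointer)).
 */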
203
204vm_offset_t kernel_vm_end;
205
206/*
207 * Values for ptc.e. XXX values for SKI.
208 */
209static u_int64_t pmap_ptc_e_base = 0x100000000;
210static u_int64_t pmap_ptc_e_count1 = 3;
211static u_int64_t pmap_ptc_e_count2 = 2;
212static u_int64_t pmap_ptc_e_stride1 = 0x2000;
213static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
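
/*
 * pmap_invalidate_all() uses these as a pair of nested loops: starting
 * at pmap_ptc_e_base, the inner loop issues ptc.e count2 times,
 * stepping by stride2, and after each inner pass the address advances
 * by stride1; the outer loop runs count1 times.
 */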
214
215/*
216 * Data for the RID allocator
217 */
218static u_int64_t *pmap_ridbusy;
219static int pmap_ridmax, pmap_ridcount;
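
/*
 * pmap_ridbusy is a bitmap with one bit per region ID (64 per word);
 * pmap_ridcount is the number of IDs currently in use.  IDs 0..7 are
 * marked busy in pmap_bootstrap() and reserved for the kernel.
 */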
220
221/*
222 * Data for the pv entry allocation mechanism
223 */
224static vm_zone_t pvzone;
225static struct vm_zone pvzone_store;
226static struct vm_object pvzone_obj;
227static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
228static int pmap_pagedaemon_waken = 0;
229static struct pv_entry *pvinit;
230static struct pv_entry *pvbootentries;
231static int pvbootnext, pvbootmax;
232
233/*
234 * Data for allocating PTEs for user processes.
235 */
236static vm_zone_t ptezone;
237static struct vm_zone ptezone_store;
238static struct vm_object ptezone_obj;
239static struct ia64_lpte *pteinit;
240
241/*
242 * VHPT instrumentation.
243 */
244static int pmap_vhpt_inserts;
245static int pmap_vhpt_collisions;
246static int pmap_vhpt_resident;
247SYSCTL_DECL(_vm_stats);
248SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
249SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
250	   &pmap_vhpt_inserts, 0, "");
251SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
252	   &pmap_vhpt_collisions, 0, "");
253SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
254	   &pmap_vhpt_resident, 0, "");
255
256static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
257static pv_entry_t get_pv_entry __P((void));
258static void	ia64_protection_init __P((void));
259
260static void	pmap_invalidate_all  __P((pmap_t pmap));
261static void	pmap_remove_all __P((vm_page_t m));
262static void	pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, vm_page_t m));
263
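/*
 * Early bootstrap allocator: steal physical memory directly from
 * phys_avail[] before the VM system is initialized and return it as a
 * region 7 (direct-mapped) address.  The memory is never given back.
 */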
264vm_offset_t
265pmap_steal_memory(vm_size_t size)
266{
267	vm_size_t bank_size;
268	vm_offset_t pa, va;
269
270	size = round_page(size);
271
272	bank_size = phys_avail[1] - phys_avail[0];
273	while (size > bank_size) {
274		int i;
275		for (i = 0; phys_avail[i+2]; i+= 2) {
276			phys_avail[i] = phys_avail[i+2];
277			phys_avail[i+1] = phys_avail[i+3];
278		}
279		phys_avail[i] = 0;
280		phys_avail[i+1] = 0;
281		if (!phys_avail[0])
282			panic("pmap_steal_memory: out of memory");
283		bank_size = phys_avail[1] - phys_avail[0];
284	}
285
286	pa = phys_avail[0];
287	phys_avail[0] += size;
288
289	va = IA64_PHYS_TO_RR7(pa);
290	bzero((caddr_t) va, size);
291	return va;
292}
293
294/*
295 *	Bootstrap the system enough to run with virtual memory.
296 */
297void
298pmap_bootstrap()
299{
300	int i, j, count, ridbits;
301	vm_offset_t vhpt_size, vhpt_base;
302	struct ia64_pal_result res;
303
304	/*
305	 * Query the PAL Code to find the loop parameters for the
306	 * ptc.e instruction.
307	 */
308	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
309	if (res.pal_status != 0)
310		panic("Can't configure ptc.e parameters");
311	pmap_ptc_e_base = res.pal_result[0];
312	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
313	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
314	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
315	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
316	if (bootverbose)
317		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
318		       "stride1=0x%lx, stride2=0x%lx\n",
319		       pmap_ptc_e_base,
320		       pmap_ptc_e_count1,
321		       pmap_ptc_e_count2,
322		       pmap_ptc_e_stride1,
323		       pmap_ptc_e_stride2);
324
325	/*
326	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
327	 */
328	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
329	if (res.pal_status != 0) {
330		if (bootverbose)
331			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
332		ridbits = 18; /* guaranteed minimum */
333	} else {
334		ridbits = (res.pal_result[1] >> 8) & 0xff;
335		if (bootverbose)
336			printf("Processor supports %d Region ID bits\n",
337			       ridbits);
338	}
339	pmap_ridmax = (1 << ridbits);
340	pmap_ridcount = 8;
341	pmap_ridbusy = (u_int64_t *)
342		pmap_steal_memory(pmap_ridmax / 8);
343	bzero(pmap_ridbusy, pmap_ridmax / 8);
344	pmap_ridbusy[0] |= 0xff;
345
346	/*
347	 * Allocate some memory for initial kernel 'page tables'.
348	 */
349	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
350	for (i = 0; i < NKPT; i++) {
351		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
352	}
353	nkpt = NKPT;
354
355	avail_start = phys_avail[0];
356	for (i = 0; phys_avail[i+2]; i+= 2) ;
357	avail_end = phys_avail[i+1];
358	count = i+2;
359
360	/*
361	 * Figure out a useful size for the VHPT, based on the size of
362	 * physical memory and try to locate a region which is large
363	 * enough to contain the VHPT (which must be a power of two in
364	 * size and aligned to a natural boundary).
365	 */
366	vhpt_size = 15;
367	while ((1<<vhpt_size) < ia64_btop(avail_end - avail_start) * 32)
368		vhpt_size++;
369
370	vhpt_base = 0;
371	while (!vhpt_base) {
372		vm_offset_t mask;
373		if (bootverbose)
374			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
375		mask = (1L << vhpt_size) - 1;
376		for (i = 0; i < count; i += 2) {
377			vm_offset_t base, limit;
378			base = (phys_avail[i] + mask) & ~mask;
379			limit = base + (1L << vhpt_size);
380			if (limit <= phys_avail[i+1])
381				/*
382				 * VHPT can fit in this region
383				 */
384				break;
385		}
386		if (!phys_avail[i]) {
387			/*
388			 * Can't fit, try next smaller size.
389			 */
390			vhpt_size--;
391		} else {
392			vhpt_base = (phys_avail[i] + mask) & ~mask;
393		}
394	}
395	if (vhpt_size < 15)
396		panic("Can't find space for VHPT");
397
398	if (bootverbose)
399		printf("Putting VHPT at %p\n", (void *) vhpt_base);
400	if (vhpt_base != phys_avail[i]) {
401		/*
402		 * Split this region.
403		 */
404		if (bootverbose)
405			printf("Splitting [%p-%p]\n",
406			       (void *) phys_avail[i],
407			       (void *) phys_avail[i+1]);
408		for (j = count; j > i; j -= 2) {
409			phys_avail[j] = phys_avail[j-2];
410			phys_avail[j+1] = phys_avail[j-2+1];
411		}
412		phys_avail[count+2] = 0;
413		phys_avail[count+3] = 0;
414		phys_avail[i+1] = vhpt_base;
415		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
416	} else {
417		phys_avail[i] = vhpt_base + (1L << vhpt_size);
418	}
419
420	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
421	bzero((void *) vhpt_base, (1L << vhpt_size));
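
	/*
	 * cr.pta packs the (region 7) virtual base of the VHPT with the
	 * table size (vhpt_size, i.e. log2 of its size in bytes) in
	 * bits 7:2, the long-format bit at bit 8 and the walker enable
	 * bit at bit 0.
	 */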
422	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
423			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
424
425	virtual_avail = IA64_RR_BASE(5);
426	virtual_end = IA64_RR_BASE(6)-1;
427
428	/*
429	 * Initialize protection array.
430	 */
431	ia64_protection_init();
432
433	/*
434	 * The kernel's pmap is statically allocated so we don't have to use
435	 * pmap_create, which is unlikely to work correctly at this part of
436	 * the boot sequence (XXX and which no longer exists).
437	 */
438	kernel_pmap = &kernel_pmap_store;
439	for (i = 0; i < 5; i++)
440		kernel_pmap->pm_rid[i] = 0;
441	kernel_pmap->pm_count = 1;
442	kernel_pmap->pm_active = 1;
443	TAILQ_INIT(&kernel_pmap->pm_pvlist);
444
445	/*
446	 * Region 5 is mapped via the vhpt.
447	 */
448	ia64_set_rr(IA64_RR_BASE(5),
449		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
450
451	/*
452	 * Region 6 is direct mapped UC and region 7 is direct mapped
	 * WB. The details of this are controlled by the Alt {I,D}TLB
454	 * handlers. Here we just make sure that they have the largest
455	 * possible page size to minimise TLB usage.
456	 */
457	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
458	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
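
	/*
	 * A page-size field of 28 corresponds to 256MB (1 << 28)
	 * translations for these two regions.
	 */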
459
460	/*
461	 * Set up proc0's PCB.
462	 */
463#if 0
464	thread0->td_pcb->pcb_hw.apcb_asn = 0;
465#endif
466
467	/*
468	 * Reserve some memory for allocating pvs while bootstrapping
469	 * the pv allocator. We need to have enough to cover mapping
470	 * the kmem_alloc region used to allocate the initial_pvs in
471	 * pmap_init. In general, the size of this region is
	 * approximately (# physical pages) * (size of pv entry).
473	 */
474	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
475	pvbootentries = (struct pv_entry *)
476		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
477	pvbootnext = 0;
478
479	/*
480	 * Clear out any random TLB entries left over from booting.
481	 */
482	pmap_invalidate_all(kernel_pmap);
483}
484
485/*
486 *	Initialize the pmap module.
487 *	Called by vm_init, to initialize any structures that the pmap
488 *	system needs to map virtual memory.
 *	pmap_init has been enhanced to support discontiguous physical
 *	memory in a fairly consistent way.
491 */
492void
493pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
494{
495	int i;
496	int initial_pvs;
497
498	/*
499	 * Allocate memory for random pmap data structures.  Includes the
500	 * pv_head_table.
501	 */
502
503	for(i = 0; i < vm_page_array_size; i++) {
504		vm_page_t m;
505
506		m = &vm_page_array[i];
507		TAILQ_INIT(&m->md.pv_list);
508		m->md.pv_list_count = 0;
509 	}
510
511	/*
512	 * Init the pv free list and the PTE free list.
513	 */
514	initial_pvs = vm_page_array_size;
515	if (initial_pvs < MINPV)
516		initial_pvs = MINPV;
517	pvzone = &pvzone_store;
518	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
519		initial_pvs * sizeof (struct pv_entry));
520	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
521		  vm_page_array_size);
522
523	ptezone = &ptezone_store;
524	pteinit = (struct ia64_lpte *) kmem_alloc(kernel_map,
525		initial_pvs * sizeof (struct ia64_lpte));
526	zbootinit(ptezone, "PT ENTRY", sizeof (struct ia64_lpte), pteinit,
527		  vm_page_array_size);
528
529	/*
530	 * Create the object for the kernel's page tables.
531	 */
532	kptobj = vm_object_allocate(OBJT_DEFAULT, MAXKPT);
533
534	/*
535	 * Now it is safe to enable pv_table recording.
536	 */
537	pmap_initialized = TRUE;
538}
539
540/*
541 * Initialize the address space (zone) for the pv_entries.  Set a
542 * high water mark so that the system can recover from excessive
543 * numbers of pv entries.
544 */
545void
546pmap_init2()
547{
548	int shpgperproc = PMAP_SHPGPERPROC;
549
550	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
551	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
552	pv_entry_high_water = 9 * (pv_entry_max / 10);
553	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
554	zinitna(ptezone, &ptezone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
555}
556
557
558/***************************************************
559 * Manipulate TLBs for a pmap
560 ***************************************************/
561
562static void
563pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
564{
565	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
566		("invalidating TLB for non-current pmap"));
567	ia64_ptc_l(va, PAGE_SHIFT << 2);
568}
569
570static void
571pmap_invalidate_all(pmap_t pmap)
572{
573	u_int64_t addr;
574	int i, j;
575	critical_t psr;
576
577	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
578		("invalidating TLB for non-current pmap"));
579
580	psr = cpu_critical_enter();
581	addr = pmap_ptc_e_base;
582	for (i = 0; i < pmap_ptc_e_count1; i++) {
583		for (j = 0; j < pmap_ptc_e_count2; j++) {
584			ia64_ptc_e(addr);
585			addr += pmap_ptc_e_stride2;
586		}
587		addr += pmap_ptc_e_stride1;
588	}
589	cpu_critical_exit(psr);
590}
591
592static u_int32_t
593pmap_allocate_rid(void)
594{
595	int rid;
596
597	if (pmap_ridcount == pmap_ridmax)
598		panic("pmap_allocate_rid: All Region IDs used");
599
600	do {
601		rid = arc4random() & (pmap_ridmax - 1);
602	} while (pmap_ridbusy[rid / 64] & (1L << (rid & 63)));
603	pmap_ridbusy[rid / 64] |= (1L << (rid & 63));
604	pmap_ridcount++;
605
606	return rid;
607}
608
609static void
610pmap_free_rid(u_int32_t rid)
611{
612	pmap_ridbusy[rid / 64] &= ~(1L << (rid & 63));
613	pmap_ridcount--;
614}
615
616static void
617pmap_ensure_rid(pmap_t pmap, vm_offset_t va)
618{
619	int rr;
620
621	rr = va >> 61;
622	if (pmap->pm_rid[rr])
623		return;
624	pmap->pm_rid[rr] = pmap_allocate_rid();
625
626	if (pmap == PCPU_GET(current_pmap))
627		ia64_set_rr(IA64_RR_BASE(rr),
628			    (pmap->pm_rid[rr] << 8)|(PAGE_SHIFT << 2)|1);
629}
630
631/***************************************************
632 * Low level helper routines.....
633 ***************************************************/
634
635/*
636 * Install a pte into the VHPT
637 */
638static PMAP_INLINE void
639pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
640{
641	u_int64_t *vhp, *p;
642
643	/* invalidate the pte */
644	atomic_set_64(&vhpte->pte_tag, 1L << 63);
645	ia64_mf();			/* make sure everyone sees */
646
647	vhp = (u_int64_t *) vhpte;
648	p = (u_int64_t *) pte;
649
650	vhp[0] = p[0];
651	vhp[1] = p[1];
652	vhp[2] = p[2];			/* sets ti to one */
653
654	ia64_mf();
655}
656
657/*
658 * Compare essential parts of pte.
659 */
660static PMAP_INLINE int
661pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
662{
663	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
664}
665
666/*
 * Return non-zero if modifications to the given virtual address should
 * be tracked (i.e., the address lies outside the clean submap).
669 */
670static PMAP_INLINE int
671pmap_track_modified(vm_offset_t va)
672{
673	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
674		return 1;
675	else
676		return 0;
677}
678
679/*
680 * Create the U area for a new process.
681 * This routine directly affects the fork perf for a process.
682 */
683void
684pmap_new_proc(struct proc *p)
685{
686	struct user *up;
687
688	/*
689	 * Use contigmalloc for user area so that we can use a region
	 * 7 address for it, which makes it impossible to accidentally
	 * lose the translation when recording a trapframe.
692	 */
693	up = contigmalloc(UAREA_PAGES * PAGE_SIZE, M_PMAP,
694			  M_WAITOK,
695			  0ul,
696			  256*1024*1024 - 1,
697			  PAGE_SIZE,
698			  256*1024*1024);
699
700	p->p_md.md_uservirt = up;
701	p->p_uarea = (struct user *)
702		IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t) up));
703}
704
705/*
706 * Dispose the U area for a process that has exited.
707 * This routine directly impacts the exit perf of a process.
708 */
709void
710pmap_dispose_proc(struct proc *p)
711{
712	contigfree(p->p_md.md_uservirt, UAREA_PAGES * PAGE_SIZE, M_PMAP);
713	p->p_md.md_uservirt = 0;
714	p->p_uarea = 0;
715}
716
717/*
718 * Allow the U area for a process to be prejudicially paged out.
719 */
720void
721pmap_swapout_proc(struct proc *p)
722{
723}
724
725/*
726 * Bring the U area for a specified process back in.
727 */
728void
729pmap_swapin_proc(struct proc *p)
730{
731}
732
733/*
734 * Create the KSTACK for a new thread.
735 * This routine directly affects the fork perf for a process/thread.
736 */
737void
738pmap_new_thread(struct thread *td)
739{
740	vm_offset_t *ks;
741
742	/*
	 * Use contigmalloc for the kernel stack so that we can use a
	 * region 7 address for it, which makes it impossible to
	 * accidentally lose the translation when recording a trapframe.
746	 */
747	ks = contigmalloc(KSTACK_PAGES * PAGE_SIZE, M_PMAP,
748			  M_WAITOK,
749			  0ul,
750			  256*1024*1024 - 1,
751			  PAGE_SIZE,
752			  256*1024*1024);
753
754	td->td_md.md_kstackvirt = ks;
755	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
756}
757
758/*
759 * Dispose the KSTACK for a thread that has exited.
760 * This routine directly impacts the exit perf of a process/thread.
761 */
762void
763pmap_dispose_thread(struct thread *td)
764{
765	contigfree(td->td_md.md_kstackvirt, KSTACK_PAGES * PAGE_SIZE, M_PMAP);
766	td->td_md.md_kstackvirt = 0;
767	td->td_kstack = 0;
768}
769
770/*
771 * Allow the KSTACK for a thread to be prejudicially paged out.
772 */
773void
774pmap_swapout_thread(struct thread *td)
775{
776}
777
778/*
779 * Bring the KSTACK for a specified thread back in.
780 */
781void
782pmap_swapin_thread(struct thread *td)
783{
784}
785
786/***************************************************
787 * Page table page management routines.....
788 ***************************************************/
789
790void
791pmap_pinit0(struct pmap *pmap)
792{
793	int i;
794
795	/*
796	 * kernel_pmap is the same as any other pmap.
797	 */
798	pmap_pinit(pmap);
799	pmap->pm_flags = 0;
800	for (i = 0; i < 5; i++)
801		pmap->pm_rid[i] = 0;
802	pmap->pm_count = 1;
803	pmap->pm_ptphint = NULL;
804	pmap->pm_active = 0;
805	TAILQ_INIT(&pmap->pm_pvlist);
806	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
807}
808
809/*
810 * Initialize a preallocated and zeroed pmap structure,
811 * such as one in a vmspace structure.
812 */
813void
814pmap_pinit(struct pmap *pmap)
815{
816	int i;
817
818	pmap->pm_flags = 0;
819	for (i = 0; i < 5; i++)
820		pmap->pm_rid[i] = 0;
821	pmap->pm_count = 1;
822	pmap->pm_ptphint = NULL;
823	pmap->pm_active = 0;
824	TAILQ_INIT(&pmap->pm_pvlist);
825	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
826}
827
828/*
829 * Wire in kernel global address entries.  To avoid a race condition
830 * between pmap initialization and pmap_growkernel, this procedure
831 * should be called after the vmspace is attached to the process
832 * but before this pmap is activated.
833 */
834void
835pmap_pinit2(struct pmap *pmap)
836{
837}
838
839/***************************************************
 * Pmap allocation/deallocation routines.
841 ***************************************************/
842
843/*
844 * Release any resources held by the given physical map.
845 * Called when a pmap initialized by pmap_pinit is being released.
846 * Should only be called if the map contains no valid mappings.
847 */
848void
849pmap_release(pmap_t pmap)
850{
851	int i;
852
#if defined(DIAGNOSTIC)
	if (pmap->pm_stats.resident_count != 0)
		panic("pmap_release: pmap still has mappings");
#endif
857
858	for (i = 0; i < 5; i++)
859		if (pmap->pm_rid[i])
860			pmap_free_rid(pmap->pm_rid[i]);
861}
862
863/*
864 * grow the number of kernel page table entries, if needed
865 */
866void
867pmap_growkernel(vm_offset_t addr)
868{
869	struct ia64_lpte *ptepage;
870	vm_page_t nkpg;
871
872	if (kernel_vm_end == 0) {
873		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
874			+ IA64_RR_BASE(5);
875	}
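
	/*
	 * The kernel map grows in whole leaf page-table pages, i.e. in
	 * chunks of NKPTEPG pages (2MB with 8KB pages), so round the
	 * target address up accordingly.
	 */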
876	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
877	while (kernel_vm_end < addr) {
878		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
879			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
880				& ~(PAGE_SIZE * NKPTEPG - 1);
881			continue;
882		}
883
884		/*
885		 * We could handle more by increasing the size of kptdir.
886		 */
887		if (nkpt == MAXKPT)
888			panic("pmap_growkernel: out of kernel address space");
889
890		/*
891		 * This index is bogus, but out of the way
892		 */
893		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
894		if (!nkpg)
895			panic("pmap_growkernel: no memory to grow kernel");
896
897		nkpt++;
898
899		vm_page_wire(nkpg);
900		ptepage = (struct ia64_lpte *)
901			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
902		bzero(ptepage, PAGE_SIZE);
903		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
904
905		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
906	}
907}
908
909/*
910 *	Retire the given physical map from service.
911 *	Should only be called if the map contains
912 *	no valid mappings.
913 */
914void
915pmap_destroy(pmap_t pmap)
916{
917	int count;
918
919	if (pmap == NULL)
920		return;
921
922	count = --pmap->pm_count;
923	if (count == 0) {
924		pmap_release(pmap);
925		panic("destroying a pmap is not yet implemented");
926	}
927}
928
929/*
930 *	Add a reference to the specified pmap.
931 */
932void
933pmap_reference(pmap_t pmap)
934{
935	if (pmap != NULL) {
936		pmap->pm_count++;
937	}
938}
939
940/***************************************************
 * Page management routines.
942 ***************************************************/
943
944/*
945 * free the pv_entry back to the free list
946 */
947static PMAP_INLINE void
948free_pv_entry(pv_entry_t pv)
949{
950	pv_entry_count--;
951	zfree(pvzone, pv);
952}
953
954/*
955 * get a new pv_entry, allocating a block from the system
956 * when needed.
957 * the memory allocation is performed bypassing the malloc code
958 * because of the possibility of allocations at interrupt time.
959 */
960static pv_entry_t
961get_pv_entry(void)
962{
963	pv_entry_count++;
964	if (pv_entry_high_water &&
965		(pv_entry_count > pv_entry_high_water) &&
966		(pmap_pagedaemon_waken == 0)) {
967		pmap_pagedaemon_waken = 1;
968		wakeup (&vm_pages_needed);
969	}
970	return zalloc(pvzone);
971}
972
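/*
 * Each pte inserted below gets its tag from ia64_ttag(va) and is
 * linked, via the physical address stored in pte_chain, into the
 * collision chain headed by the VHPT slot that ia64_thash(va) returns.
 * The chain is walked through region 7 (IA64_PHYS_TO_RR7), and the
 * VHPT slot itself holds a copy of one valid pte from its chain,
 * installed by pmap_install_pte().
 */
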
973/*
974 * Add an ia64_lpte to the VHPT.
975 */
976static void
977pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
978{
979	struct ia64_lpte *vhpte;
980
981	pmap_vhpt_inserts++;
982	pmap_vhpt_resident++;
983
984	vhpte = (struct ia64_lpte *) ia64_thash(va);
985
986	if (vhpte->pte_chain)
987		pmap_vhpt_collisions++;
988
989	pte->pte_chain = vhpte->pte_chain;
990	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
991
992	if (!vhpte->pte_p && pte->pte_p)
993		pmap_install_pte(vhpte, pte);
994	else
995		ia64_mf();
996}
997
998/*
999 * Update VHPT after a pte has changed.
1000 */
1001static void
1002pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1003{
1004	struct ia64_lpte *vhpte;
1005
1006	vhpte = (struct ia64_lpte *) ia64_thash(va);
1007
1008	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
1009	    && pte->pte_p)
1010		pmap_install_pte(vhpte, pte);
1011}
1012
1013/*
1014 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1015 * worked or an appropriate error code otherwise.
1016 */
1017static int
1018pmap_remove_vhpt(vm_offset_t va)
1019{
1020	struct ia64_lpte *pte;
1021	struct ia64_lpte *lpte;
1022	struct ia64_lpte *vhpte;
1023	u_int64_t tag;
1024	int error = ENOENT;
1025
1026	vhpte = (struct ia64_lpte *) ia64_thash(va);
1027
1028	/*
1029	 * If the VHPTE is invalid, there can't be a collision chain.
1030	 */
1031	if (!vhpte->pte_p) {
1032		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1033		printf("can't remove vhpt entry for 0x%lx\n", va);
1034		goto done;
1035	}
1036
1037	lpte = vhpte;
1038	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1039	tag = ia64_ttag(va);
1040
1041	while (pte->pte_tag != tag) {
1042		lpte = pte;
1043		if (pte->pte_chain)
1044			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1045		else {
1046			printf("can't remove vhpt entry for 0x%lx\n", va);
1047			goto done;
1048		}
1049	}
1050
1051	/*
1052	 * Snip this pv_entry out of the collision chain.
1053	 */
1054	lpte->pte_chain = pte->pte_chain;
1055
1056	/*
1057	 * If the VHPTE matches as well, change it to map the first
1058	 * element from the chain if there is one.
1059	 */
1060	if (vhpte->pte_tag == tag) {
1061		if (vhpte->pte_chain) {
1062			pte = (struct ia64_lpte *)
1063				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1064			pmap_install_pte(vhpte, pte);
1065		} else {
1066			vhpte->pte_p = 0;
1067			ia64_mf();
1068		}
1069	}
1070
1071	pmap_vhpt_resident--;
1072	error = 0;
1073 done:
1074	return error;
1075}
1076
1077/*
1078 * Find the ia64_lpte for the given va, if any.
1079 */
1080static struct ia64_lpte *
1081pmap_find_vhpt(vm_offset_t va)
1082{
1083	struct ia64_lpte *pte;
1084	u_int64_t tag;
1085
1086	pte = (struct ia64_lpte *) ia64_thash(va);
1087	if (!pte->pte_chain) {
1088		pte = 0;
1089		goto done;
1090	}
1091
1092	tag = ia64_ttag(va);
1093	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1094
1095	while (pte->pte_tag != tag) {
1096		if (pte->pte_chain) {
1097			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1098		} else {
1099			pte = 0;
1100			break;
1101		}
1102	}
1103
1104 done:
1105	return pte;
1106}
1107
1108/*
1109 * Remove an entry from the list of managed mappings.
1110 */
1111static int
1112pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1113{
1114	if (!pv) {
1115		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1116			for (pv = TAILQ_FIRST(&m->md.pv_list);
1117			     pv;
1118			     pv = TAILQ_NEXT(pv, pv_list)) {
1119				if (pmap == pv->pv_pmap && va == pv->pv_va)
1120					break;
1121			}
1122		} else {
1123			for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
1124			     pv;
1125			     pv = TAILQ_NEXT(pv, pv_plist)) {
1126				if (va == pv->pv_va)
1127					break;
1128			}
1129		}
1130	}
1131
1132	if (pv) {
1133		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1134		m->md.pv_list_count--;
1135		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1136			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1137
1138		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1139		free_pv_entry(pv);
1140		return 0;
1141	} else {
1142		return ENOENT;
1143	}
1144}
1145
1146/*
1147 * Create a pv entry for page at pa for
1148 * (pmap, va).
1149 */
1150static void
1151pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1152{
1153	pv_entry_t pv;
1154
1155	pv = get_pv_entry();
1156	pv->pv_pmap = pmap;
1157	pv->pv_va = va;
1158
1159	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1160	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1161	m->md.pv_list_count++;
1162}
1163
1164/*
1165 *	Routine:	pmap_extract
1166 *	Function:
1167 *		Extract the physical page address associated
1168 *		with the given map/virtual_address pair.
1169 */
1170vm_offset_t
1171pmap_extract(pmap, va)
1172	register pmap_t pmap;
1173	vm_offset_t va;
1174{
1175	pmap_t oldpmap;
1176	vm_offset_t pa;
1177
1178	oldpmap = pmap_install(pmap);
1179	pa = ia64_tpa(va);
1180	pmap_install(oldpmap);
1181	return pa;
1182}
1183
1184/***************************************************
1185 * Low level mapping routines.....
1186 ***************************************************/
1187
1188/*
1189 * Find the kernel lpte for mapping the given virtual address, which
1190 * must be in the part of region 5 which we can cover with our kernel
1191 * 'page tables'.
1192 */
1193static struct ia64_lpte *
1194pmap_find_kpte(vm_offset_t va)
1195{
1196	KASSERT((va >> 61) == 5,
1197		("kernel mapping 0x%lx not in region 5", va));
1198	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1199		("kernel mapping 0x%lx out of range", va));
1200	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1201}
1202
1203/*
1204 * Find a pte suitable for mapping a user-space address. If one exists
1205 * in the VHPT, that one will be returned, otherwise a new pte is
1206 * allocated.
1207 */
1208static struct ia64_lpte *
1209pmap_find_pte(vm_offset_t va)
1210{
1211	struct ia64_lpte *pte;
1212
1213	if (va >= VM_MAXUSER_ADDRESS)
1214		return pmap_find_kpte(va);
1215
1216	pte = pmap_find_vhpt(va);
1217	if (!pte) {
1218		pte = zalloc(ptezone);
1219		pte->pte_p = 0;
1220	}
1221	return pte;
1222}
1223
1224/*
1225 * Free a pte which is now unused. This simply returns it to the zone
1226 * allocator if it is a user mapping. For kernel mappings, clear the
1227 * valid bit to make it clear that the mapping is not currently used.
1228 */
1229static void
1230pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1231{
1232	if (va < VM_MAXUSER_ADDRESS)
1233		zfree(ptezone, pte);
1234	else
1235		pte->pte_p = 0;
1236}
1237
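/*
 * For managed mappings the accessed and dirty bits start out clear, so
 * the first reference and the first store each fault and let the
 * kernel record them; unmanaged mappings have both bits pre-set to
 * avoid those faults.
 */
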
1238/*
1239 * Set a pte to contain a valid mapping and enter it in the VHPT. If
 * the pte was originally valid, then it is assumed to already be in the
1241 * VHPT.
1242 */
1243static void
1244pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1245	     int ig, int pl, int ar)
1246{
1247	int wasvalid = pte->pte_p;
1248
1249	pte->pte_p = 1;
1250	pte->pte_ma = PTE_MA_WB;
1251	if (ig & PTE_IG_MANAGED) {
1252		pte->pte_a = 0;
1253		pte->pte_d = 0;
1254	} else {
1255		pte->pte_a = 1;
1256		pte->pte_d = 1;
1257	}
1258	pte->pte_pl = pl;
1259	pte->pte_ar = ar;
1260	pte->pte_ppn = pa >> 12;
1261	pte->pte_ed = 0;
1262	pte->pte_ig = ig;
1263
1264	pte->pte_ps = PAGE_SHIFT;
1265	pte->pte_key = 0;
1266
1267	pte->pte_tag = ia64_ttag(va);
1268
1269	if (wasvalid) {
1270		pmap_update_vhpt(pte, va);
1271	} else {
1272		pmap_enter_vhpt(pte, va);
1273	}
1274}
1275
1276/*
1277 * If a pte contains a valid mapping, clear it and update the VHPT.
1278 */
1279static void
1280pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1281{
1282	if (pte->pte_p) {
1283		pmap_remove_vhpt(va);
1284		ia64_ptc_l(va, PAGE_SHIFT << 2);
1285		pte->pte_p = 0;
1286	}
1287}
1288
1289/*
1290 * Remove the (possibly managed) mapping represented by pte from the
1291 * given pmap.
1292 */
1293static int
1294pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1295		pv_entry_t pv, int freepte)
1296{
1297	int error;
1298	vm_page_t m;
1299
1300	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1301		("removing pte for non-current pmap"));
1302
1303	/*
1304	 * First remove from the VHPT.
1305	 */
1306	error = pmap_remove_vhpt(va);
1307	if (error)
1308		return error;
1309
1310	/*
1311	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1312	 */
1313	pte->pte_p = 0;
1314
1315	if (pte->pte_ig & PTE_IG_WIRED)
1316		pmap->pm_stats.wired_count -= 1;
1317
1318	pmap->pm_stats.resident_count -= 1;
1319	if (pte->pte_ig & PTE_IG_MANAGED) {
1320		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1321		if (pte->pte_d)
1322			if (pmap_track_modified(va))
1323				vm_page_dirty(m);
1324		if (pte->pte_a)
1325			vm_page_flag_set(m, PG_REFERENCED);
1326
1327		if (freepte)
1328			pmap_free_pte(pte, va);
1329		return pmap_remove_entry(pmap, m, va, pv);
1330	} else {
1331		if (freepte)
1332			pmap_free_pte(pte, va);
1333		return 0;
1334	}
1335}
1336
1337/*
1338 * Add a list of wired pages to the kva
 * Add a list of wired pages to the kva.
 * This routine is only used for temporary
1341 * page modification or references recorded.
1342 * Note that old mappings are simply written
1343 * over.  The page *must* be wired.
1344 */
1345void
1346pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1347{
1348	int i;
1349	struct ia64_lpte *pte;
1350
1351	for (i = 0; i < count; i++) {
1352		vm_offset_t tva = va + i * PAGE_SIZE;
1353		int wasvalid;
1354		pte = pmap_find_kpte(tva);
1355		wasvalid = pte->pte_p;
1356		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1357			     0, PTE_PL_KERN, PTE_AR_RWX);
1358		if (wasvalid)
1359			ia64_ptc_l(tva, PAGE_SHIFT << 2);
1360	}
1361}
1362
1363/*
1364 * this routine jerks page mappings from the
1365 * kernel -- it is meant only for temporary mappings.
1366 */
1367void
1368pmap_qremove(vm_offset_t va, int count)
1369{
1370	int i;
1371	struct ia64_lpte *pte;
1372
1373	for (i = 0; i < count; i++) {
1374		pte = pmap_find_kpte(va);
1375		pmap_clear_pte(pte, va);
1376		va += PAGE_SIZE;
1377	}
1378}
1379
1380/*
1381 * Add a wired page to the kva.
1382 */
1383void
1384pmap_kenter(vm_offset_t va, vm_offset_t pa)
1385{
1386	struct ia64_lpte *pte;
1387	int wasvalid;
1388
1389	pte = pmap_find_kpte(va);
1390	wasvalid = pte->pte_p;
1391	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1392	if (wasvalid)
1393		ia64_ptc_l(va, PAGE_SHIFT << 2);
1394}
1395
1396/*
1397 * Remove a page from the kva
1398 */
1399void
1400pmap_kremove(vm_offset_t va)
1401{
1402	struct ia64_lpte *pte;
1403
1404	pte = pmap_find_kpte(va);
1405	pmap_clear_pte(pte, va);
1406}
1407
1408/*
1409 *	Used to map a range of physical addresses into kernel
1410 *	virtual address space.
1411 *
1412 *	The value passed in '*virt' is a suggested virtual address for
1413 *	the mapping. Architectures which can support a direct-mapped
1414 *	physical to virtual region can return the appropriate address
1415 *	within that region, leaving '*virt' unchanged. Other
1416 *	architectures should map the pages starting at '*virt' and
1417 *	update '*virt' with the first usable address after the mapped
1418 *	region.
1419 */
1420vm_offset_t
1421pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1422{
1423	return IA64_PHYS_TO_RR7(start);
1424}
1425
1426/*
1427 * This routine is very drastic, but can save the system
1428 * in a pinch.
1429 */
1430void
1431pmap_collect()
1432{
1433	int i;
1434	vm_page_t m;
1435	static int warningdone = 0;
1436
1437	if (pmap_pagedaemon_waken == 0)
1438		return;
1439
1440	if (warningdone < 5) {
1441		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
1442		warningdone++;
1443	}
1444
1445	for(i = 0; i < vm_page_array_size; i++) {
1446		m = &vm_page_array[i];
1447		if (m->wire_count || m->hold_count || m->busy ||
1448		    (m->flags & (PG_BUSY | PG_UNMANAGED)))
1449			continue;
1450		pmap_remove_all(m);
1451	}
1452	pmap_pagedaemon_waken = 0;
1453}
1454
1455/*
1456 * Remove a single page from a process address space
1457 */
1458static void
1459pmap_remove_page(pmap_t pmap, vm_offset_t va)
1460{
1461	struct ia64_lpte *pte;
1462
1463	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1464		("removing page for non-current pmap"));
1465
1466	pte = pmap_find_vhpt(va);
1467	if (pte) {
1468		pmap_remove_pte(pmap, pte, va, 0, 1);
1469		pmap_invalidate_page(pmap, va);
1470	}
1471	return;
1472}
1473
1474/*
1475 *	Remove the given range of addresses from the specified map.
1476 *
1477 *	It is assumed that the start and end are properly
1478 *	rounded to the page size.
1479 */
1480void
1481pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1482{
1483	pmap_t oldpmap;
1484	vm_offset_t va;
1485	pv_entry_t pv;
1486	struct ia64_lpte *pte;
1487
1488	if (pmap == NULL)
1489		return;
1490
1491	if (pmap->pm_stats.resident_count == 0)
1492		return;
1493
1494	oldpmap = pmap_install(pmap);
1495
1496	/*
	 * Special handling for removing a single page: it is a very
	 * common operation and it is easy to short-circuit
	 * some code.
1500	 */
1501	if (sva + PAGE_SIZE == eva) {
1502		pmap_remove_page(pmap, sva);
1503		pmap_install(oldpmap);
1504		return;
1505	}
1506
1507	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1508		for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
1509		     pv;
1510		     pv = TAILQ_NEXT(pv, pv_plist)) {
1511			va = pv->pv_va;
1512			if (va >= sva && va < eva) {
1513				pte = pmap_find_vhpt(va);
1514				pmap_remove_pte(pmap, pte, va, pv, 1);
1515				pmap_invalidate_page(pmap, va);
1516			}
1517		}
1518
1519	} else {
		for (va = sva; va < eva; va += PAGE_SIZE) {
1521			pte = pmap_find_vhpt(va);
1522			if (pte) {
1523				pmap_remove_pte(pmap, pte, va, 0, 1);
1524				pmap_invalidate_page(pmap, va);
1525			}
1526		}
1527	}
1528
1529	pmap_install(oldpmap);
1530}
1531
1532/*
1533 *	Routine:	pmap_remove_all
1534 *	Function:
1535 *		Removes this physical page from
1536 *		all physical maps in which it resides.
1537 *		Reflects back modify bits to the pager.
1538 *
1539 *	Notes:
1540 *		Original versions of this routine were very
1541 *		inefficient because they iteratively called
1542 *		pmap_remove (slow...)
1543 */
1544
1545static void
1546pmap_remove_all(vm_page_t m)
1547{
1548	pmap_t oldpmap;
1549	pv_entry_t pv;
1550	int nmodify;
1551	int s;
1552
1553	nmodify = 0;
1554#if defined(PMAP_DIAGNOSTIC)
1555	/*
1556	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1557	 * pages!
1558	 */
1559	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1560		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1561	}
1562#endif
1563
1564	s = splvm();
1565
1566	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1567		struct ia64_lpte *pte;
1568		pmap_t pmap = pv->pv_pmap;
1569		vm_offset_t va = pv->pv_va;
1570
1571		oldpmap = pmap_install(pmap);
1572		pte = pmap_find_vhpt(va);
1573		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1574			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1575		pmap_remove_pte(pmap, pte, va, pv, 1);
1576		pmap_invalidate_page(pmap, va);
1577		pmap_install(oldpmap);
1578	}
1579
1580	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
1581
1582	splx(s);
1583	return;
1584}
1585
1586/*
1587 *	Set the physical protection on the
1588 *	specified range of this map as requested.
1589 */
1590void
1591pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1592{
1593	pmap_t oldpmap;
1594	struct ia64_lpte *pte;
1595	int newprot;
1596
1597	if (pmap == NULL)
1598		return;
1599
1600	oldpmap = pmap_install(pmap);
1601
1602	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1603		pmap_remove(pmap, sva, eva);
1604		pmap_install(oldpmap);
1605		return;
1606	}
1607
1608	if (prot & VM_PROT_WRITE) {
1609		pmap_install(oldpmap);
1610		return;
1611	}
1612
1613	newprot = pte_prot(pmap, prot);
1614
1615	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1616		panic("pmap_protect: unaligned addresses");
1617
1618	while (sva < eva) {
1619		/*
1620		 * If page is invalid, skip this page
1621		 */
1622		pte = pmap_find_vhpt(sva);
1623		if (!pte) {
1624			sva += PAGE_SIZE;
1625			continue;
1626		}
1627
1628		if (pmap_pte_prot(pte) != newprot) {
1629			if (pte->pte_ig & PTE_IG_MANAGED) {
1630				vm_offset_t pa = pmap_pte_pa(pte);
1631				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1632				if (pte->pte_d) {
1633					if (pmap_track_modified(sva))
1634						vm_page_dirty(m);
1635					pte->pte_d = 0;
1636				}
1637				if (pte->pte_a) {
1638					vm_page_flag_set(m, PG_REFERENCED);
1639					pte->pte_a = 0;
1640				}
1641			}
1642			pmap_pte_set_prot(pte, newprot);
1643			pmap_update_vhpt(pte, sva);
1644			pmap_invalidate_page(pmap, sva);
1645		}
1646
1647		sva += PAGE_SIZE;
1648	}
1649	pmap_install(oldpmap);
1650}
1651
1652/*
1653 *	Insert the given physical page (p) at
1654 *	the specified virtual address (v) in the
1655 *	target physical map with the protection requested.
1656 *
1657 *	If specified, the page will be wired down, meaning
1658 *	that the related pte can not be reclaimed.
1659 *
1660 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1661 *	or lose information.  That is, this routine must actually
1662 *	insert this page into the given map NOW.
1663 */
1664void
1665pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1666	   boolean_t wired)
1667{
1668	pmap_t oldpmap;
1669	vm_offset_t pa;
1670	vm_offset_t opa;
1671	struct ia64_lpte origpte;
1672	struct ia64_lpte *pte;
1673	int managed;
1674
1675	if (pmap == NULL)
1676		return;
1677
1678	pmap_ensure_rid(pmap, va);
1679
1680	oldpmap = pmap_install(pmap);
1681
1682	va &= ~PAGE_MASK;
1683#ifdef PMAP_DIAGNOSTIC
1684	if (va > VM_MAX_KERNEL_ADDRESS)
1685		panic("pmap_enter: toobig");
1686#endif
1687
1688	/*
1689	 * Find (or create) a pte for the given mapping.
1690	 */
1691	pte = pmap_find_pte(va);
1692	origpte = *pte;
1693
1694	if (origpte.pte_p)
1695		opa = pmap_pte_pa(&origpte);
1696	else
1697		opa = 0;
1698	managed = 0;
1699
1700	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1701
1702	/*
1703	 * Mapping has not changed, must be protection or wiring change.
1704	 */
1705	if (origpte.pte_p && (opa == pa)) {
1706		/*
1707		 * Wiring change, just update stats. We don't worry about
1708		 * wiring PT pages as they remain resident as long as there
1709		 * are valid mappings in them. Hence, if a user page is wired,
1710		 * the PT page will be also.
1711		 */
1712		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1713			pmap->pm_stats.wired_count++;
1714		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1715			pmap->pm_stats.wired_count--;
1716
1717		/*
1718		 * We might be turning off write access to the page,
1719		 * so we go ahead and sense modify status.
1720		 */
1721		if (origpte.pte_ig & PTE_IG_MANAGED) {
1722			if (origpte.pte_d && pmap_track_modified(va)) {
1723				vm_page_t om;
1724				om = PHYS_TO_VM_PAGE(opa);
1725				vm_page_dirty(om);
1726			}
1727		}
1728
1729		managed = origpte.pte_ig & PTE_IG_MANAGED;
1730		goto validate;
1731	}
1732	/*
1733	 * Mapping has changed, invalidate old range and fall
1734	 * through to handle validating new mapping.
1735	 */
1736	if (opa) {
1737		int error;
1738		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1739		if (error)
1740			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1741	}
1742
1743	/*
1744	 * Enter on the PV list if part of our managed memory.
1745	 */
1746	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1747		pmap_insert_entry(pmap, va, m);
1748		managed |= PTE_IG_MANAGED;
1749	}
1750
1751	/*
1752	 * Increment counters
1753	 */
1754	pmap->pm_stats.resident_count++;
1755	if (wired)
1756		pmap->pm_stats.wired_count++;
1757
1758validate:
1759
1760	/*
1761	 * Now validate mapping with desired protection/wiring. This
1762	 * adds the pte to the VHPT if necessary.
1763	 */
1764	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1765		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1766
1767	/*
1768	 * if the mapping or permission bits are different, we need
1769	 * to invalidate the page.
1770	 */
1771	if (!pmap_equal_pte(&origpte, pte))
1772		pmap_invalidate_page(pmap, va);
1773
1774	pmap_install(oldpmap);
1775}
1776
1777/*
1778 * this code makes some *MAJOR* assumptions:
1779 * 1. Current pmap & pmap exists.
1780 * 2. Not wired.
1781 * 3. Read access.
1782 * 4. No page table pages.
1783 * 5. Tlbflush is deferred to calling procedure.
1784 * 6. Page IS managed.
1785 * but is *MUCH* faster than pmap_enter...
1786 */
1787
1788static void
1789pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1790{
1791	struct ia64_lpte *pte;
1792	pmap_t oldpmap;
1793
1794	pmap_ensure_rid(pmap, va);
1795
1796	oldpmap = pmap_install(pmap);
1797
1798	pte = pmap_find_pte(va);
1799	if (pte->pte_p)
	if (pte->pte_p) {
		pmap_install(oldpmap);
		return;
	}
1802	/*
	 * Enter on the PV list since it is part of our managed memory.
1804	 */
1805	pmap_insert_entry(pmap, va, m);
1806
1807	/*
1808	 * Increment counters
1809	 */
1810	pmap->pm_stats.resident_count++;
1811
1812	/*
1813	 * Initialise PTE with read-only protection and enter into VHPT.
1814	 */
1815	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1816		     PTE_IG_MANAGED,
1817		     PTE_PL_USER, PTE_AR_R);
1818
1819	pmap_install(oldpmap);
1820}
1821
1822/*
1823 * Make temporary mapping for a physical address. This is called
1824 * during dump.
1825 */
1826void *
1827pmap_kenter_temporary(vm_offset_t pa, int i)
1828{
1829	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1830}
1831
1832#define MAX_INIT_PT (96)
1833/*
1834 * pmap_object_init_pt preloads the ptes for a given object
1835 * into the specified pmap.  This eliminates the blast of soft
1836 * faults on process startup and immediately after an mmap.
1837 */
1838void
1839pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1840		    vm_object_t object, vm_pindex_t pindex,
1841		    vm_size_t size, int limit)
1842{
1843	pmap_t oldpmap;
1844	vm_offset_t tmpidx;
1845	int psize;
1846	vm_page_t p;
1847	int objpgs;
1848
1849	if (pmap == NULL || object == NULL)
1850		return;
1851
1852	oldpmap = pmap_install(pmap);
1853
1854	psize = ia64_btop(size);
1855
1856	if ((object->type != OBJT_VNODE) ||
1857		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1858			(object->resident_page_count > MAX_INIT_PT))) {
1859		pmap_install(oldpmap);
1860		return;
1861	}
1862
1863	if (psize + pindex > object->size) {
		if (object->size < pindex) {
			pmap_install(oldpmap);
			return;
		}
1866		psize = object->size - pindex;
1867	}
1868
1869	/*
1870	 * if we are processing a major portion of the object, then scan the
1871	 * entire thing.
1872	 */
1873	if (psize > (object->resident_page_count >> 2)) {
1874		objpgs = psize;
1875
1876		for (p = TAILQ_FIRST(&object->memq);
1877		    ((objpgs > 0) && (p != NULL));
1878		    p = TAILQ_NEXT(p, listq)) {
1879
1880			tmpidx = p->pindex;
1881			if (tmpidx < pindex) {
1882				continue;
1883			}
1884			tmpidx -= pindex;
1885			if (tmpidx >= psize) {
1886				continue;
1887			}
1888			/*
1889			 * don't allow an madvise to blow away our really
1890			 * free pages allocating pv entries.
1891			 */
1892			if ((limit & MAP_PREFAULT_MADVISE) &&
1893			    cnt.v_free_count < cnt.v_free_reserved) {
1894				break;
1895			}
1896			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1897				(p->busy == 0) &&
1898			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1899				if ((p->queue - p->pc) == PQ_CACHE)
1900					vm_page_deactivate(p);
1901				vm_page_busy(p);
1902				pmap_enter_quick(pmap,
1903						 addr + ia64_ptob(tmpidx), p);
1904				vm_page_flag_set(p, PG_MAPPED);
1905				vm_page_wakeup(p);
1906			}
1907			objpgs -= 1;
1908		}
1909	} else {
1910		/*
1911		 * else lookup the pages one-by-one.
1912		 */
1913		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1914			/*
1915			 * don't allow an madvise to blow away our really
1916			 * free pages allocating pv entries.
1917			 */
1918			if ((limit & MAP_PREFAULT_MADVISE) &&
1919			    cnt.v_free_count < cnt.v_free_reserved) {
1920				break;
1921			}
1922			p = vm_page_lookup(object, tmpidx + pindex);
1923			if (p &&
1924			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1925				(p->busy == 0) &&
1926			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1927				if ((p->queue - p->pc) == PQ_CACHE)
1928					vm_page_deactivate(p);
1929				vm_page_busy(p);
1930				pmap_enter_quick(pmap,
1931						 addr + ia64_ptob(tmpidx), p);
1932				vm_page_flag_set(p, PG_MAPPED);
1933				vm_page_wakeup(p);
1934			}
1935		}
1936	}
1937	pmap_install(oldpmap);
1938	return;
1939}
1940
1941/*
1942 * pmap_prefault provides a quick way of clustering
 * page faults into a process's address space.  It is a "cousin"
1944 * of pmap_object_init_pt, except it runs at page fault time instead
1945 * of mmap time.
1946 */
1947#define PFBAK 4
1948#define PFFOR 4
1949#define PAGEORDER_SIZE (PFBAK+PFFOR)
1950
1951static int pmap_prefault_pageorder[] = {
1952	-PAGE_SIZE, PAGE_SIZE,
1953	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1955	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1956};
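
/*
 * pmap_prefault() probes PAGEORDER_SIZE offsets around the faulting
 * address, alternating pages just before and just after it.
 */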
1957
1958void
1959pmap_prefault(pmap, addra, entry)
1960	pmap_t pmap;
1961	vm_offset_t addra;
1962	vm_map_entry_t entry;
1963{
1964	int i;
1965	vm_offset_t starta;
1966	vm_offset_t addr;
1967	vm_pindex_t pindex;
1968	vm_page_t m, mpte;
1969	vm_object_t object;
1970
1971	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1972		return;
1973
1974	object = entry->object.vm_object;
1975
1976	starta = addra - PFBAK * PAGE_SIZE;
1977	if (starta < entry->start) {
1978		starta = entry->start;
1979	} else if (starta > addra) {
1980		starta = 0;
1981	}
1982
1983	mpte = NULL;
1984	for (i = 0; i < PAGEORDER_SIZE; i++) {
1985		vm_object_t lobject;
1986		struct ia64_lpte *pte;
1987
1988		addr = addra + pmap_prefault_pageorder[i];
1989		if (addr > addra + (PFFOR * PAGE_SIZE))
1990			addr = 0;
1991
1992		if (addr < starta || addr >= entry->end)
1993			continue;
1994
1995		pte = pmap_find_vhpt(addr);
1996		if (pte && pte->pte_p)
1997			continue;
1998
1999		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2000		lobject = object;
2001		for (m = vm_page_lookup(lobject, pindex);
2002		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2003		    lobject = lobject->backing_object) {
2004			if (lobject->backing_object_offset & PAGE_MASK)
2005				break;
2006			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2007			m = vm_page_lookup(lobject->backing_object, pindex);
2008		}
2009
2010		/*
2011		 * give-up when a page is not in memory
2012		 */
2013		if (m == NULL)
2014			break;
2015
2016		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2017			(m->busy == 0) &&
2018		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2019
2020			if ((m->queue - m->pc) == PQ_CACHE) {
2021				vm_page_deactivate(m);
2022			}
2023			vm_page_busy(m);
2024			pmap_enter_quick(pmap, addr, m);
2025			vm_page_flag_set(m, PG_MAPPED);
2026			vm_page_wakeup(m);
2027		}
2028	}
2029}
2030
2031/*
2032 *	Routine:	pmap_change_wiring
2033 *	Function:	Change the wiring attribute for a map/virtual-address
2034 *			pair.
2035 *	In/out conditions:
2036 *			The mapping must already exist in the pmap.
2037 */
2038void
2039pmap_change_wiring(pmap, va, wired)
2040	register pmap_t pmap;
2041	vm_offset_t va;
2042	boolean_t wired;
2043{
2044	pmap_t oldpmap;
2045	struct ia64_lpte *pte;
2046
2047	if (pmap == NULL)
2048		return;
2049
2050	oldpmap = pmap_install(pmap);
2051
2052	pte = pmap_find_vhpt(va);
2053
2054	if (wired && !pmap_pte_w(pte))
2055		pmap->pm_stats.wired_count++;
2056	else if (!wired && pmap_pte_w(pte))
2057		pmap->pm_stats.wired_count--;
2058
2059	/*
2060	 * Wiring is not a hardware characteristic so there is no need to
2061	 * invalidate TLB.
2062	 */
2063	pmap_pte_set_w(pte, wired);
2064
2065	pmap_install(oldpmap);
2066}
2067
2068
2069
2070/*
2071 *	Copy the range specified by src_addr/len
2072 *	from the source map to the range dst_addr/len
2073 *	in the destination map.
2074 *
2075 *	This routine is only advisory and need not do anything.
2076 */
2077
2078void
2079pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2080	  vm_offset_t src_addr)
2081{
2082}
2083
2084/*
2085 *	Routine:	pmap_kernel
2086 *	Function:
2087 *		Returns the physical map handle for the kernel.
2088 */
2089pmap_t
2090pmap_kernel()
2091{
2092	return (kernel_pmap);
2093}
2094
2095/*
2096 *	pmap_zero_page zeros the specified hardware page by
2097 *	accessing it through the region 7 direct mapping and using
2098 *	bzero to clear its contents.
2099 */
2100
2101void
2102pmap_zero_page(vm_offset_t pa)
2103{
2104	vm_offset_t va = IA64_PHYS_TO_RR7(pa);
2105	bzero((caddr_t) va, PAGE_SIZE);
2106}
2107
2108
2109/*
2110 *	pmap_zero_page_area zeros part of the specified hardware
2111 *	page by accessing it through the region 7 direct mapping
2112 *	and using bzero to clear its contents.
2113 *
2114 *	off and size must reside within a single page.
2115 */
2116
2117void
2118pmap_zero_page_area(vm_offset_t pa, int off, int size)
2119{
2120	vm_offset_t va = IA64_PHYS_TO_RR7(pa);
2121	bzero((char *)(caddr_t)va + off, size);
2122}
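
/*
 * Illustrative sketch (not compiled): zero the unused tail of a page
 * whose first 'valid' bytes hold data.  'pa' and 'valid' are
 * hypothetical; note that off and size stay within a single page.
 */
#if 0
	pmap_zero_page_area(pa, valid, PAGE_SIZE - valid);
#endif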
2123
2124/*
2125 *	pmap_copy_page copies the specified (machine independent)
2126 *	page by accessing both the source and the destination
2127 *	through the region 7 direct mapping and using bcopy to copy
2128 *	the page, one machine dependent page at a time.
2129 */
2130void
2131pmap_copy_page(vm_offset_t src, vm_offset_t dst)
2132{
2133	src = IA64_PHYS_TO_RR7(src);
2134	dst = IA64_PHYS_TO_RR7(dst);
2135	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2136}
2137
2138
2139/*
2140 *	Routine:	pmap_pageable
2141 *	Function:
2142 *		Make the specified pages (by pmap, offset)
2143 *		pageable (or not) as requested.
2144 *
2145 *		A page which is not pageable may not take
2146 *		a fault; therefore, its page table entry
2147 *		must remain valid for the duration.
2148 *
2149 *		This routine is merely advisory; pmap_enter
2150 *		will specify that these pages are to be wired
2151 *		down (or not) as appropriate.
2152 */
2153void
2154pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
2155	      boolean_t pageable)
2156{
2157}
2158
2159/*
2160 * This routine returns TRUE if the given physical page has a
2161 * mapping in the given pmap.
2162 */
2163boolean_t
2164pmap_page_exists(pmap_t pmap, vm_page_t m)
2165{
2166	register pv_entry_t pv;
2167	int s;
2168
2169	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2170		return FALSE;
2171
2172	s = splvm();
2173
2174	/*
2175	 * Check the page's current mappings for one belonging to this pmap.
2176	 */
2177	for (pv = TAILQ_FIRST(&m->md.pv_list);
2178		pv;
2179		pv = TAILQ_NEXT(pv, pv_list)) {
2180		if (pv->pv_pmap == pmap) {
2181			splx(s);
2182			return TRUE;
2183		}
2184	}
2185	splx(s);
2186	return (FALSE);
2187}
2188
2189#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2190/*
2191 * Remove all pages from the specified address space;
2192 * this aids process exit speeds.  Also, this code is
2193 * special cased for the current process only, but can
2194 * have the more generic (and slightly slower) mode
2195 * enabled.  This is much faster than pmap_remove in
2196 * the case of running down an entire address space.
2197 */
2198void
2199pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2200{
2201	pv_entry_t pv, npv;
2202	int s;
2203
2204#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2205	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2206		printf("warning: pmap_remove_pages called with non-current pmap\n");
2207		return;
2208	}
2209#endif
2210
2211	s = splvm();
2212	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2213		pv;
2214		pv = npv) {
2215		struct ia64_lpte *pte;
2216
2217		npv = TAILQ_NEXT(pv, pv_plist);
2218
2219		if (pv->pv_va >= eva || pv->pv_va < sva) {
2220			continue;
2221		}
2222
2223		pte = pmap_find_vhpt(pv->pv_va);
2224		if (!pte)
2225			panic("pmap_remove_pages: page on pm_pvlist has no pte");
2226
2227
2228		/*
2229		 * We cannot remove wired pages from a process' mapping at this time
2230		 */
2231		if (pte->pte_ig & PTE_IG_WIRED) {
2232			continue;
2233		}
2234
2235		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2236	}
2237	splx(s);
2238
2239	pmap_invalidate_all(pmap);
2240}
2241
2242/*
2243 *      pmap_page_protect:
2244 *
2245 *      Lower the permission for all mappings to a given page.
2246 */
2247void
2248pmap_page_protect(vm_page_t m, vm_prot_t prot)
2249{
2250	pv_entry_t pv;
2251
2252	if ((prot & VM_PROT_WRITE) != 0)
2253		return;
2254	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2255		for (pv = TAILQ_FIRST(&m->md.pv_list);
2256		     pv;
2257		     pv = TAILQ_NEXT(pv, pv_list)) {
2258			int newprot = pte_prot(pv->pv_pmap, prot);
2259			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2260			struct ia64_lpte *pte;
2261			pte = pmap_find_vhpt(pv->pv_va);
2262			pmap_pte_set_prot(pte, newprot);
2263			pmap_update_vhpt(pte, pv->pv_va);
2264			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2265			pmap_install(oldpmap);
2266		}
2267	} else {
2268		pmap_remove_all(m);
2269	}
2270}
2271
2272vm_offset_t
2273pmap_phys_address(int ppn)
2274{
2275	return (ia64_ptob(ppn));
2276}
2277
2278/*
2279 *	pmap_ts_referenced:
2280 *
2281 *	Return the count of reference bits for a page, clearing all of them.
2282 *
2283 */
2284int
2285pmap_ts_referenced(vm_page_t m)
2286{
2287	pv_entry_t pv;
2288	int count = 0;
2289
2290	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2291		return 0;
2292
2293	for (pv = TAILQ_FIRST(&m->md.pv_list);
2294		pv;
2295		pv = TAILQ_NEXT(pv, pv_list)) {
2296		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2297		struct ia64_lpte *pte;
2298		pte = pmap_find_vhpt(pv->pv_va);
2299		if (pte->pte_a) {
2300			count++;
2301			pte->pte_a = 0;
2302			pmap_update_vhpt(pte, pv->pv_va);
2303			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2304		}
2305		pmap_install(oldpmap);
2306	}
2307
2308	return count;
2309}
2310
2311#if 0
2312/*
2313 *	pmap_is_referenced:
2314 *
2315 *	Return whether or not the specified physical page was referenced
2316 *	in any physical maps.
2317 */
2318static boolean_t
2319pmap_is_referenced(vm_page_t m)
2320{
2321	pv_entry_t pv;
2322
2323	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2324		return FALSE;
2325
2326	for (pv = TAILQ_FIRST(&m->md.pv_list);
2327		pv;
2328		pv = TAILQ_NEXT(pv, pv_list)) {
2329		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2330		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2331		pmap_install(oldpmap);
2332		if (pte->pte_a)
2333			return 1;
2334	}
2335
2336	return 0;
2337}
2338#endif
2339
2340/*
2341 *	pmap_is_modified:
2342 *
2343 *	Return whether or not the specified physical page was modified
2344 *	in any physical maps.
2345 */
2346boolean_t
2347pmap_is_modified(vm_page_t m)
2348{
2349	pv_entry_t pv;
2350
2351	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2352		return FALSE;
2353
2354	for (pv = TAILQ_FIRST(&m->md.pv_list);
2355		pv;
2356		pv = TAILQ_NEXT(pv, pv_list)) {
2357		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2358		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2359		pmap_install(oldpmap);
2360		if (pte->pte_d)
2361			return 1;
2362	}
2363
2364	return 0;
2365}
2366
2367/*
2368 *	Clear the modify bits on the specified physical page.
2369 */
2370void
2371pmap_clear_modify(vm_page_t m)
2372{
2373	pv_entry_t pv;
2374
2375	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2376		return;
2377
2378	for (pv = TAILQ_FIRST(&m->md.pv_list);
2379		pv;
2380		pv = TAILQ_NEXT(pv, pv_list)) {
2381		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2382		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2383		if (pte->pte_d) {
2384			pte->pte_d = 0;
2385			pmap_update_vhpt(pte, pv->pv_va);
2386			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2387		}
2388		pmap_install(oldpmap);
2389	}
2390}
2391
2392/*
2393 *	pmap_clear_reference:
2394 *
2395 *	Clear the reference bit on the specified physical page.
2396 */
2397void
2398pmap_clear_reference(vm_page_t m)
2399{
2400	pv_entry_t pv;
2401
2402	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2403		return;
2404
2405	for (pv = TAILQ_FIRST(&m->md.pv_list);
2406		pv;
2407		pv = TAILQ_NEXT(pv, pv_list)) {
2408		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2409		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2410		if (pte->pte_a) {
2411			pte->pte_a = 0;
2412			pmap_update_vhpt(pte, pv->pv_va);
2413			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2414		}
2415		pmap_install(oldpmap);
2416	}
2417}
2418
2419/*
2420 * Miscellaneous support routines follow
2421 */
2422
2423static void
2424ia64_protection_init()
2425{
2426	int prot, *kp, *up;
2427
2428	kp = protection_codes[0];
2429	up = protection_codes[1];
2430
2431	for (prot = 0; prot < 8; prot++) {
2432		switch (prot) {
2433		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2434			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2435			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2436			break;
2437
2438		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2439			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2440			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2441			break;
2442
2443		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2444			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2445			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2446			break;
2447
2448		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2449			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2450			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2451			break;
2452
2453		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2454			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2455			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2456			break;
2457
2458		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2459			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2460			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2461			break;
2462
2463		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2464			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2465			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2466			break;
2467
2468		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2469			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2470			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2471			break;
2472		}
2473	}
2474}
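
/*
 * Illustrative sketch (not compiled) of the table layout built above:
 * row 0 holds the kernel codes and row 1 the user codes, indexed by
 * the low three VM_PROT_* bits.  For example, VM_PROT_READ |
 * VM_PROT_WRITE selects (PTE_AR_RW << 2) | PTE_PL_USER in the user
 * row.  The direct indexing below is shown for exposition only.
 */
#if 0
	int kern_code = protection_codes[0][VM_PROT_READ | VM_PROT_WRITE];
	int user_code = protection_codes[1][VM_PROT_READ | VM_PROT_WRITE];
#endif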
2475
2476/*
2477 * Map a set of physical memory pages into the kernel virtual
2478 * address space. Return a pointer to where it is mapped. This
2479 * routine is intended to be used for mapping device memory,
2480 * NOT real memory.
2481 */
2482void *
2483pmap_mapdev(vm_offset_t pa, vm_size_t size)
2484{
2485	return (void*) IA64_PHYS_TO_RR6(pa);
2486}
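
/*
 * Note that the address returned above lies in region 6, the kernel's
 * uncacheable identity-mapped window, which is what device registers
 * want; ordinary memory is touched through the cacheable region 7
 * window instead (see pmap_zero_page and pmap_copy_page above).
 */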
2487
2488/*
2489 * 'Unmap' a range mapped by pmap_mapdev().
2490 */
2491void
2492pmap_unmapdev(vm_offset_t va, vm_size_t size)
2493{
2494	return;
2495}
2496
2497/*
2498 * perform the pmap work for mincore
2499 */
2500int
2501pmap_mincore(pmap_t pmap, vm_offset_t addr)
2502{
2503	pmap_t oldpmap;
2504	struct ia64_lpte *pte;
2505	int val = 0;
2506
2507	oldpmap = pmap_install(pmap);
2508	pte = pmap_find_vhpt(addr);
2509	pmap_install(oldpmap);
2510
2511	if (!pte)
2512		return 0;
2513
2514	if (pmap_pte_v(pte)) {
2515		vm_page_t m;
2516		vm_offset_t pa;
2517
2518		val = MINCORE_INCORE;
2519		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2520			return val;
2521
2522		pa = pmap_pte_pa(pte);
2523
2524		m = PHYS_TO_VM_PAGE(pa);
2525
2526		/*
2527		 * Modified by us
2528		 */
2529		if (pte->pte_d)
2530			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2531		/*
2532		 * Modified by someone
2533		 */
2534		else if (pmap_is_modified(m))
2535			val |= MINCORE_MODIFIED_OTHER;
2536		/*
2537		 * Referenced by us
2538		 */
2539		if (pte->pte_a)
2540			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2541
2542		/*
2543		 * Referenced by someone
2544		 */
2545		else if (pmap_ts_referenced(m)) {
2546			val |= MINCORE_REFERENCED_OTHER;
2547			vm_page_flag_set(m, PG_REFERENCED);
2548		}
2549	}
2550	return val;
2551}
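
/*
 * Illustrative sketch (not compiled): decoding the value returned
 * above.  'pmap' and 'addr' are hypothetical.
 */
#if 0
	int val = pmap_mincore(pmap, addr);

	if (val & MINCORE_INCORE) {
		/* A valid translation exists for addr. */
		if (val & MINCORE_MODIFIED)
			;	/* dirty bit set in this pmap's pte */
		else if (val & MINCORE_MODIFIED_OTHER)
			;	/* dirty via some other mapping */
		if (val & MINCORE_REFERENCED_OTHER)
			;	/* referenced through this or another mapping */
	}
#endif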
2552
2553void
2554pmap_activate(struct thread *td)
2555{
2556	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2557}
2558
2559pmap_t
2560pmap_install(pmap_t pmap)
2561{
2562	pmap_t oldpmap;
2563	int i;
2564
2565	oldpmap = PCPU_GET(current_pmap);
2566
2567	if (pmap == oldpmap || pmap == kernel_pmap)
2568		return pmap;
2569
2570	PCPU_SET(current_pmap, pmap);
2571	if (!pmap) {
2572		/*
2573		 * Install RIDs 0..4, which have no mappings, to make sure
2574		 * we generate page faults on accesses.
2575		 */
2576		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2577		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2578		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2579		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2580		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2581		return oldpmap;
2582	}
2583
2584	pmap->pm_active = 1;	/* XXX use bitmap for SMP */
2585
2586	for (i = 0; i < 5; i++)
2587		ia64_set_rr(IA64_RR_BASE(i),
2588			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2589
2590	return oldpmap;
2591}
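
/*
 * The region register value written above packs, per the ia64 RR
 * layout, the VHPT walker enable in bit 0, the preferred page size
 * (PAGE_SHIFT) in bits 2..7 and the region id from bit 8 up.  As a
 * worked example (assuming 8KB pages, i.e. PAGE_SHIFT == 13), rid 0x42
 * yields (0x42 << 8) | (13 << 2) | 1 == 0x4235.
 */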
2592
2593vm_offset_t
2594pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2595{
2596
2597	return addr;
2598}
2599
2600#include "opt_ddb.h"
2601
2602#ifdef DDB
2603
2604#include <ddb/ddb.h>
2605
2606static const char*	psnames[] = {
2607	"1B",	"2B",	"4B",	"8B",
2608	"16B",	"32B",	"64B",	"128B",
2609	"256B",	"512B",	"1K",	"2K",
2610	"4K",	"8K",	"16K",	"32K",
2611	"64K",	"128K",	"256K",	"512K",
2612	"1M",	"2M",	"4M",	"8M",
2613	"16M",	"32M",	"64M",	"128M",
2614	"256M",	"512M",	"1G",	"2G"
2615};
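
/*
 * The translation and region registers report page sizes as log2 of
 * the size in bytes, so psnames[ps] names the size directly; e.g.
 * ps == 13 prints as "8K" and ps == 28 as "256M".
 */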
2616
2617static void
2618print_trs(int type)
2619{
2620	struct ia64_pal_result	res;
2621	int			i, maxtr;
2622	struct {
2623		struct ia64_pte	pte;
2624		struct ia64_itir itir;
2625		struct ia64_ifa ifa;
2626		struct ia64_rr	rr;
2627	}			buf;
2628	static const char*	manames[] = {
2629		"WB",	"bad",	"bad",	"bad",
2630		"UC",	"UCE",	"WC",	"NaT",
2631
2632	};
2633
2634	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2635	if (res.pal_status != 0) {
2636		db_printf("Can't get VM summary\n");
2637		return;
2638	}
2639
2640	if (type == 0)
2641		maxtr = (res.pal_result[0] >> 40) & 0xff;
2642	else
2643		maxtr = (res.pal_result[0] >> 32) & 0xff;
2644
2645	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2646	for (i = 0; i <= maxtr; i++) {
2647		bzero(&buf, sizeof(buf));
2648		res = ia64_call_pal_stacked_physical
2649			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2650		if (!(res.pal_result[0] & 1))
2651			buf.pte.pte_ar = 0;
2652		if (!(res.pal_result[0] & 2))
2653			buf.pte.pte_pl = 0;
2654		if (!(res.pal_result[0] & 4))
2655			buf.pte.pte_d = 0;
2656		if (!(res.pal_result[0] & 8))
2657			buf.pte.pte_ma = 0;
2658		db_printf(
2659			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2660			buf.ifa.ifa_ig & 1,
2661			buf.rr.rr_rid,
2662			buf.ifa.ifa_vpn,
2663			buf.pte.pte_ppn,
2664			psnames[buf.itir.itir_ps],
2665			buf.pte.pte_ed,
2666			buf.pte.pte_ar,
2667			buf.pte.pte_pl,
2668			buf.pte.pte_d,
2669			buf.pte.pte_a,
2670			manames[buf.pte.pte_ma],
2671			buf.pte.pte_p,
2672			buf.itir.itir_key);
2673	}
2674}
2675
2676DB_COMMAND(itr, db_itr)
2677{
2678	print_trs(0);
2679}
2680
2681DB_COMMAND(dtr, db_dtr)
2682{
2683	print_trs(1);
2684}
2685
2686DB_COMMAND(rr, db_rr)
2687{
2688	int i;
2689	u_int64_t t;
2690	struct ia64_rr rr;
2691
2692	printf("RR RID    PgSz VE\n");
2693	for (i = 0; i < 8; i++) {
2694		__asm __volatile ("mov %0=rr[%1]"
2695				  : "=r"(t)
2696				  : "r"(IA64_RR_BASE(i)));
2697		*(u_int64_t *) &rr = t;
2698		printf("%d  %06x %4s %d\n",
2699		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2700	}
2701}
2702
2703DB_COMMAND(thash, db_thash)
2704{
2705	if (!have_addr)
2706		return;
2707
2708	db_printf("%p\n", (void *) ia64_thash(addr));
2709}
2710
2711DB_COMMAND(ttag, db_ttag)
2712{
2713	if (!have_addr)
2714		return;
2715
2716	db_printf("0x%lx\n", ia64_ttag(addr));
2717}
2718
2719#endif
2720