pmap.c revision 115057
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 115057 2003-05-16 06:40:40Z marcel $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidate or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
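/*
 * A short worked example of the layout above, assuming the usual
 * definitions IA64_RR_BASE(n) == ((u_int64_t)(n) << 61) and
 * IA64_PHYS_TO_RR7(pa) == (IA64_RR_BASE(7) | (pa)): a physical address
 * such as 0x2000 is reached cacheable through region 7 at
 * 0xe000000000002000 and uncacheable through region 6 at
 * 0xc000000000002000, while region 5 (kernel virtual) addresses are
 * translated through the VHPT set up in pmap_bootstrap().
 */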
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120#include <vm/uma_int.h>
121
122#include <sys/user.h>
123
124#include <machine/pal.h>
125#include <machine/md_var.h>
126
127MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
128
129#ifndef KSTACK_MAX_PAGES
130#define	KSTACK_MAX_PAGES 32
131#endif
132
133#ifndef PMAP_SHPGPERPROC
134#define PMAP_SHPGPERPROC 200
135#endif
136
137#if defined(DIAGNOSTIC)
138#define PMAP_DIAGNOSTIC
139#endif
140
141#define MINPV 2048	/* Preallocate at least this many */
142#define MAXPV 20480	/* But no more than this */
143
144#if 0
145#define PMAP_DIAGNOSTIC
146#define PMAP_DEBUG
147#endif
148
149#if !defined(PMAP_DIAGNOSTIC)
150#define PMAP_INLINE __inline
151#else
152#define PMAP_INLINE
153#endif
154
155/*
156 * Get PDEs and PTEs for user/kernel address space
157 */
158#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
159#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
160#define pmap_pte_v(pte)		((pte)->pte_p)
161#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
162#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
163
164#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
165				:((pte)->pte_ig &= ~PTE_IG_WIRED))
166#define pmap_pte_set_prot(pte, v) do {		\
167    (pte)->pte_ar = v >> 2;			\
168    (pte)->pte_pl = v & 3;			\
169} while (0)
170
171/*
172 * Given a map and a machine independent protection code,
173 * convert to an ia64 protection code.
174 */
175#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
176#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
177#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
178int	protection_codes[2][8];
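/*
 * A sketch of the encoding implied by the macros above: each
 * protection_codes[][] entry packs the ia64 access rights and privilege
 * level as (ar << 2) | pl, so pte_prot_pl() recovers the low two bits
 * and pte_prot_ar() the rest. Row 0 is used for the kernel pmap and
 * row 1 for user pmaps; ia64_protection_init() fills in the values.
 */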
179
180/*
181 * Return non-zero if this pmap is currently active
182 */
183#define pmap_isactive(pmap)	(pmap->pm_active)
184
185/*
186 * Statically allocated kernel pmap
187 */
188struct pmap kernel_pmap_store;
189
190vm_offset_t avail_start;	/* PA of first available physical page */
191vm_offset_t avail_end;		/* PA of last available physical page */
192vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
193vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
194static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
195
196vm_offset_t vhpt_base, vhpt_size;
197
198/*
199 * We use an object to own the kernel's 'page tables'. For simplicity,
200 * we use one page directory to index a set of pages containing
201 * ia64_lptes. This gives us up to 2Gb of kernel virtual space.
202 */
203static int nkpt;
204static struct ia64_lpte **kptdir;
205#define KPTE_DIR_INDEX(va) \
206	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
207#define KPTE_PTE_INDEX(va) \
208	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
209#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
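/*
 * Worked example of the index macros above, assuming 8KB pages
 * (PAGE_SHIFT == 13) and a 32-byte struct ia64_lpte: KPTE_DIR_INDEX()
 * reduces to (va >> 21) & 0x3ff, KPTE_PTE_INDEX() to (va >> 13) & 0xff
 * and NKPTEPG to 256. One directory page of 1024 pointers, each naming
 * a page of 256 ia64_lptes, each mapping 8KB, covers the 2Gb of kernel
 * virtual space mentioned above.
 */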
210
211vm_offset_t kernel_vm_end;
212
213/* Values for ptc.e. XXX values for SKI. */
214static u_int64_t pmap_ptc_e_base = 0x100000000;
215static u_int64_t pmap_ptc_e_count1 = 3;
216static u_int64_t pmap_ptc_e_count2 = 2;
217static u_int64_t pmap_ptc_e_stride1 = 0x2000;
218static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
219
220/*
221 * Data for the RID allocator
222 */
223static int pmap_ridcount;
224static int pmap_rididx;
225static int pmap_ridmapsz;
226static int pmap_ridmax;
227static u_int64_t *pmap_ridmap;
228struct mtx pmap_ridmutex;
229
230/*
231 * Data for the pv entry allocation mechanism
232 */
233static uma_zone_t pvzone;
234static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
235int pmap_pagedaemon_waken;
236static struct pv_entry *pvbootentries;
237static int pvbootnext, pvbootmax;
238
239/*
240 * Data for allocating PTEs for user processes.
241 */
242static uma_zone_t ptezone;
243
244/*
245 * VHPT instrumentation.
246 */
247static int pmap_vhpt_inserts;
248static int pmap_vhpt_collisions;
249static int pmap_vhpt_resident;
250SYSCTL_DECL(_vm_stats);
251SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
252SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
253	   &pmap_vhpt_inserts, 0, "");
254SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
255	   &pmap_vhpt_collisions, 0, "");
256SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
257	   &pmap_vhpt_resident, 0, "");
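/*
 * The counters above appear under the vm.stats.vhpt sysctl node, so
 * VHPT behaviour can be inspected from userland with something like
 * "sysctl vm.stats.vhpt" (inserts, collisions and resident entries).
 */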
258
259static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
260static pv_entry_t get_pv_entry(void);
261static void	ia64_protection_init(void);
262
263static void	pmap_invalidate_all(pmap_t pmap);
264static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
265
266vm_offset_t
267pmap_steal_memory(vm_size_t size)
268{
269	vm_size_t bank_size;
270	vm_offset_t pa, va;
271
272	size = round_page(size);
273
274	bank_size = phys_avail[1] - phys_avail[0];
275	while (size > bank_size) {
276		int i;
277		for (i = 0; phys_avail[i+2]; i+= 2) {
278			phys_avail[i] = phys_avail[i+2];
279			phys_avail[i+1] = phys_avail[i+3];
280		}
281		phys_avail[i] = 0;
282		phys_avail[i+1] = 0;
283		if (!phys_avail[0])
284			panic("pmap_steal_memory: out of memory");
285		bank_size = phys_avail[1] - phys_avail[0];
286	}
287
288	pa = phys_avail[0];
289	phys_avail[0] += size;
290
291	va = IA64_PHYS_TO_RR7(pa);
292	bzero((caddr_t) va, size);
293	return va;
294}
295
296/*
297 *	Bootstrap the system enough to run with virtual memory.
298 */
299void
300pmap_bootstrap()
301{
302	int i, j, count, ridbits;
303	struct ia64_pal_result res;
304
305	/*
306	 * Query the PAL Code to find the loop parameters for the
307	 * ptc.e instruction.
308	 */
309	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
310	if (res.pal_status != 0)
311		panic("Can't configure ptc.e parameters");
312	pmap_ptc_e_base = res.pal_result[0];
313	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
314	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
315	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
316	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
317	if (bootverbose)
318		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
319		       "stride1=0x%lx, stride2=0x%lx\n",
320		       pmap_ptc_e_base,
321		       pmap_ptc_e_count1,
322		       pmap_ptc_e_count2,
323		       pmap_ptc_e_stride1,
324		       pmap_ptc_e_stride2);
325
326	/*
327	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
328	 *
329	 * We currently need at least 19 bits in the RID because PID_MAX
330	 * can only be encoded in 17 bits and we need RIDs for 5 regions
331	 * per process. With PID_MAX equalling 99999 this means that we
332	 * need to be able to encode 499995 (=5*PID_MAX).
333	 * The Itanium processor only has 18 bits and the architected
334	 * minimum is exactly that. So, we cannot use a PID based scheme
335	 * in those cases. Enter pmap_ridmap...
336	 * We should avoid the map when running on a processor that has
337	 * implemented enough bits. This means that we should pass the
338	 * process/thread ID to pmap. This we currently don't do, so we
339	 * use the map anyway. However, we don't want to allocate a map
340	 * that is large enough to cover the range dictated by the number
341	 * of bits in the RID, because that may result in a RID map of
342	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
343	 * The bottom line: we create a 32KB map when the processor only
344	 * implements 18 bits (or when we can't figure it out). Otherwise
345	 * we create a 64KB map.
346	 */
347	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
348	if (res.pal_status != 0) {
349		if (bootverbose)
350			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
351		ridbits = 18; /* guaranteed minimum */
352	} else {
353		ridbits = (res.pal_result[1] >> 8) & 0xff;
354		if (bootverbose)
355			printf("Processor supports %d Region ID bits\n",
356			    ridbits);
357	}
358	if (ridbits > 19)
359		ridbits = 19;
360
361	pmap_ridmax = (1 << ridbits);
362	pmap_ridmapsz = pmap_ridmax / 64;
363	pmap_ridmap = (u_int64_t *)pmap_steal_memory(pmap_ridmax / 8);
364	pmap_ridmap[0] |= 0xff;
365	pmap_rididx = 0;
366	pmap_ridcount = 8;
367	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
368
369	/*
370	 * Allocate some memory for initial kernel 'page tables'.
371	 */
372	kptdir = (struct ia64_lpte **) pmap_steal_memory(PAGE_SIZE);
373	for (i = 0; i < NKPT; i++) {
374		kptdir[i] = (struct ia64_lpte *) pmap_steal_memory(PAGE_SIZE);
375	}
376	nkpt = NKPT;
377
378	avail_start = phys_avail[0];
379	for (i = 0; phys_avail[i+2]; i+= 2) ;
380	avail_end = phys_avail[i+1];
381	count = i+2;
382
383	/*
384	 * Figure out a useful size for the VHPT, based on the size of
385	 * physical memory and try to locate a region which is large
386	 * enough to contain the VHPT (which must be a power of two in
387	 * size and aligned to a natural boundary).
388	 * Don't use the difference between avail_start and avail_end
389	 * as a measure for memory size. The address space is often
390	 * sparse enough to cause us to (try to) create a huge VHPT.
391	 */
392	vhpt_size = 15;
393	while ((1L<<vhpt_size) < Maxmem * 32)
394		vhpt_size++;
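	/*
	 * Rough sizing example, assuming 8KB pages (Maxmem is in pages)
	 * and 32-byte long-format VHPT entries: the loop above asks for
	 * one entry per physical page, so a 1GB machine (128K pages)
	 * needs at least 4MB and ends up with vhpt_size == 22.
	 */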
395
396	vhpt_base = 0;
397	while (!vhpt_base) {
398		vm_offset_t mask;
399		if (bootverbose)
400			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
401		mask = (1L << vhpt_size) - 1;
402		for (i = 0; i < count; i += 2) {
403			vm_offset_t base, limit;
404			base = (phys_avail[i] + mask) & ~mask;
405			limit = base + (1L << vhpt_size);
406			if (limit <= phys_avail[i+1])
407				/*
408				 * VHPT can fit in this region
409				 */
410				break;
411		}
412		if (!phys_avail[i]) {
413			/*
414			 * Can't fit, try next smaller size.
415			 */
416			vhpt_size--;
417		} else {
418			vhpt_base = (phys_avail[i] + mask) & ~mask;
419		}
420	}
421	if (vhpt_size < 15)
422		panic("Can't find space for VHPT");
423
424	if (bootverbose)
425		printf("Putting VHPT at %p\n", (void *) vhpt_base);
426	if (vhpt_base != phys_avail[i]) {
427		/*
428		 * Split this region.
429		 */
430		if (bootverbose)
431			printf("Splitting [%p-%p]\n",
432			       (void *) phys_avail[i],
433			       (void *) phys_avail[i+1]);
434		for (j = count; j > i; j -= 2) {
435			phys_avail[j] = phys_avail[j-2];
436			phys_avail[j+1] = phys_avail[j-2+1];
437		}
438		phys_avail[count+2] = 0;
439		phys_avail[count+3] = 0;
440		phys_avail[i+1] = vhpt_base;
441		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
442	} else {
443		phys_avail[i] = vhpt_base + (1L << vhpt_size);
444	}
445
446	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
447	bzero((void *) vhpt_base, (1L << vhpt_size));
448	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
449			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
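	/*
	 * A sketch of the value written to cr.pta above, following the
	 * architected PTA layout: bit 0 enables the VHPT walker, bits
	 * 7:2 hold the table size (vhpt_size), bit 8 selects the long
	 * format and the upper bits hold the region 7 base address of
	 * the table.
	 */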
450
451	virtual_avail = IA64_RR_BASE(5);
452	virtual_end = IA64_RR_BASE(6)-1;
453
454	/*
455	 * Initialize protection array.
456	 */
457	ia64_protection_init();
458
459	/*
460	 * Initialize the kernel pmap (which is statically allocated).
461	 */
462	for (i = 0; i < 5; i++)
463		kernel_pmap->pm_rid[i] = 0;
464	kernel_pmap->pm_active = 1;
465	TAILQ_INIT(&kernel_pmap->pm_pvlist);
466	PCPU_SET(current_pmap, kernel_pmap);
467
468	/*
469	 * Region 5 is mapped via the vhpt.
470	 */
471	ia64_set_rr(IA64_RR_BASE(5),
472		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
473
474	/*
475	 * Region 6 is direct mapped UC and region 7 is direct mapped
476	 * WB. The details of this are controlled by the Alt {I,D}TLB
477	 * handlers. Here we just make sure that they have the largest
478	 * possible page size to minimize TLB usage.
479	 */
480	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
481	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
482
483	/*
484	 * Reserve some memory for allocating pvs while bootstrapping
485	 * the pv allocator. We need to have enough to cover mapping
486	 * the kmem_alloc region used to allocate the initial_pvs in
487	 * pmap_init. In general, the size of this region is
488	 * approximately (# physical pages) * (size of pv entry).
489	 */
490	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
491	pvbootentries = (struct pv_entry *)
492		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
493	pvbootnext = 0;
494
495	/*
496	 * Clear out any random TLB entries left over from booting.
497	 */
498	pmap_invalidate_all(kernel_pmap);
499}
500
501void *
502uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
503{
504	static vm_pindex_t color;
505	vm_page_t m;
506	int pflags;
507	void *va;
508
509	*flags = UMA_SLAB_PRIV;
510	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
511		pflags = VM_ALLOC_INTERRUPT;
512	else
513		pflags = VM_ALLOC_SYSTEM;
514	if (wait & M_ZERO)
515		pflags |= VM_ALLOC_ZERO;
516
517	for (;;) {
518		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
519		if (m == NULL) {
520			if (wait & M_NOWAIT)
521				return (NULL);
522			else
523				VM_WAIT;
524		} else
525			break;
526	}
527
528	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
529	if ((m->flags & PG_ZERO) == 0)
530		bzero(va, PAGE_SIZE);
531	return (va);
532}
533
534void
535uma_small_free(void *mem, int size, u_int8_t flags)
536{
537	vm_page_t m;
538
539	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
540	vm_page_lock_queues();
541	vm_page_free(m);
542	vm_page_unlock_queues();
543}
544
545/*
546 *	Initialize the pmap module.
547 *	Called by vm_init, to initialize any structures that the pmap
548 *	system needs to map virtual memory.
549 *	pmap_init has been enhanced to support, in a fairly consistent
550 *	way, discontiguous physical memory.
551 */
552void
553pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
554{
555	int i;
556	int initial_pvs;
557
558	/*
559	 * Initialize the pv list headers kept in the machine-dependent
560	 * part of each vm_page.
561	 */
562
563	for(i = 0; i < vm_page_array_size; i++) {
564		vm_page_t m;
565
566		m = &vm_page_array[i];
567		TAILQ_INIT(&m->md.pv_list);
568		m->md.pv_list_count = 0;
569 	}
570
571	/*
572	 * Init the pv free list and the PTE free list.
573	 */
574	initial_pvs = vm_page_array_size;
575	if (initial_pvs < MINPV)
576		initial_pvs = MINPV;
577	if (initial_pvs > MAXPV)
578		initial_pvs = MAXPV;
579	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
580	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
581	uma_prealloc(pvzone, initial_pvs);
582
583	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
584	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
585	uma_prealloc(ptezone, initial_pvs);
586
587	/*
588	 * Now it is safe to enable pv_table recording.
589	 */
590	pmap_initialized = TRUE;
591}
592
593/*
594 * Initialize the address space (zone) for the pv_entries.  Set a
595 * high water mark so that the system can recover from excessive
596 * numbers of pv entries.
597 */
598void
599pmap_init2()
600{
601	int shpgperproc = PMAP_SHPGPERPROC;
602
603	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
604	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
605	pv_entry_high_water = 9 * (pv_entry_max / 10);
606}
607
608
609/***************************************************
610 * Manipulate TLBs for a pmap
611 ***************************************************/
612
613static void
614pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
615{
616	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
617		("invalidating TLB for non-current pmap"));
618	ia64_ptc_g(va, PAGE_SHIFT << 2);
619}
620
621static void
622pmap_invalidate_all_1(void *arg)
623{
624	u_int64_t addr;
625	int i, j;
626	register_t psr;
627
628	psr = intr_disable();
629	addr = pmap_ptc_e_base;
630	for (i = 0; i < pmap_ptc_e_count1; i++) {
631		for (j = 0; j < pmap_ptc_e_count2; j++) {
632			ia64_ptc_e(addr);
633			addr += pmap_ptc_e_stride2;
634		}
635		addr += pmap_ptc_e_stride1;
636	}
637	intr_restore(psr);
638}
639
640static void
641pmap_invalidate_all(pmap_t pmap)
642{
643	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
644		("invalidating TLB for non-current pmap"));
645
646
647#ifdef SMP
648	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
649#else
650	pmap_invalidate_all_1(0);
651#endif
652}
653
654static u_int32_t
655pmap_allocate_rid(void)
656{
657	uint64_t bit, bits;
658	int rid;
659
660	mtx_lock(&pmap_ridmutex);
661	if (pmap_ridcount == pmap_ridmax)
662		panic("pmap_allocate_rid: All Region IDs used");
663
664	/* Find an index with a free bit. */
665	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
666		pmap_rididx++;
667		if (pmap_rididx == pmap_ridmapsz)
668			pmap_rididx = 0;
669	}
670	rid = pmap_rididx * 64;
671
672	/* Find a free bit. */
673	bit = 1UL;
674	while (bits & bit) {
675		rid++;
676		bit <<= 1;
677	}
678
679	pmap_ridmap[pmap_rididx] |= bit;
680	pmap_ridcount++;
681	mtx_unlock(&pmap_ridmutex);
682
683	return rid;
684}
685
686static void
687pmap_free_rid(u_int32_t rid)
688{
689	uint64_t bit;
690	int idx;
691
692	idx = rid / 64;
693	bit = ~(1UL << (rid & 63));
694
695	mtx_lock(&pmap_ridmutex);
696	pmap_ridmap[idx] &= bit;
697	pmap_ridcount--;
698	mtx_unlock(&pmap_ridmutex);
699}
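/*
 * A small worked example of the bitmap arithmetic used above: RID 75
 * lives in pmap_ridmap[75 / 64] == pmap_ridmap[1] at bit 75 & 63 == 11;
 * pmap_allocate_rid() sets that bit when handing out the RID and
 * pmap_free_rid(75) clears it again, both under pmap_ridmutex.
 */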
700
701/***************************************************
702 * Low level helper routines.....
703 ***************************************************/
704
705/*
706 * Install a pte into the VHPT
707 */
708static PMAP_INLINE void
709pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
710{
711	u_int64_t *vhp, *p;
712
713	/* invalidate the pte */
714	atomic_set_64(&vhpte->pte_tag, 1L << 63);
715	ia64_mf();			/* make sure everyone sees */
716
717	vhp = (u_int64_t *) vhpte;
718	p = (u_int64_t *) pte;
719
720	vhp[0] = p[0];
721	vhp[1] = p[1];
722	vhp[2] = p[2];			/* sets ti to one */
723
724	ia64_mf();
725}
726
727/*
728 * Compare essential parts of pte.
729 */
730static PMAP_INLINE int
731pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
732{
733	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
734}
735
736/*
737 * this routine defines the region(s) of memory that should
738 * not be tested for the modified bit.
739 */
740static PMAP_INLINE int
741pmap_track_modified(vm_offset_t va)
742{
743	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
744		return 1;
745	else
746		return 0;
747}
748
749/*
750 * Create the KSTACK for a new thread.
751 * This routine directly affects the fork perf for a process/thread.
752 */
753void
754pmap_new_thread(struct thread *td, int pages)
755{
756	vm_offset_t *ks;
757
758	/* Bounds check */
759	if (pages <= 1)
760		pages = KSTACK_PAGES;
761	else if (pages > KSTACK_MAX_PAGES)
762		pages = KSTACK_MAX_PAGES;
763
764	/*
765	 * Use contigmalloc for user area so that we can use a region
766	 * 7 address for it, which makes it impossible to accidentally
767	 * lose it when recording a trapframe.
768	 */
769	ks = contigmalloc(pages * PAGE_SIZE, M_PMAP, M_WAITOK, 0ul,
770	    256*1024*1024 - 1, PAGE_SIZE, 256*1024*1024);
771	if (ks == NULL)
772		panic("pmap_new_thread: could not contigmalloc %d pages\n",
773		    pages);
774
775	td->td_md.md_kstackvirt = ks;
776	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa((u_int64_t)ks));
777	td->td_kstack_pages = pages;
778}
779
780/*
781 * Dispose the KSTACK for a thread that has exited.
782 * This routine directly impacts the exit perf of a process/thread.
783 */
784void
785pmap_dispose_thread(struct thread *td)
786{
787	int pages;
788
789	pages = td->td_kstack_pages;
790	contigfree(td->td_md.md_kstackvirt, pages * PAGE_SIZE, M_PMAP);
791	td->td_md.md_kstackvirt = NULL;
792	td->td_kstack = 0;
793}
794
795/*
796 * Set up a variable sized alternate kstack.  This appears to be MI.
797 */
798void
799pmap_new_altkstack(struct thread *td, int pages)
800{
801
802	/*
803	 * Shuffle the original stack. Save the virtual kstack address
804	 * instead of the physical address because 1) we can derive the
805	 * physical address from the virtual address and 2) we need the
806	 * virtual address in pmap_dispose_thread.
807	 */
808	td->td_altkstack_obj = td->td_kstack_obj;
809	td->td_altkstack = (vm_offset_t)td->td_md.md_kstackvirt;
810	td->td_altkstack_pages = td->td_kstack_pages;
811
812	pmap_new_thread(td, pages);
813}
814
815void
816pmap_dispose_altkstack(struct thread *td)
817{
818
819	pmap_dispose_thread(td);
820
821	/*
822	 * Restore the original kstack. Note that td_altkstack holds the
823	 * virtual kstack address of the previous kstack.
824	 */
825	td->td_md.md_kstackvirt = (void*)td->td_altkstack;
826	td->td_kstack = IA64_PHYS_TO_RR7(ia64_tpa(td->td_altkstack));
827	td->td_kstack_obj = td->td_altkstack_obj;
828	td->td_kstack_pages = td->td_altkstack_pages;
829	td->td_altkstack = 0;
830	td->td_altkstack_obj = NULL;
831	td->td_altkstack_pages = 0;
832}
833
834/*
835 * Allow the KSTACK for a thread to be prejudicially paged out.
836 */
837void
838pmap_swapout_thread(struct thread *td)
839{
840}
841
842/*
843 * Bring the KSTACK for a specified thread back in.
844 */
845void
846pmap_swapin_thread(struct thread *td)
847{
848}
849
850/***************************************************
851 * Page table page management routines.....
852 ***************************************************/
853
854void
855pmap_pinit0(struct pmap *pmap)
856{
857	/* kernel_pmap is the same as any other pmap. */
858	pmap_pinit(pmap);
859}
860
861/*
862 * Initialize a preallocated and zeroed pmap structure,
863 * such as one in a vmspace structure.
864 */
865void
866pmap_pinit(struct pmap *pmap)
867{
868	int i;
869
870	pmap->pm_flags = 0;
871	for (i = 0; i < 5; i++)
872		pmap->pm_rid[i] = 0;
873	pmap->pm_ptphint = NULL;
874	pmap->pm_active = 0;
875	TAILQ_INIT(&pmap->pm_pvlist);
876	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
877}
878
879/*
880 * Wire in kernel global address entries.  To avoid a race condition
881 * between pmap initialization and pmap_growkernel, this procedure
882 * should be called after the vmspace is attached to the process
883 * but before this pmap is activated.
884 */
885void
886pmap_pinit2(struct pmap *pmap)
887{
888	int i;
889
890	for (i = 0; i < 5; i++)
891		pmap->pm_rid[i] = pmap_allocate_rid();
892}
893
894/***************************************************
895 * Pmap allocation/deallocation routines.
896 ***************************************************/
897
898/*
899 * Release any resources held by the given physical map.
900 * Called when a pmap initialized by pmap_pinit is being released.
901 * Should only be called if the map contains no valid mappings.
902 */
903void
904pmap_release(pmap_t pmap)
905{
906	int i;
907
908	for (i = 0; i < 5; i++)
909		if (pmap->pm_rid[i])
910			pmap_free_rid(pmap->pm_rid[i]);
911}
912
913/*
914 * grow the number of kernel page table entries, if needed
915 */
916void
917pmap_growkernel(vm_offset_t addr)
918{
919	struct ia64_lpte *ptepage;
920	vm_page_t nkpg;
921
922	if (kernel_vm_end == 0) {
923		kernel_vm_end = nkpt * PAGE_SIZE * NKPTEPG
924			+ IA64_RR_BASE(5);
925	}
926	addr = (addr + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
927	while (kernel_vm_end < addr) {
928		if (kptdir[KPTE_DIR_INDEX(kernel_vm_end)]) {
929			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG)
930				& ~(PAGE_SIZE * NKPTEPG - 1);
931			continue;
932		}
933
934		/*
935		 * We could handle more by increasing the size of kptdir.
936		 */
937		if (nkpt == MAXKPT)
938			panic("pmap_growkernel: out of kernel address space");
939
940		/*
941		 * This index is bogus, but out of the way
942		 */
943		nkpg = vm_page_alloc(NULL, nkpt,
944		    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
945		if (!nkpg)
946			panic("pmap_growkernel: no memory to grow kernel");
947
948		nkpt++;
949		ptepage = (struct ia64_lpte *)
950			IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
951		bzero(ptepage, PAGE_SIZE);
952		kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
953
954		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NKPTEPG) & ~(PAGE_SIZE * NKPTEPG - 1);
955	}
956}
957
958/***************************************************
959 * page management routines.
960 ***************************************************/
961
962/*
963 * free the pv_entry back to the free list
964 */
965static PMAP_INLINE void
966free_pv_entry(pv_entry_t pv)
967{
968	pv_entry_count--;
969	uma_zfree(pvzone, pv);
970}
971
972/*
973 * get a new pv_entry, allocating a block from the system
974 * when needed.
975 * the memory allocation is performed bypassing the malloc code
976 * because of the possibility of allocations at interrupt time.
977 */
978static pv_entry_t
979get_pv_entry(void)
980{
981	pv_entry_count++;
982	if (pv_entry_high_water &&
983		(pv_entry_count > pv_entry_high_water) &&
984		(pmap_pagedaemon_waken == 0)) {
985		pmap_pagedaemon_waken = 1;
986		wakeup (&vm_pages_needed);
987	}
988	return uma_zalloc(pvzone, M_NOWAIT);
989}
990
991/*
992 * Add an ia64_lpte to the VHPT.
993 */
994static void
995pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
996{
997	struct ia64_lpte *vhpte;
998
999	pmap_vhpt_inserts++;
1000	pmap_vhpt_resident++;
1001
1002	vhpte = (struct ia64_lpte *) ia64_thash(va);
1003
1004	if (vhpte->pte_chain)
1005		pmap_vhpt_collisions++;
1006
1007	pte->pte_chain = vhpte->pte_chain;
1008	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
1009
1010	if (!vhpte->pte_p && pte->pte_p)
1011		pmap_install_pte(vhpte, pte);
1012	else
1013		ia64_mf();
1014}
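/*
 * A rough sketch of the structure maintained here: ia64_thash(va) names
 * the VHPT slot for va, and that slot heads a singly linked chain of
 * ia64_lptes through their pte_chain fields (stored as physical
 * addresses and followed via region 7). The slot itself caches one of
 * the chained translations, installed by pmap_install_pte() and
 * identified by its tag.
 */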
1015
1016/*
1017 * Update VHPT after a pte has changed.
1018 */
1019static void
1020pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
1021{
1022	struct ia64_lpte *vhpte;
1023
1024	vhpte = (struct ia64_lpte *) ia64_thash(va);
1025
1026	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
1027	    && pte->pte_p)
1028		pmap_install_pte(vhpte, pte);
1029}
1030
1031/*
1032 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1033 * worked or an appropriate error code otherwise.
1034 */
1035static int
1036pmap_remove_vhpt(vm_offset_t va)
1037{
1038	struct ia64_lpte *pte;
1039	struct ia64_lpte *lpte;
1040	struct ia64_lpte *vhpte;
1041	u_int64_t tag;
1042	int error = ENOENT;
1043
1044	vhpte = (struct ia64_lpte *) ia64_thash(va);
1045
1046	/*
1047	 * If the VHPTE is invalid, there can't be a collision chain.
1048	 */
1049	if (!vhpte->pte_p) {
1050		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1051		printf("can't remove vhpt entry for 0x%lx\n", va);
1052		goto done;
1053	}
1054
1055	lpte = vhpte;
1056	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1057	tag = ia64_ttag(va);
1058
1059	while (pte->pte_tag != tag) {
1060		lpte = pte;
1061		if (pte->pte_chain)
1062			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1063		else {
1064			printf("can't remove vhpt entry for 0x%lx\n", va);
1065			goto done;
1066		}
1067	}
1068
1069	/*
1070	 * Snip this pte out of the collision chain.
1071	 */
1072	lpte->pte_chain = pte->pte_chain;
1073
1074	/*
1075	 * If the VHPTE matches as well, change it to map the first
1076	 * element from the chain if there is one.
1077	 */
1078	if (vhpte->pte_tag == tag) {
1079		if (vhpte->pte_chain) {
1080			pte = (struct ia64_lpte *)
1081				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1082			pmap_install_pte(vhpte, pte);
1083		} else {
1084			vhpte->pte_p = 0;
1085			ia64_mf();
1086		}
1087	}
1088
1089	pmap_vhpt_resident--;
1090	error = 0;
1091 done:
1092	return error;
1093}
1094
1095/*
1096 * Find the ia64_lpte for the given va, if any.
1097 */
1098static struct ia64_lpte *
1099pmap_find_vhpt(vm_offset_t va)
1100{
1101	struct ia64_lpte *pte;
1102	u_int64_t tag;
1103
1104	pte = (struct ia64_lpte *) ia64_thash(va);
1105	if (!pte->pte_chain) {
1106		pte = 0;
1107		goto done;
1108	}
1109
1110	tag = ia64_ttag(va);
1111	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1112
1113	while (pte->pte_tag != tag) {
1114		if (pte->pte_chain) {
1115			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1116		} else {
1117			pte = 0;
1118			break;
1119		}
1120	}
1121
1122 done:
1123	return pte;
1124}
1125
1126/*
1127 * Remove an entry from the list of managed mappings.
1128 */
1129static int
1130pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1131{
1132	if (!pv) {
1133		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1134			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1135				if (pmap == pv->pv_pmap && va == pv->pv_va)
1136					break;
1137			}
1138		} else {
1139			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1140				if (va == pv->pv_va)
1141					break;
1142			}
1143		}
1144	}
1145
1146	if (pv) {
1147		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1148		m->md.pv_list_count--;
1149		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1150			vm_page_flag_clear(m, PG_WRITEABLE);
1151
1152		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1153		free_pv_entry(pv);
1154		return 0;
1155	} else {
1156		return ENOENT;
1157	}
1158}
1159
1160/*
1161 * Create a pv entry for page at pa for
1162 * (pmap, va).
1163 */
1164static void
1165pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1166{
1167	pv_entry_t pv;
1168
1169	pv = get_pv_entry();
1170	pv->pv_pmap = pmap;
1171	pv->pv_va = va;
1172
1173	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1174	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1175	m->md.pv_list_count++;
1176}
1177
1178/*
1179 *	Routine:	pmap_extract
1180 *	Function:
1181 *		Extract the physical page address associated
1182 *		with the given map/virtual_address pair.
1183 */
1184vm_offset_t
1185pmap_extract(pmap, va)
1186	register pmap_t pmap;
1187	vm_offset_t va;
1188{
1189	struct ia64_lpte *pte;
1190	pmap_t oldpmap;
1191
1192	if (!pmap)
1193		return 0;
1194
1195	oldpmap = pmap_install(pmap);
1196	pte = pmap_find_vhpt(va);
1197	pmap_install(oldpmap);
1198
1199	if (!pte)
1200		return 0;
1201
1202	return pmap_pte_pa(pte);
1203}
1204
1205/***************************************************
1206 * Low level mapping routines.....
1207 ***************************************************/
1208
1209/*
1210 * Find the kernel lpte for mapping the given virtual address, which
1211 * must be in the part of region 5 which we can cover with our kernel
1212 * 'page tables'.
1213 */
1214static struct ia64_lpte *
1215pmap_find_kpte(vm_offset_t va)
1216{
1217	KASSERT((va >> 61) == 5,
1218		("kernel mapping 0x%lx not in region 5", va));
1219	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1220		("kernel mapping 0x%lx out of range", va));
1221	return &kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)];
1222}
1223
1224/*
1225 * Find a pte suitable for mapping a user-space address. If one exists
1226 * in the VHPT, that one will be returned, otherwise a new pte is
1227 * allocated.
1228 */
1229static struct ia64_lpte *
1230pmap_find_pte(vm_offset_t va)
1231{
1232	struct ia64_lpte *pte;
1233
1234	if (va >= VM_MAXUSER_ADDRESS)
1235		return pmap_find_kpte(va);
1236
1237	pte = pmap_find_vhpt(va);
1238	if (!pte) {
1239		pte = uma_zalloc(ptezone, M_WAITOK);
1240		pte->pte_p = 0;
1241	}
1242	return pte;
1243}
1244
1245/*
1246 * Free a pte which is now unused. This simply returns it to the zone
1247 * allocator if it is a user mapping. For kernel mappings, clear the
1248 * valid bit to make it clear that the mapping is not currently used.
1249 */
1250static void
1251pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1252{
1253	if (va < VM_MAXUSER_ADDRESS)
1254		uma_zfree(ptezone, pte);
1255	else
1256		pte->pte_p = 0;
1257}
1258
1259/*
1260 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1261 * the pte was originally valid, then it's assumed to already be in the
1262 * VHPT.
1263 */
1264static void
1265pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1266	     int ig, int pl, int ar)
1267{
1268	int wasvalid = pte->pte_p;
1269
1270	pte->pte_p = 1;
1271	pte->pte_ma = PTE_MA_WB;
1272	if (ig & PTE_IG_MANAGED) {
1273		pte->pte_a = 0;
1274		pte->pte_d = 0;
1275	} else {
1276		pte->pte_a = 1;
1277		pte->pte_d = 1;
1278	}
1279	pte->pte_pl = pl;
1280	pte->pte_ar = ar;
1281	pte->pte_ppn = pa >> 12;
1282	pte->pte_ed = 0;
1283	pte->pte_ig = ig;
1284
1285	pte->pte_ps = PAGE_SHIFT;
1286	pte->pte_key = 0;
1287
1288	pte->pte_tag = ia64_ttag(va);
1289
1290	if (wasvalid) {
1291		pmap_update_vhpt(pte, va);
1292	} else {
1293		pmap_enter_vhpt(pte, va);
1294	}
1295}
1296
1297/*
1298 * If a pte contains a valid mapping, clear it and update the VHPT.
1299 */
1300static void
1301pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1302{
1303	if (pte->pte_p) {
1304		pmap_remove_vhpt(va);
1305		ia64_ptc_g(va, PAGE_SHIFT << 2);
1306		pte->pte_p = 0;
1307	}
1308}
1309
1310/*
1311 * Remove the (possibly managed) mapping represented by pte from the
1312 * given pmap.
1313 */
1314static int
1315pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1316		pv_entry_t pv, int freepte)
1317{
1318	int error;
1319	vm_page_t m;
1320
1321	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1322		("removing pte for non-current pmap"));
1323
1324	/*
1325	 * First remove from the VHPT.
1326	 */
1327	error = pmap_remove_vhpt(va);
1328	if (error)
1329		return error;
1330
1331	/*
1332	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1333	 */
1334	pte->pte_p = 0;
1335
1336	if (pte->pte_ig & PTE_IG_WIRED)
1337		pmap->pm_stats.wired_count -= 1;
1338
1339	pmap->pm_stats.resident_count -= 1;
1340	if (pte->pte_ig & PTE_IG_MANAGED) {
1341		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1342		if (pte->pte_d)
1343			if (pmap_track_modified(va))
1344				vm_page_dirty(m);
1345		if (pte->pte_a)
1346			vm_page_flag_set(m, PG_REFERENCED);
1347
1348		if (freepte)
1349			pmap_free_pte(pte, va);
1350		return pmap_remove_entry(pmap, m, va, pv);
1351	} else {
1352		if (freepte)
1353			pmap_free_pte(pte, va);
1354		return 0;
1355	}
1356}
1357
1358/*
1359 * Extract the physical page address associated with a kernel
1360 * virtual address.
1361 */
1362vm_paddr_t
1363pmap_kextract(vm_offset_t va)
1364{
1365	struct ia64_lpte *pte;
1366
1367	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1368
1369	/* Regions 6 and 7 are direct mapped. */
1370	if (va >= IA64_RR_BASE(6))
1371		return (IA64_RR_MASK(va));
1372
1373	/* Bail out if the virtual address is beyond our limits. */
1374	if (IA64_RR_MASK(va) >= nkpt * PAGE_SIZE * NKPTEPG)
1375		return (0);
1376
1377	pte = pmap_find_kpte(va);
1378	if (!pte->pte_p)
1379		return (0);
1380	return ((pte->pte_ppn << 12) | (va & PAGE_MASK));
1381}
1382
1383/*
1384 * Add a list of wired pages to the kva.
1385 * This routine is only used for temporary
1386 * kernel mappings that do not need to have
1387 * page modification or references recorded.
1388 * Note that old mappings are simply written
1389 * over.  The page *must* be wired.
1390 */
1391void
1392pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1393{
1394	int i;
1395	struct ia64_lpte *pte;
1396
1397	for (i = 0; i < count; i++) {
1398		vm_offset_t tva = va + i * PAGE_SIZE;
1399		int wasvalid;
1400		pte = pmap_find_kpte(tva);
1401		wasvalid = pte->pte_p;
1402		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1403			     0, PTE_PL_KERN, PTE_AR_RWX);
1404		if (wasvalid)
1405			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1406	}
1407}
1408
1409/*
1410 * this routine jerks page mappings from the
1411 * kernel -- it is meant only for temporary mappings.
1412 */
1413void
1414pmap_qremove(vm_offset_t va, int count)
1415{
1416	int i;
1417	struct ia64_lpte *pte;
1418
1419	for (i = 0; i < count; i++) {
1420		pte = pmap_find_kpte(va);
1421		pmap_clear_pte(pte, va);
1422		va += PAGE_SIZE;
1423	}
1424}
1425
1426/*
1427 * Add a wired page to the kva.
1428 */
1429void
1430pmap_kenter(vm_offset_t va, vm_offset_t pa)
1431{
1432	struct ia64_lpte *pte;
1433	int wasvalid;
1434
1435	pte = pmap_find_kpte(va);
1436	wasvalid = pte->pte_p;
1437	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1438	if (wasvalid)
1439		ia64_ptc_g(va, PAGE_SHIFT << 2);
1440}
1441
1442/*
1443 * Remove a page from the kva
1444 */
1445void
1446pmap_kremove(vm_offset_t va)
1447{
1448	struct ia64_lpte *pte;
1449
1450	pte = pmap_find_kpte(va);
1451	pmap_clear_pte(pte, va);
1452}
1453
1454/*
1455 *	Used to map a range of physical addresses into kernel
1456 *	virtual address space.
1457 *
1458 *	The value passed in '*virt' is a suggested virtual address for
1459 *	the mapping. Architectures which can support a direct-mapped
1460 *	physical to virtual region can return the appropriate address
1461 *	within that region, leaving '*virt' unchanged. Other
1462 *	architectures should map the pages starting at '*virt' and
1463 *	update '*virt' with the first usable address after the mapped
1464 *	region.
1465 */
1466vm_offset_t
1467pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1468{
1469	return IA64_PHYS_TO_RR7(start);
1470}
1471
1472/*
1473 * Remove a single page from a process address space
1474 */
1475static void
1476pmap_remove_page(pmap_t pmap, vm_offset_t va)
1477{
1478	struct ia64_lpte *pte;
1479
1480	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1481		("removing page for non-current pmap"));
1482
1483	pte = pmap_find_vhpt(va);
1484	if (pte) {
1485		pmap_remove_pte(pmap, pte, va, 0, 1);
1486		pmap_invalidate_page(pmap, va);
1487	}
1488	return;
1489}
1490
1491/*
1492 *	Remove the given range of addresses from the specified map.
1493 *
1494 *	It is assumed that the start and end are properly
1495 *	rounded to the page size.
1496 */
1497void
1498pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1499{
1500	pmap_t oldpmap;
1501	vm_offset_t va;
1502	pv_entry_t pv;
1503	struct ia64_lpte *pte;
1504
1505	if (pmap == NULL)
1506		return;
1507
1508	if (pmap->pm_stats.resident_count == 0)
1509		return;
1510
1511	oldpmap = pmap_install(pmap);
1512
1513	/*
1514	 * Special handling for removing one page: it is a very
1515	 * common operation and it is easy to short-circuit some
1516	 * code.
1517	 */
1518	if (sva + PAGE_SIZE == eva) {
1519		pmap_remove_page(pmap, sva);
1520		pmap_install(oldpmap);
1521		return;
1522	}
1523
1524	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1525		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1526			va = pv->pv_va;
1527			if (va >= sva && va < eva) {
1528				pte = pmap_find_vhpt(va);
1529				pmap_remove_pte(pmap, pte, va, pv, 1);
1530				pmap_invalidate_page(pmap, va);
1531			}
1532		}
1533
1534	} else {
1535		for (va = sva; va < eva; va += PAGE_SIZE) {
1536			pte = pmap_find_vhpt(va);
1537			if (pte) {
1538				pmap_remove_pte(pmap, pte, va, 0, 1);
1539				pmap_invalidate_page(pmap, va);
1540			}
1541		}
1542	}
1543
1544	pmap_install(oldpmap);
1545}
1546
1547/*
1548 *	Routine:	pmap_remove_all
1549 *	Function:
1550 *		Removes this physical page from
1551 *		all physical maps in which it resides.
1552 *		Reflects back modify bits to the pager.
1553 *
1554 *	Notes:
1555 *		Original versions of this routine were very
1556 *		inefficient because they iteratively called
1557 *		pmap_remove (slow...)
1558 */
1559
1560void
1561pmap_remove_all(vm_page_t m)
1562{
1563	pmap_t oldpmap;
1564	pv_entry_t pv;
1565	int s;
1566
1567#if defined(PMAP_DIAGNOSTIC)
1568	/*
1569	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1570	 * pages!
1571	 */
1572	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1573		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1574	}
1575#endif
1576
1577	s = splvm();
1578
1579	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1580		struct ia64_lpte *pte;
1581		pmap_t pmap = pv->pv_pmap;
1582		vm_offset_t va = pv->pv_va;
1583
1584		oldpmap = pmap_install(pmap);
1585		pte = pmap_find_vhpt(va);
1586		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1587			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1588		pmap_remove_pte(pmap, pte, va, pv, 1);
1589		pmap_invalidate_page(pmap, va);
1590		pmap_install(oldpmap);
1591	}
1592
1593	vm_page_flag_clear(m, PG_WRITEABLE);
1594
1595	splx(s);
1596	return;
1597}
1598
1599/*
1600 *	Set the physical protection on the
1601 *	specified range of this map as requested.
1602 */
1603void
1604pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1605{
1606	pmap_t oldpmap;
1607	struct ia64_lpte *pte;
1608	int newprot;
1609
1610	if (pmap == NULL)
1611		return;
1612
1613	oldpmap = pmap_install(pmap);
1614
1615	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1616		pmap_remove(pmap, sva, eva);
1617		pmap_install(oldpmap);
1618		return;
1619	}
1620
1621	if (prot & VM_PROT_WRITE) {
1622		pmap_install(oldpmap);
1623		return;
1624	}
1625
1626	newprot = pte_prot(pmap, prot);
1627
1628	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1629		panic("pmap_protect: unaligned addresses");
1630
1631	while (sva < eva) {
1632		/*
1633		 * If page is invalid, skip this page
1634		 */
1635		pte = pmap_find_vhpt(sva);
1636		if (!pte) {
1637			sva += PAGE_SIZE;
1638			continue;
1639		}
1640
1641		if (pmap_pte_prot(pte) != newprot) {
1642			if (pte->pte_ig & PTE_IG_MANAGED) {
1643				vm_offset_t pa = pmap_pte_pa(pte);
1644				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1645				if (pte->pte_d) {
1646					if (pmap_track_modified(sva))
1647						vm_page_dirty(m);
1648					pte->pte_d = 0;
1649				}
1650				if (pte->pte_a) {
1651					vm_page_flag_set(m, PG_REFERENCED);
1652					pte->pte_a = 0;
1653				}
1654			}
1655			pmap_pte_set_prot(pte, newprot);
1656			pmap_update_vhpt(pte, sva);
1657			pmap_invalidate_page(pmap, sva);
1658		}
1659
1660		sva += PAGE_SIZE;
1661	}
1662	pmap_install(oldpmap);
1663}
1664
1665/*
1666 *	Insert the given physical page (p) at
1667 *	the specified virtual address (v) in the
1668 *	target physical map with the protection requested.
1669 *
1670 *	If specified, the page will be wired down, meaning
1671 *	that the related pte can not be reclaimed.
1672 *
1673 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1674 *	or lose information.  That is, this routine must actually
1675 *	insert this page into the given map NOW.
1676 */
1677void
1678pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1679	   boolean_t wired)
1680{
1681	pmap_t oldpmap;
1682	vm_offset_t pa;
1683	vm_offset_t opa;
1684	struct ia64_lpte origpte;
1685	struct ia64_lpte *pte;
1686	int managed;
1687
1688	if (pmap == NULL)
1689		return;
1690
1691	oldpmap = pmap_install(pmap);
1692
1693	va &= ~PAGE_MASK;
1694#ifdef PMAP_DIAGNOSTIC
1695	if (va > VM_MAX_KERNEL_ADDRESS)
1696		panic("pmap_enter: toobig");
1697#endif
1698
1699	/*
1700	 * Find (or create) a pte for the given mapping.
1701	 */
1702	pte = pmap_find_pte(va);
1703	origpte = *pte;
1704
1705	if (origpte.pte_p)
1706		opa = pmap_pte_pa(&origpte);
1707	else
1708		opa = 0;
1709	managed = 0;
1710
1711	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1712
1713	/*
1714	 * Mapping has not changed, must be protection or wiring change.
1715	 */
1716	if (origpte.pte_p && (opa == pa)) {
1717		/*
1718		 * Wiring change, just update stats. We don't worry about
1719		 * wiring PT pages as they remain resident as long as there
1720		 * are valid mappings in them. Hence, if a user page is wired,
1721		 * the PT page will be also.
1722		 */
1723		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1724			pmap->pm_stats.wired_count++;
1725		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1726			pmap->pm_stats.wired_count--;
1727
1728		/*
1729		 * We might be turning off write access to the page,
1730		 * so we go ahead and sense modify status.
1731		 */
1732		if (origpte.pte_ig & PTE_IG_MANAGED) {
1733			if (origpte.pte_d && pmap_track_modified(va)) {
1734				vm_page_t om;
1735				om = PHYS_TO_VM_PAGE(opa);
1736				vm_page_dirty(om);
1737			}
1738		}
1739
1740		managed = origpte.pte_ig & PTE_IG_MANAGED;
1741		goto validate;
1742	}
1743	/*
1744	 * Mapping has changed, invalidate old range and fall
1745	 * through to handle validating new mapping.
1746	 */
1747	if (opa) {
1748		int error;
1749		vm_page_lock_queues();
1750		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1751		vm_page_unlock_queues();
1752		if (error)
1753			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1754	}
1755
1756	/*
1757	 * Enter on the PV list if part of our managed memory.
1758	 */
1759	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1760		pmap_insert_entry(pmap, va, m);
1761		managed |= PTE_IG_MANAGED;
1762	}
1763
1764	/*
1765	 * Increment counters
1766	 */
1767	pmap->pm_stats.resident_count++;
1768	if (wired)
1769		pmap->pm_stats.wired_count++;
1770
1771validate:
1772
1773	/*
1774	 * Now validate mapping with desired protection/wiring. This
1775	 * adds the pte to the VHPT if necessary.
1776	 */
1777	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1778		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1779
1780	/*
1781	 * if the mapping or permission bits are different, we need
1782	 * to invalidate the page.
1783	 */
1784	if (!pmap_equal_pte(&origpte, pte))
1785		pmap_invalidate_page(pmap, va);
1786
1787	pmap_install(oldpmap);
1788}
1789
1790/*
1791 * this code makes some *MAJOR* assumptions:
1792 * 1. Current pmap & pmap exists.
1793 * 2. Not wired.
1794 * 3. Read access.
1795 * 4. No page table pages.
1796 * 5. Tlbflush is deferred to calling procedure.
1797 * 6. Page IS managed.
1798 * but is *MUCH* faster than pmap_enter...
1799 */
1800
1801static void
1802pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1803{
1804	struct ia64_lpte *pte;
1805	pmap_t oldpmap;
1806
1807	oldpmap = pmap_install(pmap);
1808
1809	pte = pmap_find_pte(va);
1810	if (pte->pte_p)
1811		return;
1812
1813	/*
1814	 * Enter on the PV list since it's part of our managed memory.
1815	 */
1816	pmap_insert_entry(pmap, va, m);
1817
1818	/*
1819	 * Increment counters
1820	 */
1821	pmap->pm_stats.resident_count++;
1822
1823	/*
1824	 * Initialize the PTE with read-only protection and enter it into the VHPT.
1825	 */
1826	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1827		     PTE_IG_MANAGED,
1828		     PTE_PL_USER, PTE_AR_R);
1829
1830	pmap_install(oldpmap);
1831}
1832
1833/*
1834 * Make temporary mapping for a physical address. This is called
1835 * during dump.
1836 */
1837void *
1838pmap_kenter_temporary(vm_offset_t pa, int i)
1839{
1840	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1841}
1842
1843#define MAX_INIT_PT (96)
1844/*
1845 * pmap_object_init_pt preloads the ptes for a given object
1846 * into the specified pmap.  This eliminates the blast of soft
1847 * faults on process startup and immediately after an mmap.
1848 */
1849void
1850pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1851		    vm_object_t object, vm_pindex_t pindex,
1852		    vm_size_t size, int limit)
1853{
1854	pmap_t oldpmap;
1855	vm_offset_t tmpidx;
1856	int psize;
1857	vm_page_t p;
1858	int objpgs;
1859
1860	if (pmap == NULL || object == NULL)
1861		return;
1862
1863	oldpmap = pmap_install(pmap);
1864
1865	psize = ia64_btop(size);
1866
1867	if ((object->type != OBJT_VNODE) ||
1868		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1869			(object->resident_page_count > MAX_INIT_PT))) {
1870		pmap_install(oldpmap);
1871		return;
1872	}
1873
1874	if (psize + pindex > object->size) {
1875		if (object->size < pindex) {
1876			pmap_install(oldpmap);
			return;
		}
1877		psize = object->size - pindex;
1878	}
1879
1880	/*
1881	 * if we are processing a major portion of the object, then scan the
1882	 * entire thing.
1883	 */
1884	if (psize > (object->resident_page_count >> 2)) {
1885		objpgs = psize;
1886
1887		for (p = TAILQ_FIRST(&object->memq);
1888		    ((objpgs > 0) && (p != NULL));
1889		    p = TAILQ_NEXT(p, listq)) {
1890
1891			tmpidx = p->pindex;
1892			if (tmpidx < pindex) {
1893				continue;
1894			}
1895			tmpidx -= pindex;
1896			if (tmpidx >= psize) {
1897				continue;
1898			}
1899			/*
1900			 * don't allow an madvise to blow away our really
1901			 * free pages by allocating pv entries.
1902			 */
1903			if ((limit & MAP_PREFAULT_MADVISE) &&
1904			    cnt.v_free_count < cnt.v_free_reserved) {
1905				break;
1906			}
1907			vm_page_lock_queues();
1908			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1909				(p->busy == 0) &&
1910			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1911				if ((p->queue - p->pc) == PQ_CACHE)
1912					vm_page_deactivate(p);
1913				vm_page_busy(p);
1914				vm_page_unlock_queues();
1915				pmap_enter_quick(pmap,
1916						 addr + ia64_ptob(tmpidx), p);
1917				vm_page_lock_queues();
1918				vm_page_wakeup(p);
1919			}
1920			vm_page_unlock_queues();
1921			objpgs -= 1;
1922		}
1923	} else {
1924		/*
1925		 * else lookup the pages one-by-one.
1926		 */
1927		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1928			/*
1929			 * don't allow an madvise to blow away our really
1930			 * free pages by allocating pv entries.
1931			 */
1932			if ((limit & MAP_PREFAULT_MADVISE) &&
1933			    cnt.v_free_count < cnt.v_free_reserved) {
1934				break;
1935			}
1936			p = vm_page_lookup(object, tmpidx + pindex);
1937			if (p == NULL)
1938				continue;
1939			vm_page_lock_queues();
1940			if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
1941				(p->busy == 0) &&
1942			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1943				if ((p->queue - p->pc) == PQ_CACHE)
1944					vm_page_deactivate(p);
1945				vm_page_busy(p);
1946				vm_page_unlock_queues();
1947				pmap_enter_quick(pmap,
1948						 addr + ia64_ptob(tmpidx), p);
1949				vm_page_lock_queues();
1950				vm_page_wakeup(p);
1951			}
1952			vm_page_unlock_queues();
1953		}
1954	}
1955	pmap_install(oldpmap);
1956	return;
1957}
1958
1959/*
1960 * pmap_prefault provides a quick way of clustering
1961 * page faults into a process's address space.  It is a "cousin"
1962 * of pmap_object_init_pt, except it runs at page fault time instead
1963 * of mmap time.
1964 */
1965#define PFBAK 4
1966#define PFFOR 4
1967#define PAGEORDER_SIZE (PFBAK+PFFOR)
1968
1969static int pmap_prefault_pageorder[] = {
1970	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
1971	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1972	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1973	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1974};
1975
1976void
1977pmap_prefault(pmap, addra, entry)
1978	pmap_t pmap;
1979	vm_offset_t addra;
1980	vm_map_entry_t entry;
1981{
1982	int i;
1983	vm_offset_t starta;
1984	vm_offset_t addr;
1985	vm_pindex_t pindex;
1986	vm_page_t m, mpte;
1987	vm_object_t object;
1988
1989	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1990		return;
1991
1992	object = entry->object.vm_object;
1993
1994	starta = addra - PFBAK * PAGE_SIZE;
1995	if (starta < entry->start) {
1996		starta = entry->start;
1997	} else if (starta > addra) {
1998		starta = 0;
1999	}
2000
2001	mpte = NULL;
2002	for (i = 0; i < PAGEORDER_SIZE; i++) {
2003		vm_object_t lobject;
2004		struct ia64_lpte *pte;
2005
2006		addr = addra + pmap_prefault_pageorder[i];
2007		if (addr > addra + (PFFOR * PAGE_SIZE))
2008			addr = 0;
2009
2010		if (addr < starta || addr >= entry->end)
2011			continue;
2012
2013		pte = pmap_find_vhpt(addr);
2014		if (pte && pte->pte_p)
2015			continue;
2016
2017		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2018		lobject = object;
2019		for (m = vm_page_lookup(lobject, pindex);
2020		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2021		    lobject = lobject->backing_object) {
2022			if (lobject->backing_object_offset & PAGE_MASK)
2023				break;
2024			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2025			m = vm_page_lookup(lobject->backing_object, pindex);
2026		}
2027
2028		/*
2029		 * give up when a page is not in memory
2030		 */
2031		if (m == NULL)
2032			break;
2033		vm_page_lock_queues();
2034		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2035			(m->busy == 0) &&
2036		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2037
2038			if ((m->queue - m->pc) == PQ_CACHE) {
2039				vm_page_deactivate(m);
2040			}
2041			vm_page_busy(m);
2042			vm_page_unlock_queues();
2043			pmap_enter_quick(pmap, addr, m);
2044			vm_page_lock_queues();
2045			vm_page_wakeup(m);
2046		}
2047		vm_page_unlock_queues();
2048	}
2049}
2050
2051/*
2052 *	Routine:	pmap_change_wiring
2053 *	Function:	Change the wiring attribute for a map/virtual-address
2054 *			pair.
2055 *	In/out conditions:
2056 *			The mapping must already exist in the pmap.
2057 */
2058void
2059pmap_change_wiring(pmap, va, wired)
2060	register pmap_t pmap;
2061	vm_offset_t va;
2062	boolean_t wired;
2063{
2064	pmap_t oldpmap;
2065	struct ia64_lpte *pte;
2066
2067	if (pmap == NULL)
2068		return;
2069
2070	oldpmap = pmap_install(pmap);
2071
2072	pte = pmap_find_vhpt(va);
2073
2074	if (wired && !pmap_pte_w(pte))
2075		pmap->pm_stats.wired_count++;
2076	else if (!wired && pmap_pte_w(pte))
2077		pmap->pm_stats.wired_count--;
2078
2079	/*
2080	 * Wiring is not a hardware characteristic so there is no need to
2081	 * invalidate TLB.
2082	 */
2083	pmap_pte_set_w(pte, wired);
2084
2085	pmap_install(oldpmap);
2086}
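
/*
 * Illustrative use only (the real callers live in the machine-
 * independent wiring code, which is not part of this file): a mapping
 * that already exists can be wired and later unwired around an
 * operation that must not fault,
 *
 *	pmap_change_wiring(pmap, va, TRUE);
 *	...
 *	pmap_change_wiring(pmap, va, FALSE);
 *
 * Only pm_stats.wired_count and the software wired bit in the PTE are
 * touched; no TLB invalidation is required.
 */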
2087
2088
2089
2090/*
2091 *	Copy the range specified by src_addr/len
2092 *	from the source map to the range dst_addr/len
2093 *	in the destination map.
2094 *
2095 *	This routine is only advisory and need not do anything.
2096 */
2097
2098void
2099pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2100	  vm_offset_t src_addr)
2101{
2102}
2103
2104
2105/*
2106 *	pmap_zero_page zeros the specified hardware page by
2107 *	mapping it into virtual memory and using bzero to clear
2108 *	its contents.
2109 */
2110
2111void
2112pmap_zero_page(vm_page_t m)
2113{
2114	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2115	bzero((caddr_t) va, PAGE_SIZE);
2116}
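
/*
 * No transient mapping is needed: IA64_PHYS_TO_RR7() (defined in the
 * machine headers, not here) forms the region 7 direct-mapped,
 * cacheable kernel address for a physical address -- essentially the
 * physical address with the region bits set to 7, e.g. physical
 * 0x1234000 becomes 0xe000000001234000.  The other pmap_zero_page*()
 * and pmap_copy_page() routines below rely on the same identity
 * mapping.
 */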
2117
2118
2119/*
2120 *	pmap_zero_page_area zeros the specified hardware page by
2121 *	mapping it into virtual memory and using bzero to clear
2122 *	its contents.
2123 *
2124 *	off and size must reside within a single page.
2125 */
2126
2127void
2128pmap_zero_page_area(vm_page_t m, int off, int size)
2129{
2130	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2131	bzero((char *)va + off, size);
2132}
2133
2134
2135/*
2136 *	pmap_zero_page_idle zeros the specified hardware page by
2137 *	mapping it into virtual memory and using bzero to clear
2138 *	its contents.  This is for the vm_idlezero process.
2139 */
2140
2141void
2142pmap_zero_page_idle(vm_page_t m)
2143{
2144	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2145	bzero((caddr_t) va, PAGE_SIZE);
2146}
2147
2148
2149/*
2150 *	pmap_copy_page copies the specified (machine independent)
2151 *	page by mapping the page into virtual memory and using
2152 *	bcopy to copy the page, one machine dependent page at a
2153 *	time.
2154 */
2155void
2156pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2157{
2158	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2159	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2160	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2161}
2162
2163/*
2164 * Returns true if the pmap's pv is one of the first
2165 * 16 pvs linked to from this page.  This count may
2166 * be changed upwards or downwards in the future; it
2167 * is only necessary that true be returned for a small
2168 * subset of pmaps for proper page aging.
2169 */
2170boolean_t
2171pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2172{
2173	pv_entry_t pv;
2174	int loops = 0;
2175	int s;
2176
2177	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2178		return FALSE;
2179
2180	s = splvm();
2181
2182	/*
2183	 * Check the page's current mappings, returning immediately if found.
2184	 */
2185	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2186		if (pv->pv_pmap == pmap) {
2187			splx(s);
2188			return TRUE;
2189		}
2190		loops++;
2191		if (loops >= 16)
2192			break;
2193	}
2194	splx(s);
2195	return (FALSE);
2196}
2197
2198#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2199/*
2200 * Remove all pages from the specified address space; this aids
2201 * process exit speeds.  Also, this code is special-cased for the
2202 * current process only, but the more generic (and slightly
2203 * slower) mode can be enabled.  This is much faster than
2204 * pmap_remove in the case of running down an entire address
2205 * space.
2206 */
2207void
2208pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2209{
2210	pv_entry_t pv, npv;
2211	int s;
2212
2213#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2214	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2215		printf("warning: pmap_remove_pages called with non-current pmap\n");
2216		return;
2217	}
2218#endif
2219
2220	s = splvm();
2221	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2222		pv;
2223		pv = npv) {
2224		struct ia64_lpte *pte;
2225
2226		npv = TAILQ_NEXT(pv, pv_plist);
2227
2228		if (pv->pv_va >= eva || pv->pv_va < sva) {
2229			continue;
2230		}
2231
2232		pte = pmap_find_vhpt(pv->pv_va);
2233		if (!pte)
2234			panic("pmap_remove_pages: page on pm_pvlist has no pte");
2235
2236		/*
2237		 * We cannot remove wired pages from a process's mapping
2238		 * at this time.
2239		 */
2240		if (pte->pte_ig & PTE_IG_WIRED) {
2241			continue;
2242		}
2243
2244		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2245	}
2246	splx(s);
2247
2248	pmap_invalidate_all(pmap);
2249}
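
/*
 * The expected callers (in the machine-independent VM code, not shown
 * here) use this at exec and exit time to run down essentially the
 * whole user address space in one call, roughly
 *
 *	pmap_remove_pages(vmspace_pmap(vm), user_va_start, user_va_end);
 *
 * where user_va_start and user_va_end stand in for the user VA range.
 * Wired pages are deliberately skipped, and the single
 * pmap_invalidate_all() at the end replaces per-page invalidations.
 */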
2250
2251/*
2252 *	pmap_page_protect:
2253 *
2254 *	Lower the permission for all mappings to a given page.
2255 */
2256void
2257pmap_page_protect(vm_page_t m, vm_prot_t prot)
2258{
2259	pv_entry_t pv;
2260
2261	if ((prot & VM_PROT_WRITE) != 0)
2262		return;
2263	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2264		if ((m->flags & PG_WRITEABLE) == 0)
2265			return;
2266		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2267			int newprot = pte_prot(pv->pv_pmap, prot);
2268			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2269			struct ia64_lpte *pte;
2270			pte = pmap_find_vhpt(pv->pv_va);
2271			pmap_pte_set_prot(pte, newprot);
2272			pmap_update_vhpt(pte, pv->pv_va);
2273			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2274			pmap_install(oldpmap);
2275		}
2276		vm_page_flag_clear(m, PG_WRITEABLE);
2277	} else {
2278		pmap_remove_all(m);
2279	}
2280}
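
/*
 * Two common invocations, shown here only as illustration:
 *
 *	pmap_page_protect(m, VM_PROT_READ);	(write-protect all mappings of m)
 *	pmap_page_protect(m, VM_PROT_NONE);	(remove all mappings of m)
 *
 * A request that still includes VM_PROT_WRITE lowers nothing and is a
 * no-op.
 */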
2281
2282/*
2283 *	pmap_ts_referenced:
2284 *
2285 *	Return a count of reference bits for a page, clearing those bits.
2286 *	It is not necessary for every reference bit to be cleared, but it
2287 *	is necessary that 0 only be returned when there are truly no
2288 *	reference bits set.
2289 *
2290 *	XXX: The exact number of bits to check and clear is a matter that
2291 *	should be tested and standardized at some point in the future for
2292 *	optimal aging of shared pages.
2293 */
2294int
2295pmap_ts_referenced(vm_page_t m)
2296{
2297	pv_entry_t pv;
2298	int count = 0;
2299
2300	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2301		return 0;
2302
2303	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2304		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2305		struct ia64_lpte *pte;
2306		pte = pmap_find_vhpt(pv->pv_va);
2307		if (pte->pte_a) {
2308			count++;
2309			pte->pte_a = 0;
2310			pmap_update_vhpt(pte, pv->pv_va);
2311			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2312		}
2313		pmap_install(oldpmap);
2314	}
2315
2316	return count;
2317}
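
/*
 * The count returned above is typically folded into the page daemon's
 * activity estimate for the page, along the lines of
 *
 *	actcount += pmap_ts_referenced(m);
 *
 * (the caller and the actcount variable are illustrative).  Clearing
 * only a subset of the reference bits merely slows aging; per the
 * contract above, a page with any reference bit set still yields a
 * non-zero count.
 */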
2318
2319#if 0
2320/*
2321 *	pmap_is_referenced:
2322 *
2323 *	Return whether or not the specified physical page was referenced
2324 *	in any physical maps.
2325 */
2326static boolean_t
2327pmap_is_referenced(vm_page_t m)
2328{
2329	pv_entry_t pv;
2330
2331	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2332		return FALSE;
2333
2334	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2335		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2336		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2337		pmap_install(oldpmap);
2338		if (pte->pte_a)
2339			return 1;
2340	}
2341
2342	return 0;
2343}
2344#endif
2345
2346/*
2347 *	pmap_is_modified:
2348 *
2349 *	Return whether or not the specified physical page was modified
2350 *	in any physical maps.
2351 */
2352boolean_t
2353pmap_is_modified(vm_page_t m)
2354{
2355	pv_entry_t pv;
2356
2357	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2358		return FALSE;
2359
2360	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2361		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2362		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2363		pmap_install(oldpmap);
2364		if (pte->pte_d)
2365			return 1;
2366	}
2367
2368	return 0;
2369}
2370
2371/*
2372 *	Clear the modify bits on the specified physical page.
2373 */
2374void
2375pmap_clear_modify(vm_page_t m)
2376{
2377	pv_entry_t pv;
2378
2379	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2380		return;
2381
2382	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2383		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2384		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2385		if (pte->pte_d) {
2386			pte->pte_d = 0;
2387			pmap_update_vhpt(pte, pv->pv_va);
2388			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2389		}
2390		pmap_install(oldpmap);
2391	}
2392}
2393
2394/*
2395 *	pmap_clear_reference:
2396 *
2397 *	Clear the reference bit on the specified physical page.
2398 */
2399void
2400pmap_clear_reference(vm_page_t m)
2401{
2402	pv_entry_t pv;
2403
2404	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2405		return;
2406
2407	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2408		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2409		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2410		if (pte->pte_a) {
2411			pte->pte_a = 0;
2412			pmap_update_vhpt(pte, pv->pv_va);
2413			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2414		}
2415		pmap_install(oldpmap);
2416	}
2417}
2418
2419/*
2420 * Miscellaneous support routines follow
2421 */
2422
2423static void
2424ia64_protection_init()
2425{
2426	int prot, *kp, *up;
2427
2428	kp = protection_codes[0];
2429	up = protection_codes[1];
2430
2431	for (prot = 0; prot < 8; prot++) {
2432		switch (prot) {
2433		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2434			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2435			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2436			break;
2437
2438		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2439			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2440			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2441			break;
2442
2443		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2444			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2445			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2446			break;
2447
2448		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2449			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2450			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2451			break;
2452
2453		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2454			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2455			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2456			break;
2457
2458		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2459			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2460			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2461			break;
2462
2463		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2464			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2465			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2466			break;
2467
2468		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2469			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2470			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2471			break;
2472		}
2473	}
2474}
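
/*
 * Each table entry packs the PTE access-rights field and privilege
 * level as (PTE_AR_* << 2) | PTE_PL_*.  The kernel row and the user
 * row are indexed by the three VM_PROT_* bits, so, for example, a user
 * mapping requested with VM_PROT_READ | VM_PROT_WRITE resolves to
 * (PTE_AR_RW << 2) | PTE_PL_USER.  pte_prot(), used earlier in this
 * file, is the consumer of this table.
 */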
2475
2476/*
2477 * Map a set of physical memory pages into the kernel virtual
2478 * address space. Return a pointer to where it is mapped. This
2479 * routine is intended to be used for mapping device memory,
2480 * NOT real memory.
2481 */
2482void *
2483pmap_mapdev(vm_offset_t pa, vm_size_t size)
2484{
2485	return (void*) IA64_PHYS_TO_RR6(pa);
2486}
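
/*
 * Region 6 is the uncacheable direct-mapped region, which is what
 * device registers require, so no page tables are touched.  A driver
 * would use this roughly as follows (names are illustrative):
 *
 *	regs = pmap_mapdev(pa, size);
 *	... access device registers through regs ...
 *	pmap_unmapdev((vm_offset_t)regs, size);
 *
 * Since nothing was allocated, pmap_unmapdev() below has nothing to
 * tear down.
 */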
2487
2488/*
2489 * 'Unmap' a range mapped by pmap_mapdev().
2490 */
2491void
2492pmap_unmapdev(vm_offset_t va, vm_size_t size)
2493{
2494	return;
2495}
2496
2497/*
2498 * perform the pmap work for mincore
2499 */
2500int
2501pmap_mincore(pmap_t pmap, vm_offset_t addr)
2502{
2503	pmap_t oldpmap;
2504	struct ia64_lpte *pte;
2505	int val = 0;
2506
2507	oldpmap = pmap_install(pmap);
2508	pte = pmap_find_vhpt(addr);
2509	pmap_install(oldpmap);
2510
2511	if (!pte)
2512		return 0;
2513
2514	if (pmap_pte_v(pte)) {
2515		vm_page_t m;
2516		vm_offset_t pa;
2517
2518		val = MINCORE_INCORE;
2519		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2520			return val;
2521
2522		pa = pmap_pte_pa(pte);
2523
2524		m = PHYS_TO_VM_PAGE(pa);
2525
2526		/*
2527		 * Modified by us
2528		 */
2529		if (pte->pte_d)
2530			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2531		/*
2532		 * Modified by someone
2533		 */
2534		else if (pmap_is_modified(m))
2535			val |= MINCORE_MODIFIED_OTHER;
2536		/*
2537		 * Referenced by us
2538		 */
2539		if (pte->pte_a)
2540			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2541
2542		/*
2543		 * Referenced by someone
2544		 */
2545		else if (pmap_ts_referenced(m)) {
2546			val |= MINCORE_REFERENCED_OTHER;
2547			vm_page_flag_set(m, PG_REFERENCED);
2548		}
2549	}
2550	return val;
2551}
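
/*
 * The value built above is the per-page status word reported through
 * the mincore(2) system call: MINCORE_INCORE for a resident page,
 * MINCORE_MODIFIED/MINCORE_REFERENCED when this pmap's PTE has the
 * dirty/accessed bit set, and the *_OTHER variants when some other
 * mapping of the page does.
 */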
2552
2553void
2554pmap_activate(struct thread *td)
2555{
2556	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2557}
2558
2559pmap_t
2560pmap_install(pmap_t pmap)
2561{
2562	pmap_t oldpmap;
2563	int i;
2564
2565	critical_enter();
2566
2567	oldpmap = PCPU_GET(current_pmap);
2568
2569	if (pmap == oldpmap || pmap == kernel_pmap) {
2570		critical_exit();
2571		return pmap;
2572	}
2573
2574	if (oldpmap) {
2575		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2576	}
2577
2578	PCPU_SET(current_pmap, pmap);
2579	if (!pmap) {
2580		/*
2581		 * RIDs 0..4 have no mappings, so any access through
2582		 * these regions generates a page fault.
2583		 */
2584		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2585		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2586		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2587		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2588		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2589		critical_exit();
2590		return oldpmap;
2591	}
2592
2593	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2594
2595	for (i = 0; i < 5; i++)
2596		ia64_set_rr(IA64_RR_BASE(i),
2597			    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2598
2599	critical_exit();
2600	return oldpmap;
2601}
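
/*
 * pmap_install() returns the previously installed pmap, which is what
 * lets the rest of this file temporarily switch address spaces around
 * a VHPT lookup and then restore the old state:
 *
 *	oldpmap = pmap_install(pv->pv_pmap);
 *	pte = pmap_find_vhpt(pv->pv_va);
 *	...
 *	pmap_install(oldpmap);
 *
 * Installing the already-current pmap or the kernel pmap leaves the
 * region registers untouched.
 */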
2602
2603vm_offset_t
2604pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2605{
2606
2607	return addr;
2608}
2609
2610#include "opt_ddb.h"
2611
2612#ifdef DDB
2613
2614#include <ddb/ddb.h>
2615
2616static const char*	psnames[] = {
2617	"1B",	"2B",	"4B",	"8B",
2618	"16B",	"32B",	"64B",	"128B",
2619	"256B",	"512B",	"1K",	"2K",
2620	"4K",	"8K",	"16K",	"32K",
2621	"64K",	"128K",	"256K",	"512K",
2622	"1M",	"2M",	"4M",	"8M",
2623	"16M",	"32M",	"64M",	"128M",
2624	"256M",	"512M",	"1G",	"2G"
2625};
2626
2627static void
2628print_trs(int type)
2629{
2630	struct ia64_pal_result	res;
2631	int			i, maxtr;
2632	struct {
2633		struct ia64_pte	pte;
2634		struct ia64_itir itir;
2635		struct ia64_ifa ifa;
2636		struct ia64_rr	rr;
2637	}			buf;
2638	static const char*	manames[] = {
2639		"WB",	"bad",	"bad",	"bad",
2640		"UC",	"UCE",	"WC",	"NaT",
2642	};
2643
2644	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2645	if (res.pal_status != 0) {
2646		db_printf("Can't get VM summary\n");
2647		return;
2648	}
2649
2650	if (type == 0)
2651		maxtr = (res.pal_result[0] >> 40) & 0xff;
2652	else
2653		maxtr = (res.pal_result[0] >> 32) & 0xff;
2654
2655	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2656	for (i = 0; i <= maxtr; i++) {
2657		bzero(&buf, sizeof(buf));
2658		res = ia64_call_pal_stacked_physical
2659			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2660		if (!(res.pal_result[0] & 1))
2661			buf.pte.pte_ar = 0;
2662		if (!(res.pal_result[0] & 2))
2663			buf.pte.pte_pl = 0;
2664		if (!(res.pal_result[0] & 4))
2665			buf.pte.pte_d = 0;
2666		if (!(res.pal_result[0] & 8))
2667			buf.pte.pte_ma = 0;
2668		db_printf(
2669			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2670			buf.ifa.ifa_ig & 1,
2671			buf.rr.rr_rid,
2672			buf.ifa.ifa_vpn,
2673			buf.pte.pte_ppn,
2674			psnames[buf.itir.itir_ps],
2675			buf.pte.pte_ed,
2676			buf.pte.pte_ar,
2677			buf.pte.pte_pl,
2678			buf.pte.pte_d,
2679			buf.pte.pte_a,
2680			manames[buf.pte.pte_ma],
2681			buf.pte.pte_p,
2682			buf.itir.itir_key);
2683	}
2684}
2685
2686DB_COMMAND(itr, db_itr)
2687{
2688	print_trs(0);
2689}
2690
2691DB_COMMAND(dtr, db_dtr)
2692{
2693	print_trs(1);
2694}
2695
2696DB_COMMAND(rr, db_rr)
2697{
2698	int i;
2699	u_int64_t t;
2700	struct ia64_rr rr;
2701
2702	printf("RR RID    PgSz VE\n");
2703	for (i = 0; i < 8; i++) {
2704		__asm __volatile ("mov %0=rr[%1]"
2705				  : "=r"(t)
2706				  : "r"(IA64_RR_BASE(i)));
2707		*(u_int64_t *) &rr = t;
2708		printf("%d  %06x %4s %d\n",
2709		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2710	}
2711}
2712
2713DB_COMMAND(thash, db_thash)
2714{
2715	if (!have_addr)
2716		return;
2717
2718	db_printf("%p\n", (void *) ia64_thash(addr));
2719}
2720
2721DB_COMMAND(ttag, db_ttag)
2722{
2723	if (!have_addr)
2724		return;
2725
2726	db_printf("0x%lx\n", ia64_ttag(addr));
2727}
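
/*
 * From the DDB prompt, "itr" and "dtr" dump the instruction and data
 * translation registers, "rr" dumps the eight region registers, and
 * "thash <addr>"/"ttag <addr>" print the VHPT hash address and tag the
 * hardware would compute for the given virtual address, for example
 *
 *	db> thash 0xa000000000001000
 *	db> ttag 0xa000000000001000
 *
 * (the address shown is only an example).
 */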
2728
2729#endif
2730