1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 115084 2003-05-16 21:26:42Z marcel $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 * this module may delay invalidation or reduced protection
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Regions 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
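
/*
 * Editorial illustration (not part of the original source): the top three
 * bits of a virtual address select the region, so (va >> 61) == 5
 * identifies a kernel virtually mapped address (see pmap_find_kpte()),
 * while regions 6 and 7 are identity maps of physical memory:
 *
 *	IA64_RR_MASK(IA64_PHYS_TO_RR7(pa)) == pa
 *
 * which is why pmap_kextract() below can return IA64_RR_MASK(va) directly
 * for any address at or above IA64_RR_BASE(6).
 */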
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120#include <vm/uma_int.h>
121
122#include <sys/user.h>
123
124#include <machine/cpu.h>
125#include <machine/pal.h>
126#include <machine/md_var.h>
127
128/* XXX move to a header. */
129extern u_int64_t ia64_gateway_page[];
130
131MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
132
133#ifndef KSTACK_MAX_PAGES
134#define	KSTACK_MAX_PAGES 32
135#endif
136
137#ifndef PMAP_SHPGPERPROC
138#define PMAP_SHPGPERPROC 200
139#endif
140
141#if defined(DIAGNOSTIC)
142#define PMAP_DIAGNOSTIC
143#endif
144
145#define MINPV 2048	/* Preallocate at least this many */
146#define MAXPV 20480	/* But no more than this */
147
148#if 0
149#define PMAP_DIAGNOSTIC
150#define PMAP_DEBUG
151#endif
152
153#if !defined(PMAP_DIAGNOSTIC)
154#define PMAP_INLINE __inline
155#else
156#define PMAP_INLINE
157#endif
158
159/*
160 * Get PDEs and PTEs for user/kernel address space
161 */
162#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
163#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
164#define pmap_pte_v(pte)		((pte)->pte_p)
165#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
166#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
167
168#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
169				:((pte)->pte_ig &= ~PTE_IG_WIRED))
170#define pmap_pte_set_prot(pte, v) do {		\
171    (pte)->pte_ar = v >> 2;			\
172    (pte)->pte_pl = v & 3;			\
173} while (0)
174
175/*
176 * Given a map and a machine independent protection code,
177 * convert to an ia64 protection code.
178 */
179#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
180#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
181#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
182int	protection_codes[2][8];
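
/*
 * Editorial note (an assumption based on the accessors above, not original
 * text): each protection_codes[][] entry packs an ia64 access-rights value
 * and privilege level as (ar << 2) | pl, which pte_prot_ar() and
 * pte_prot_pl() unpack; the first index distinguishes kernel (0) from user
 * (1) pmaps and the second is the VM_PROT_* combination.  The table is
 * filled in by ia64_protection_init() during pmap_bootstrap().
 */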
183
184/*
185 * Return non-zero if this pmap is currently active
186 */
187#define pmap_isactive(pmap)	(pmap->pm_active)
188
189/*
190 * Statically allocated kernel pmap
191 */
192struct pmap kernel_pmap_store;
193
194vm_offset_t avail_start;	/* PA of first available physical page */
195vm_offset_t avail_end;		/* PA of last available physical page */
196vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
197vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
198static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
199
200vm_offset_t vhpt_base, vhpt_size;
201
202/*
203 * We use an object to own the kernel's 'page tables'. For simplicity,
204 * we use one page directory to index a set of pages containing
205 * ia64_lptes. This gives us up to 2GB of kernel virtual space.
206 */
207static int nkpt;
208struct ia64_lpte **ia64_kptdir;
209#define KPTE_DIR_INDEX(va) \
210	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
211#define KPTE_PTE_INDEX(va) \
212	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
213#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
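
/*
 * Editorial worked example (assuming 8KB pages, i.e. PAGE_SHIFT == 13, and
 * a 32-byte struct ia64_lpte): KPTE_DIR_INDEX() extracts bits 21..30 of the
 * address (1024 directory slots, one page of pointers) and KPTE_PTE_INDEX()
 * extracts bits 13..20 (NKPTEPG == 256 PTEs per page), so the two-level
 * table can span 1024 * 256 * 8KB = 2GB of kernel virtual space, matching
 * the comment above.
 */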
214
215vm_offset_t kernel_vm_end;
216
217/* Values for ptc.e. XXX values for SKI. */
218static u_int64_t pmap_ptc_e_base = 0x100000000;
219static u_int64_t pmap_ptc_e_count1 = 3;
220static u_int64_t pmap_ptc_e_count2 = 2;
221static u_int64_t pmap_ptc_e_stride1 = 0x2000;
222static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
223
224/*
225 * Data for the RID allocator
226 */
227static int pmap_ridcount;
228static int pmap_rididx;
229static int pmap_ridmapsz;
230static int pmap_ridmax;
231static u_int64_t *pmap_ridmap;
232struct mtx pmap_ridmutex;
233
234/*
235 * Data for the pv entry allocation mechanism
236 */
237static uma_zone_t pvzone;
238static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
239int pmap_pagedaemon_waken;
240static struct pv_entry *pvbootentries;
241static int pvbootnext, pvbootmax;
242
243/*
244 * Data for allocating PTEs for user processes.
245 */
246static uma_zone_t ptezone;
247
248/*
249 * VHPT instrumentation.
250 */
251static int pmap_vhpt_inserts;
252static int pmap_vhpt_collisions;
253static int pmap_vhpt_resident;
254SYSCTL_DECL(_vm_stats);
255SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
256SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
257	   &pmap_vhpt_inserts, 0, "");
258SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
259	   &pmap_vhpt_collisions, 0, "");
260SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
261	   &pmap_vhpt_resident, 0, "");
262
263static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
264static pv_entry_t get_pv_entry(void);
265static void	ia64_protection_init(void);
266
267static void	pmap_invalidate_all(pmap_t pmap);
268static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
269
270vm_offset_t
271pmap_steal_memory(vm_size_t size)
272{
273	vm_size_t bank_size;
274	vm_offset_t pa, va;
275
276	size = round_page(size);
277
278	bank_size = phys_avail[1] - phys_avail[0];
279	while (size > bank_size) {
280		int i;
281		for (i = 0; phys_avail[i+2]; i+= 2) {
282			phys_avail[i] = phys_avail[i+2];
283			phys_avail[i+1] = phys_avail[i+3];
284		}
285		phys_avail[i] = 0;
286		phys_avail[i+1] = 0;
287		if (!phys_avail[0])
288			panic("pmap_steal_memory: out of memory");
289		bank_size = phys_avail[1] - phys_avail[0];
290	}
291
292	pa = phys_avail[0];
293	phys_avail[0] += size;
294
295	va = IA64_PHYS_TO_RR7(pa);
296	bzero((caddr_t) va, size);
297	return va;
298}
299
300/*
301 *	Bootstrap the system enough to run with virtual memory.
302 */
303void
304pmap_bootstrap()
305{
306	int i, j, count, ridbits;
307	struct ia64_pal_result res;
308
309	/*
310	 * Query the PAL Code to find the loop parameters for the
311	 * ptc.e instruction.
312	 */
313	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
314	if (res.pal_status != 0)
315		panic("Can't configure ptc.e parameters");
316	pmap_ptc_e_base = res.pal_result[0];
317	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
318	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
319	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
320	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
321	if (bootverbose)
322		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
323		       "stride1=0x%lx, stride2=0x%lx\n",
324		       pmap_ptc_e_base,
325		       pmap_ptc_e_count1,
326		       pmap_ptc_e_count2,
327		       pmap_ptc_e_stride1,
328		       pmap_ptc_e_stride2);
329
330	/*
331	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
332	 *
333	 * We currently need at least 19 bits in the RID because PID_MAX
334	 * can only be encoded in 17 bits and we need RIDs for 5 regions
335	 * per process. With PID_MAX equalling 99999 this means that we
336	 * need to be able to encode 499995 (=5*PID_MAX).
337	 * The Itanium processor only has 18 bits and the architected
338	 * minimum is exactly that. So, we cannot use a PID based scheme
339	 * in those cases. Enter pmap_ridmap...
340	 * We should avoid the map when running on a processor that has
341	 * implemented enough bits. This means that we should pass the
342	 * process/thread ID to pmap. This we currently don't do, so we
343	 * use the map anyway. However, we don't want to allocate a map
344	 * that is large enough to cover the range dictated by the number
345	 * of bits in the RID, because that may result in a RID map of
346	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
347 * The bottom line: we create a 32KB map when the processor only
348	 * implements 18 bits (or when we can't figure it out). Otherwise
349	 * we create a 64KB map.
350	 */
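	/*
	 * Editorial arithmetic check (not in the original): 5 * PID_MAX =
	 * 5 * 99999 = 499995, and 2^18 = 262144 < 499995 <= 524288 = 2^19,
	 * which is where the 19-bit requirement above comes from.
	 */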
351	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
352	if (res.pal_status != 0) {
353		if (bootverbose)
354			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
355		ridbits = 18; /* guaranteed minimum */
356	} else {
357		ridbits = (res.pal_result[1] >> 8) & 0xff;
358		if (bootverbose)
359			printf("Processor supports %d Region ID bits\n",
360			    ridbits);
361	}
362	if (ridbits > 19)
363		ridbits = 19;
364
365	pmap_ridmax = (1 << ridbits);
366	pmap_ridmapsz = pmap_ridmax / 64;
367	pmap_ridmap = (u_int64_t *)pmap_steal_memory(pmap_ridmax / 8);
368	pmap_ridmap[0] |= 0xff;
369	pmap_rididx = 0;
370	pmap_ridcount = 8;
371	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
372
373	/*
374	 * Allocate some memory for initial kernel 'page tables'.
375	 */
376	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
377	for (i = 0; i < NKPT; i++) {
378		ia64_kptdir[i] = (void*)pmap_steal_memory(PAGE_SIZE);
379	}
380	nkpt = NKPT;
381	kernel_vm_end = NKPT * PAGE_SIZE * NKPTEPG + VM_MIN_KERNEL_ADDRESS -
382	    VM_GATEWAY_SIZE;
383
384	avail_start = phys_avail[0];
385	for (i = 0; phys_avail[i+2]; i+= 2) ;
386	avail_end = phys_avail[i+1];
387	count = i+2;
388
389	/*
390	 * Figure out a useful size for the VHPT, based on the size of
391	 * physical memory and try to locate a region which is large
392	 * enough to contain the VHPT (which must be a power of two in
393	 * size and aligned to a natural boundary).
394	 * Don't use the difference between avail_start and avail_end
395	 * as a measure for memory size. The address space is often
396 * sparse enough that we would (try to) create a huge VHPT.
397	 */
398	vhpt_size = 15;
399	while ((1<<vhpt_size) < Maxmem * 32)
400		vhpt_size++;
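
	/*
	 * Editorial example (assuming Maxmem counts 8KB pages and VHPT
	 * entries are 32 bytes): the loop above sizes the VHPT at roughly
	 * one entry per physical page, e.g. 1GB of memory is 131072 pages
	 * and 131072 * 32 = 4MB, giving vhpt_size == 22.
	 */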
401
402	vhpt_base = 0;
403	while (!vhpt_base) {
404		vm_offset_t mask;
405		if (bootverbose)
406			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
407		mask = (1L << vhpt_size) - 1;
408		for (i = 0; i < count; i += 2) {
409			vm_offset_t base, limit;
410			base = (phys_avail[i] + mask) & ~mask;
411			limit = base + (1L << vhpt_size);
412			if (limit <= phys_avail[i+1])
413				/*
414				 * VHPT can fit in this region
415				 */
416				break;
417		}
418		if (!phys_avail[i]) {
419			/*
420			 * Can't fit, try next smaller size.
421			 */
422			vhpt_size--;
423		} else {
424			vhpt_base = (phys_avail[i] + mask) & ~mask;
425		}
426	}
427	if (vhpt_size < 15)
428		panic("Can't find space for VHPT");
429
430	if (bootverbose)
431		printf("Putting VHPT at %p\n", (void *) vhpt_base);
432	if (vhpt_base != phys_avail[i]) {
433		/*
434		 * Split this region.
435		 */
436		if (bootverbose)
437			printf("Splitting [%p-%p]\n",
438			       (void *) phys_avail[i],
439			       (void *) phys_avail[i+1]);
440		for (j = count; j > i; j -= 2) {
441			phys_avail[j] = phys_avail[j-2];
442			phys_avail[j+1] = phys_avail[j-2+1];
443		}
444		phys_avail[count+2] = 0;
445		phys_avail[count+3] = 0;
446		phys_avail[i+1] = vhpt_base;
447		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
448	} else {
449		phys_avail[i] = vhpt_base + (1L << vhpt_size);
450	}
451
452	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
453	bzero((void *) vhpt_base, (1L << vhpt_size));
454	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
455			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
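
	/*
	 * Editorial note on the cr.pta value above (our reading of the
	 * Itanium architecture, not original text): bit 0 enables the VHPT
	 * walker (pta.ve), bits 2..7 hold the table size as a power of two
	 * (pta.size == vhpt_size), bit 8 selects the long 32-byte entry
	 * format (pta.vf) and the upper bits hold the table base address.
	 */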
456
457	virtual_avail = VM_MIN_KERNEL_ADDRESS;
458	virtual_end = VM_MAX_KERNEL_ADDRESS;
459
460	/*
461	 * Initialize protection array.
462	 */
463	ia64_protection_init();
464
465	/*
466	 * Initialize the kernel pmap (which is statically allocated).
467	 */
468	for (i = 0; i < 5; i++)
469		kernel_pmap->pm_rid[i] = 0;
470	kernel_pmap->pm_active = 1;
471	TAILQ_INIT(&kernel_pmap->pm_pvlist);
472	PCPU_SET(current_pmap, kernel_pmap);
473
474	/*
475	 * Region 5 is mapped via the vhpt.
476	 */
477	ia64_set_rr(IA64_RR_BASE(5),
478		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
479
480	/*
481	 * Region 6 is direct mapped UC and region 7 is direct mapped
482 * WB. The details of this are controlled by the Alt {I,D}TLB
483	 * handlers. Here we just make sure that they have the largest
484	 * possible page size to minimise TLB usage.
485	 */
486	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
487	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
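
	/*
	 * Editorial note (hedged reading of the region register format, not
	 * original text): a region register packs the RID in bits 8 and up,
	 * the preferred page size in bits 2..7 and the VHPT-enable bit in
	 * bit 0.  Region 5 above therefore gets RID 5, 8KB pages and the
	 * walker enabled, while regions 6 and 7 get RIDs 6 and 7, 256MB
	 * (2^28) pages and the walker disabled.
	 */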
488
489	/*
490	 * Reserve some memory for allocating pvs while bootstrapping
491	 * the pv allocator. We need to have enough to cover mapping
492	 * the kmem_alloc region used to allocate the initial_pvs in
493	 * pmap_init. In general, the size of this region is
494	 * approximately (# physical pages) * (size of pv entry).
495	 */
496	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
497	pvbootentries = (struct pv_entry *)
498		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
499	pvbootnext = 0;
500
501	/*
502	 * Clear out any random TLB entries left over from booting.
503	 */
504	pmap_invalidate_all(kernel_pmap);
505
506	map_gateway_page();
507}
508
509void *
510uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
511{
512	static vm_pindex_t color;
513	vm_page_t m;
514	int pflags;
515	void *va;
516
517	*flags = UMA_SLAB_PRIV;
518	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
519		pflags = VM_ALLOC_INTERRUPT;
520	else
521		pflags = VM_ALLOC_SYSTEM;
522	if (wait & M_ZERO)
523		pflags |= VM_ALLOC_ZERO;
524
525	for (;;) {
526		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
527		if (m == NULL) {
528			if (wait & M_NOWAIT)
529				return (NULL);
530			else
531				VM_WAIT;
532		} else
533			break;
534	}
535
536	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
537	if ((m->flags & PG_ZERO) == 0)
538		bzero(va, PAGE_SIZE);
539	return (va);
540}
541
542void
543uma_small_free(void *mem, int size, u_int8_t flags)
544{
545	vm_page_t m;
546
547	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
548	vm_page_lock_queues();
549	vm_page_free(m);
550	vm_page_unlock_queues();
551}
552
553/*
554 *	Initialize the pmap module.
555 *	Called by vm_init, to initialize any structures that the pmap
556 *	system needs to map virtual memory.
557 *	pmap_init has been enhanced to support, in a fairly consistent
558 *	way, discontiguous physical memory.
559 */
560void
561pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
562{
563	int i;
564	int initial_pvs;
565
566	/*
567	 * Allocate memory for random pmap data structures.  Includes the
568	 * pv_head_table.
569	 */
570
571	for (i = 0; i < vm_page_array_size; i++) {
572		vm_page_t m;
573
574		m = &vm_page_array[i];
575		TAILQ_INIT(&m->md.pv_list);
576		m->md.pv_list_count = 0;
577 	}
578
579	/*
580	 * Init the pv free list and the PTE free list.
581	 */
582	initial_pvs = vm_page_array_size;
583	if (initial_pvs < MINPV)
584		initial_pvs = MINPV;
585	if (initial_pvs > MAXPV)
586		initial_pvs = MAXPV;
587	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
588	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
589	uma_prealloc(pvzone, initial_pvs);
590
591	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
592	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
593	uma_prealloc(ptezone, initial_pvs);
594
595	/*
596	 * Now it is safe to enable pv_table recording.
597	 */
598	pmap_initialized = TRUE;
599}
600
601/*
602 * Initialize the address space (zone) for the pv_entries.  Set a
603 * high water mark so that the system can recover from excessive
604 * numbers of pv entries.
605 */
606void
607pmap_init2()
608{
609	int shpgperproc = PMAP_SHPGPERPROC;
610
611	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
612	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
613	pv_entry_high_water = 9 * (pv_entry_max / 10);
614}
615
616
617/***************************************************
618 * Manipulate TLBs for a pmap
619 ***************************************************/
620
621static void
622pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
623{
624	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
625		("invalidating TLB for non-current pmap"));
626	ia64_ptc_g(va, PAGE_SHIFT << 2);
627}
628
629static void
630pmap_invalidate_all_1(void *arg)
631{
632	u_int64_t addr;
633	int i, j;
634	register_t psr;
635
636	psr = intr_disable();
637	addr = pmap_ptc_e_base;
638	for (i = 0; i < pmap_ptc_e_count1; i++) {
639		for (j = 0; j < pmap_ptc_e_count2; j++) {
640			ia64_ptc_e(addr);
641			addr += pmap_ptc_e_stride2;
642		}
643		addr += pmap_ptc_e_stride1;
644	}
645	intr_restore(psr);
646}
647
648static void
649pmap_invalidate_all(pmap_t pmap)
650{
651	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
652		("invalidating TLB for non-current pmap"));
653
655#ifdef SMP
656	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
657#else
658	pmap_invalidate_all_1(0);
659#endif
660}
661
662static u_int32_t
663pmap_allocate_rid(void)
664{
665	uint64_t bit, bits;
666	int rid;
667
668	mtx_lock(&pmap_ridmutex);
669	if (pmap_ridcount == pmap_ridmax)
670		panic("pmap_allocate_rid: All Region IDs used");
671
672	/* Find an index with a free bit. */
673	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
674		pmap_rididx++;
675		if (pmap_rididx == pmap_ridmapsz)
676			pmap_rididx = 0;
677	}
678	rid = pmap_rididx * 64;
679
680	/* Find a free bit. */
681	bit = 1UL;
682	while (bits & bit) {
683		rid++;
684		bit <<= 1;
685	}
686
687	pmap_ridmap[pmap_rididx] |= bit;
688	pmap_ridcount++;
689	mtx_unlock(&pmap_ridmutex);
690
691	return rid;
692}
693
694static void
695pmap_free_rid(u_int32_t rid)
696{
697	uint64_t bit;
698	int idx;
699
700	idx = rid / 64;
701	bit = ~(1UL << (rid & 63));
702
703	mtx_lock(&pmap_ridmutex);
704	pmap_ridmap[idx] &= bit;
705	pmap_ridcount--;
706	mtx_unlock(&pmap_ridmutex);
707}
708
709/***************************************************
710 * Low level helper routines.....
711 ***************************************************/
712
713/*
714 * Install a pte into the VHPT
715 */
716static PMAP_INLINE void
717pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
718{
719	u_int64_t *vhp, *p;
720
721	/* invalidate the pte */
722	atomic_set_64(&vhpte->pte_tag, 1L << 63);
723	ia64_mf();			/* make sure everyone sees */
724
725	vhp = (u_int64_t *) vhpte;
726	p = (u_int64_t *) pte;
727
728	vhp[0] = p[0];
729	vhp[1] = p[1];
730	vhp[2] = p[2];			/* sets ti to one */
731
732	ia64_mf();
733}
734
735/*
736 * Compare essential parts of pte.
737 */
738static PMAP_INLINE int
739pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
740{
741	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
742}
743
744/*
745 * Return non-zero if modifications at the given virtual address should
746 * be tracked, i.e. if the address lies outside the kernel 'clean' submap.
747 */
748static PMAP_INLINE int
749pmap_track_modified(vm_offset_t va)
750{
751	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
752		return 1;
753	else
754		return 0;
755}
756
757/*
758 * Create the KSTACK for a new thread.
759 * This routine directly affects the fork perf for a process/thread.
760 */
761void
762pmap_new_thread(struct thread *td, int pages)
763{
764
765	/* Bounds check */
766	if (pages <= 1)
767		pages = KSTACK_PAGES;
768	else if (pages > KSTACK_MAX_PAGES)
769		pages = KSTACK_MAX_PAGES;
770	td->td_kstack = (vm_offset_t)malloc(pages * PAGE_SIZE, M_PMAP,
771	    M_WAITOK);
772	td->td_kstack_pages = pages;
773}
774
775/*
776 * Dispose the KSTACK for a thread that has exited.
777 * This routine directly impacts the exit perf of a process/thread.
778 */
779void
780pmap_dispose_thread(struct thread *td)
781{
782
783	free((void*)td->td_kstack, M_PMAP);
784	td->td_kstack = 0;
785	td->td_kstack_pages = 0;
786}
787
788/*
789 * Set up a variable sized alternate kstack.  This appears to be MI.
790 */
791void
792pmap_new_altkstack(struct thread *td, int pages)
793{
794
795	td->td_altkstack = td->td_kstack;
796	td->td_altkstack_obj = td->td_kstack_obj;
797	td->td_altkstack_pages = td->td_kstack_pages;
798	pmap_new_thread(td, pages);
799}
800
801void
802pmap_dispose_altkstack(struct thread *td)
803{
804
805	pmap_dispose_thread(td);
806	td->td_kstack = td->td_altkstack;
807	td->td_kstack_obj = td->td_altkstack_obj;
808	td->td_kstack_pages = td->td_altkstack_pages;
809	td->td_altkstack = 0;
810	td->td_altkstack_obj = NULL;
811	td->td_altkstack_pages = 0;
812}
813
814/*
815 * Allow the KSTACK for a thread to be prejudicially paged out.
816 */
817void
818pmap_swapout_thread(struct thread *td)
819{
820}
821
822/*
823 * Bring the KSTACK for a specified thread back in.
824 */
825void
826pmap_swapin_thread(struct thread *td)
827{
828}
829
830/***************************************************
831 * Page table page management routines.....
832 ***************************************************/
833
834void
835pmap_pinit0(struct pmap *pmap)
836{
837	/* kernel_pmap is the same as any other pmap. */
838	pmap_pinit(pmap);
839}
840
841/*
842 * Initialize a preallocated and zeroed pmap structure,
843 * such as one in a vmspace structure.
844 */
845void
846pmap_pinit(struct pmap *pmap)
847{
848	int i;
849
850	pmap->pm_flags = 0;
851	for (i = 0; i < 5; i++)
852		pmap->pm_rid[i] = 0;
853	pmap->pm_ptphint = NULL;
854	pmap->pm_active = 0;
855	TAILQ_INIT(&pmap->pm_pvlist);
856	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
857}
858
859/*
860 * Wire in kernel global address entries.  To avoid a race condition
861 * between pmap initialization and pmap_growkernel, this procedure
862 * should be called after the vmspace is attached to the process
863 * but before this pmap is activated.
864 */
865void
866pmap_pinit2(struct pmap *pmap)
867{
868	int i;
869
870	for (i = 0; i < 5; i++)
871		pmap->pm_rid[i] = pmap_allocate_rid();
872}
873
874/***************************************************
875 * Pmap allocation/deallocation routines.
876 ***************************************************/
877
878/*
879 * Release any resources held by the given physical map.
880 * Called when a pmap initialized by pmap_pinit is being released.
881 * Should only be called if the map contains no valid mappings.
882 */
883void
884pmap_release(pmap_t pmap)
885{
886	int i;
887
888	for (i = 0; i < 5; i++)
889		if (pmap->pm_rid[i])
890			pmap_free_rid(pmap->pm_rid[i]);
891}
892
893/*
894 * grow the number of kernel page table entries, if needed
895 */
896void
897pmap_growkernel(vm_offset_t addr)
898{
899	struct ia64_lpte *ptepage;
900	vm_page_t nkpg;
901
902	if (kernel_vm_end >= addr)
903		return;
904
905	critical_enter();
906
907	while (kernel_vm_end < addr) {
908		/* We could handle more by increasing the size of kptdir. */
909		if (nkpt == MAXKPT)
910			panic("pmap_growkernel: out of kernel address space");
911
912		nkpg = vm_page_alloc(NULL, nkpt,
913		    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
914		if (!nkpg)
915			panic("pmap_growkernel: no memory to grow kernel");
916
917		ptepage = (struct ia64_lpte *)
918		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
919		bzero(ptepage, PAGE_SIZE);
920		ia64_kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
921
922		nkpt++;
923		kernel_vm_end += PAGE_SIZE * NKPTEPG;
924	}
925
926	critical_exit();
927}
928
929/***************************************************
930 * page management routines.
931 ***************************************************/
932
933/*
934 * free the pv_entry back to the free list
935 */
936static PMAP_INLINE void
937free_pv_entry(pv_entry_t pv)
938{
939	pv_entry_count--;
940	uma_zfree(pvzone, pv);
941}
942
943/*
944 * get a new pv_entry, allocating a block from the system
945 * when needed.
946 * the memory allocation is performed bypassing the malloc code
947 * because of the possibility of allocations at interrupt time.
948 */
949static pv_entry_t
950get_pv_entry(void)
951{
952	pv_entry_count++;
953	if (pv_entry_high_water &&
954		(pv_entry_count > pv_entry_high_water) &&
955		(pmap_pagedaemon_waken == 0)) {
956		pmap_pagedaemon_waken = 1;
957		wakeup (&vm_pages_needed);
958	}
959	return uma_zalloc(pvzone, M_NOWAIT);
960}
961
962/*
963 * Add an ia64_lpte to the VHPT.
964 */
965static void
966pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
967{
968	struct ia64_lpte *vhpte;
969
970	pmap_vhpt_inserts++;
971	pmap_vhpt_resident++;
972
973	vhpte = (struct ia64_lpte *) ia64_thash(va);
974
975	if (vhpte->pte_chain)
976		pmap_vhpt_collisions++;
977
978	pte->pte_chain = vhpte->pte_chain;
979	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
980
981	if (!vhpte->pte_p && pte->pte_p)
982		pmap_install_pte(vhpte, pte);
983	else
984		ia64_mf();
985}
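
/*
 * Editorial summary of the scheme above (not in the original source):
 * ia64_thash(va) names the VHPT bucket for va and ia64_ttag(va) the tag
 * that identifies the mapping within it; collisions hang off pte_chain as
 * physical addresses, which pmap_find_vhpt() and pmap_remove_vhpt() below
 * walk via IA64_PHYS_TO_RR7().
 */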
986
987/*
988 * Update VHPT after a pte has changed.
989 */
990static void
991pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
992{
993	struct ia64_lpte *vhpte;
994
995	vhpte = (struct ia64_lpte *) ia64_thash(va);
996
997	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
998	    && pte->pte_p)
999		pmap_install_pte(vhpte, pte);
1000}
1001
1002/*
1003 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1004 * worked or an appropriate error code otherwise.
1005 */
1006static int
1007pmap_remove_vhpt(vm_offset_t va)
1008{
1009	struct ia64_lpte *pte;
1010	struct ia64_lpte *lpte;
1011	struct ia64_lpte *vhpte;
1012	u_int64_t tag;
1013	int error = ENOENT;
1014
1015	vhpte = (struct ia64_lpte *) ia64_thash(va);
1016
1017	/*
1018	 * If the VHPTE is invalid, there can't be a collision chain.
1019	 */
1020	if (!vhpte->pte_p) {
1021		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1022		printf("can't remove vhpt entry for 0x%lx\n", va);
1023		goto done;
1024	}
1025
1026	lpte = vhpte;
1027	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1028	tag = ia64_ttag(va);
1029
1030	while (pte->pte_tag != tag) {
1031		lpte = pte;
1032		if (pte->pte_chain)
1033			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1034		else {
1035			printf("can't remove vhpt entry for 0x%lx\n", va);
1036			goto done;
1037		}
1038	}
1039
1040	/*
1041	 * Snip this pte out of the collision chain.
1042	 */
1043	lpte->pte_chain = pte->pte_chain;
1044
1045	/*
1046	 * If the VHPTE matches as well, change it to map the first
1047	 * element from the chain if there is one.
1048	 */
1049	if (vhpte->pte_tag == tag) {
1050		if (vhpte->pte_chain) {
1051			pte = (struct ia64_lpte *)
1052				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1053			pmap_install_pte(vhpte, pte);
1054		} else {
1055			vhpte->pte_p = 0;
1056			ia64_mf();
1057		}
1058	}
1059
1060	pmap_vhpt_resident--;
1061	error = 0;
1062 done:
1063	return error;
1064}
1065
1066/*
1067 * Find the ia64_lpte for the given va, if any.
1068 */
1069static struct ia64_lpte *
1070pmap_find_vhpt(vm_offset_t va)
1071{
1072	struct ia64_lpte *pte;
1073	u_int64_t tag;
1074
1075	pte = (struct ia64_lpte *) ia64_thash(va);
1076	if (!pte->pte_chain) {
1077		pte = 0;
1078		goto done;
1079	}
1080
1081	tag = ia64_ttag(va);
1082	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1083
1084	while (pte->pte_tag != tag) {
1085		if (pte->pte_chain) {
1086			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1087		} else {
1088			pte = 0;
1089			break;
1090		}
1091	}
1092
1093 done:
1094	return pte;
1095}
1096
1097/*
1098 * Remove an entry from the list of managed mappings.
1099 */
1100static int
1101pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1102{
1103	if (!pv) {
1104		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1105			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1106				if (pmap == pv->pv_pmap && va == pv->pv_va)
1107					break;
1108			}
1109		} else {
1110			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1111				if (va == pv->pv_va)
1112					break;
1113			}
1114		}
1115	}
1116
1117	if (pv) {
1118		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1119		m->md.pv_list_count--;
1120		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1121			vm_page_flag_clear(m, PG_WRITEABLE);
1122
1123		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1124		free_pv_entry(pv);
1125		return 0;
1126	} else {
1127		return ENOENT;
1128	}
1129}
1130
1131/*
1132 * Create a pv entry for page at pa for
1133 * (pmap, va).
1134 */
1135static void
1136pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1137{
1138	pv_entry_t pv;
1139
1140	pv = get_pv_entry();
1141	pv->pv_pmap = pmap;
1142	pv->pv_va = va;
1143
1144	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1145	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1146	m->md.pv_list_count++;
1147}
1148
1149/*
1150 *	Routine:	pmap_extract
1151 *	Function:
1152 *		Extract the physical page address associated
1153 *		with the given map/virtual_address pair.
1154 */
1155vm_offset_t
1156pmap_extract(pmap, va)
1157	register pmap_t pmap;
1158	vm_offset_t va;
1159{
1160	struct ia64_lpte *pte;
1161	pmap_t oldpmap;
1162
1163	if (!pmap)
1164		return 0;
1165
1166	oldpmap = pmap_install(pmap);
1167	pte = pmap_find_vhpt(va);
1168	pmap_install(oldpmap);
1169
1170	if (!pte)
1171		return 0;
1172
1173	return pmap_pte_pa(pte);
1174}
1175
1176/***************************************************
1177 * Low level mapping routines.....
1178 ***************************************************/
1179
1180/*
1181 * Find the kernel lpte for mapping the given virtual address, which
1182 * must be in the part of region 5 which we can cover with our kernel
1183 * 'page tables'.
1184 */
1185static struct ia64_lpte *
1186pmap_find_kpte(vm_offset_t va)
1187{
1188	KASSERT((va >> 61) == 5,
1189		("kernel mapping 0x%lx not in region 5", va));
1190	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1191		("kernel mapping 0x%lx out of range", va));
1192	return (&ia64_kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)]);
1193}
1194
1195/*
1196 * Find a pte suitable for mapping a user-space address. If one exists
1197 * in the VHPT, that one will be returned, otherwise a new pte is
1198 * allocated.
1199 */
1200static struct ia64_lpte *
1201pmap_find_pte(vm_offset_t va)
1202{
1203	struct ia64_lpte *pte;
1204
1205	if (va >= VM_MAXUSER_ADDRESS)
1206		return pmap_find_kpte(va);
1207
1208	pte = pmap_find_vhpt(va);
1209	if (!pte) {
1210		pte = uma_zalloc(ptezone, M_WAITOK);
1211		pte->pte_p = 0;
1212	}
1213	return pte;
1214}
1215
1216/*
1217 * Free a pte which is now unused. This simply returns it to the zone
1218 * allocator if it is a user mapping. For kernel mappings, clear the
1219 * valid bit to make it clear that the mapping is not currently used.
1220 */
1221static void
1222pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1223{
1224	if (va < VM_MAXUSER_ADDRESS)
1225		uma_zfree(ptezone, pte);
1226	else
1227		pte->pte_p = 0;
1228}
1229
1230/*
1231 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1232 * the pte was originally valid, then it is assumed to already be in the
1233 * VHPT.
1234 */
1235static void
1236pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1237	     int ig, int pl, int ar)
1238{
1239	int wasvalid = pte->pte_p;
1240
1241	pte->pte_p = 1;
1242	pte->pte_ma = PTE_MA_WB;
1243	if (ig & PTE_IG_MANAGED) {
1244		pte->pte_a = 0;
1245		pte->pte_d = 0;
1246	} else {
1247		pte->pte_a = 1;
1248		pte->pte_d = 1;
1249	}
1250	pte->pte_pl = pl;
1251	pte->pte_ar = ar;
1252	pte->pte_ppn = pa >> 12;
1253	pte->pte_ed = 0;
1254	pte->pte_ig = ig;
1255
1256	pte->pte_ps = PAGE_SHIFT;
1257	pte->pte_key = 0;
1258
1259	pte->pte_tag = ia64_ttag(va);
1260
1261	if (wasvalid) {
1262		pmap_update_vhpt(pte, va);
1263	} else {
1264		pmap_enter_vhpt(pte, va);
1265	}
1266}
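
/*
 * Editorial note (our reading of the Itanium VM architecture, not original
 * text): managed mappings start with the access and dirty bits clear, so
 * the first reference or write raises an Access/Dirty-Bit fault that sets
 * them; that is what makes the pte_a/pte_d checks in pmap_remove_pte() and
 * pmap_protect() meaningful.  Unmanaged mappings pre-set both bits to
 * avoid those faults.
 */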
1267
1268/*
1269 * If a pte contains a valid mapping, clear it and update the VHPT.
1270 */
1271static void
1272pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1273{
1274	if (pte->pte_p) {
1275		pmap_remove_vhpt(va);
1276		ia64_ptc_g(va, PAGE_SHIFT << 2);
1277		pte->pte_p = 0;
1278	}
1279}
1280
1281/*
1282 * Remove the (possibly managed) mapping represented by pte from the
1283 * given pmap.
1284 */
1285static int
1286pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1287		pv_entry_t pv, int freepte)
1288{
1289	int error;
1290	vm_page_t m;
1291
1292	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1293		("removing pte for non-current pmap"));
1294
1295	/*
1296	 * First remove from the VHPT.
1297	 */
1298	error = pmap_remove_vhpt(va);
1299	if (error)
1300		return error;
1301
1302	/*
1303	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1304	 */
1305	pte->pte_p = 0;
1306
1307	if (pte->pte_ig & PTE_IG_WIRED)
1308		pmap->pm_stats.wired_count -= 1;
1309
1310	pmap->pm_stats.resident_count -= 1;
1311	if (pte->pte_ig & PTE_IG_MANAGED) {
1312		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1313		if (pte->pte_d)
1314			if (pmap_track_modified(va))
1315				vm_page_dirty(m);
1316		if (pte->pte_a)
1317			vm_page_flag_set(m, PG_REFERENCED);
1318
1319		if (freepte)
1320			pmap_free_pte(pte, va);
1321		return pmap_remove_entry(pmap, m, va, pv);
1322	} else {
1323		if (freepte)
1324			pmap_free_pte(pte, va);
1325		return 0;
1326	}
1327}
1328
1329/*
1330 * Extract the physical page address associated with a kernel
1331 * virtual address.
1332 */
1333vm_paddr_t
1334pmap_kextract(vm_offset_t va)
1335{
1336	struct ia64_lpte *pte;
1337	vm_offset_t gwpage;
1338
1339	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1340
1341	/* Regions 6 and 7 are direct mapped. */
1342	if (va >= IA64_RR_BASE(6))
1343		return (IA64_RR_MASK(va));
1344
1345	/* EPC gateway page? */
1346	gwpage = (vm_offset_t)ia64_get_k5();
1347	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1348		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1349
1350	/* Bail out if the virtual address is beyond our limits. */
1351	if (IA64_RR_MASK(va) >= nkpt * PAGE_SIZE * NKPTEPG)
1352		return (0);
1353
1354	pte = pmap_find_kpte(va);
1355	if (!pte->pte_p)
1356		return (0);
1357	return ((pte->pte_ppn << 12) | (va & PAGE_MASK));
1358}
1359
1360/*
1361 * Add a list of wired pages to the kva
1362 * this routine is only used for temporary
1363 * kernel mappings that do not need to have
1364 * page modification or references recorded.
1365 * Note that old mappings are simply written
1366 * over.  The page *must* be wired.
1367 */
1368void
1369pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1370{
1371	int i;
1372	struct ia64_lpte *pte;
1373
1374	for (i = 0; i < count; i++) {
1375		vm_offset_t tva = va + i * PAGE_SIZE;
1376		int wasvalid;
1377		pte = pmap_find_kpte(tva);
1378		wasvalid = pte->pte_p;
1379		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1380			     0, PTE_PL_KERN, PTE_AR_RWX);
1381		if (wasvalid)
1382			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1383	}
1384}
1385
1386/*
1387 * this routine jerks page mappings from the
1388 * kernel -- it is meant only for temporary mappings.
1389 */
1390void
1391pmap_qremove(vm_offset_t va, int count)
1392{
1393	int i;
1394	struct ia64_lpte *pte;
1395
1396	for (i = 0; i < count; i++) {
1397		pte = pmap_find_kpte(va);
1398		pmap_clear_pte(pte, va);
1399		va += PAGE_SIZE;
1400	}
1401}
1402
1403/*
1404 * Add a wired page to the kva.
1405 */
1406void
1407pmap_kenter(vm_offset_t va, vm_offset_t pa)
1408{
1409	struct ia64_lpte *pte;
1410	int wasvalid;
1411
1412	pte = pmap_find_kpte(va);
1413	wasvalid = pte->pte_p;
1414	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1415	if (wasvalid)
1416		ia64_ptc_g(va, PAGE_SHIFT << 2);
1417}
1418
1419/*
1420 * Remove a page from the kva
1421 */
1422void
1423pmap_kremove(vm_offset_t va)
1424{
1425	struct ia64_lpte *pte;
1426
1427	pte = pmap_find_kpte(va);
1428	pmap_clear_pte(pte, va);
1429}
1430
1431/*
1432 *	Used to map a range of physical addresses into kernel
1433 *	virtual address space.
1434 *
1435 *	The value passed in '*virt' is a suggested virtual address for
1436 *	the mapping. Architectures which can support a direct-mapped
1437 *	physical to virtual region can return the appropriate address
1438 *	within that region, leaving '*virt' unchanged. Other
1439 *	architectures should map the pages starting at '*virt' and
1440 *	update '*virt' with the first usable address after the mapped
1441 *	region.
1442 */
1443vm_offset_t
1444pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1445{
1446	return IA64_PHYS_TO_RR7(start);
1447}
1448
1449/*
1450 * Remove a single page from a process address space
1451 */
1452static void
1453pmap_remove_page(pmap_t pmap, vm_offset_t va)
1454{
1455	struct ia64_lpte *pte;
1456
1457	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1458		("removing page for non-current pmap"));
1459
1460	pte = pmap_find_vhpt(va);
1461	if (pte) {
1462		pmap_remove_pte(pmap, pte, va, 0, 1);
1463		pmap_invalidate_page(pmap, va);
1464	}
1465	return;
1466}
1467
1468/*
1469 *	Remove the given range of addresses from the specified map.
1470 *
1471 *	It is assumed that the start and end are properly
1472 *	rounded to the page size.
1473 */
1474void
1475pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1476{
1477	pmap_t oldpmap;
1478	vm_offset_t va;
1479	pv_entry_t pv;
1480	struct ia64_lpte *pte;
1481
1482	if (pmap == NULL)
1483		return;
1484
1485	if (pmap->pm_stats.resident_count == 0)
1486		return;
1487
1488	oldpmap = pmap_install(pmap);
1489
1490	/*
1491	 * special handling of removing one page.  a very
1492	 * common operation and easy to short circuit some
1493	 * code.
1494	 */
1495	if (sva + PAGE_SIZE == eva) {
1496		pmap_remove_page(pmap, sva);
1497		pmap_install(oldpmap);
1498		return;
1499	}
1500
1501	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1502		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1503			va = pv->pv_va;
1504			if (va >= sva && va < eva) {
1505				pte = pmap_find_vhpt(va);
1506				pmap_remove_pte(pmap, pte, va, pv, 1);
1507				pmap_invalidate_page(pmap, va);
1508			}
1509		}
1510
1511	} else {
1512		for (va = sva; va < eva; va += PAGE_SIZE) {
1513			pte = pmap_find_vhpt(va);
1514			if (pte) {
1515				pmap_remove_pte(pmap, pte, va, 0, 1);
1516				pmap_invalidate_page(pmap, va);
1517			}
1518		}
1519	}
1520
1521	pmap_install(oldpmap);
1522}
1523
1524/*
1525 *	Routine:	pmap_remove_all
1526 *	Function:
1527 *		Removes this physical page from
1528 *		all physical maps in which it resides.
1529 *		Reflects back modify bits to the pager.
1530 *
1531 *	Notes:
1532 *		Original versions of this routine were very
1533 *		inefficient because they iteratively called
1534 *		pmap_remove (slow...)
1535 */
1536
1537void
1538pmap_remove_all(vm_page_t m)
1539{
1540	pmap_t oldpmap;
1541	pv_entry_t pv;
1542	int s;
1543
1544#if defined(PMAP_DIAGNOSTIC)
1545	/*
1546	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1547	 * pages!
1548	 */
1549	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1550		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%lx", VM_PAGE_TO_PHYS(m));
1551	}
1552#endif
1553
1554	s = splvm();
1555
1556	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1557		struct ia64_lpte *pte;
1558		pmap_t pmap = pv->pv_pmap;
1559		vm_offset_t va = pv->pv_va;
1560
1561		oldpmap = pmap_install(pmap);
1562		pte = pmap_find_vhpt(va);
1563		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1564			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1565		pmap_remove_pte(pmap, pte, va, pv, 1);
1566		pmap_invalidate_page(pmap, va);
1567		pmap_install(oldpmap);
1568	}
1569
1570	vm_page_flag_clear(m, PG_WRITEABLE);
1571
1572	splx(s);
1573	return;
1574}
1575
1576/*
1577 *	Set the physical protection on the
1578 *	specified range of this map as requested.
1579 */
1580void
1581pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1582{
1583	pmap_t oldpmap;
1584	struct ia64_lpte *pte;
1585	int newprot;
1586
1587	if (pmap == NULL)
1588		return;
1589
1590	oldpmap = pmap_install(pmap);
1591
1592	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1593		pmap_remove(pmap, sva, eva);
1594		pmap_install(oldpmap);
1595		return;
1596	}
1597
1598	if (prot & VM_PROT_WRITE) {
1599		pmap_install(oldpmap);
1600		return;
1601	}
1602
1603	newprot = pte_prot(pmap, prot);
1604
1605	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1606		panic("pmap_protect: unaligned addresses");
1607
1608	while (sva < eva) {
1609		/*
1610		 * If page is invalid, skip this page
1611		 */
1612		pte = pmap_find_vhpt(sva);
1613		if (!pte) {
1614			sva += PAGE_SIZE;
1615			continue;
1616		}
1617
1618		if (pmap_pte_prot(pte) != newprot) {
1619			if (pte->pte_ig & PTE_IG_MANAGED) {
1620				vm_offset_t pa = pmap_pte_pa(pte);
1621				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1622				if (pte->pte_d) {
1623					if (pmap_track_modified(sva))
1624						vm_page_dirty(m);
1625					pte->pte_d = 0;
1626				}
1627				if (pte->pte_a) {
1628					vm_page_flag_set(m, PG_REFERENCED);
1629					pte->pte_a = 0;
1630				}
1631			}
1632			pmap_pte_set_prot(pte, newprot);
1633			pmap_update_vhpt(pte, sva);
1634			pmap_invalidate_page(pmap, sva);
1635		}
1636
1637		sva += PAGE_SIZE;
1638	}
1639	pmap_install(oldpmap);
1640}
1641
1642/*
1643 *	Insert the given physical page (p) at
1644 *	the specified virtual address (v) in the
1645 *	target physical map with the protection requested.
1646 *
1647 *	If specified, the page will be wired down, meaning
1648 *	that the related pte can not be reclaimed.
1649 *
1650 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1651 *	or lose information.  That is, this routine must actually
1652 *	insert this page into the given map NOW.
1653 */
1654void
1655pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1656	   boolean_t wired)
1657{
1658	pmap_t oldpmap;
1659	vm_offset_t pa;
1660	vm_offset_t opa;
1661	struct ia64_lpte origpte;
1662	struct ia64_lpte *pte;
1663	int managed;
1664
1665	if (pmap == NULL)
1666		return;
1667
1668	oldpmap = pmap_install(pmap);
1669
1670	va &= ~PAGE_MASK;
1671#ifdef PMAP_DIAGNOSTIC
1672	if (va > VM_MAX_KERNEL_ADDRESS)
1673		panic("pmap_enter: toobig");
1674#endif
1675
1676	/*
1677	 * Find (or create) a pte for the given mapping.
1678	 */
1679	pte = pmap_find_pte(va);
1680	origpte = *pte;
1681
1682	if (origpte.pte_p)
1683		opa = pmap_pte_pa(&origpte);
1684	else
1685		opa = 0;
1686	managed = 0;
1687
1688	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1689
1690	/*
1691	 * Mapping has not changed, must be protection or wiring change.
1692	 */
1693	if (origpte.pte_p && (opa == pa)) {
1694		/*
1695		 * Wiring change, just update stats. We don't worry about
1696		 * wiring PT pages as they remain resident as long as there
1697		 * are valid mappings in them. Hence, if a user page is wired,
1698		 * the PT page will be also.
1699		 */
1700		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1701			pmap->pm_stats.wired_count++;
1702		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1703			pmap->pm_stats.wired_count--;
1704
1705		/*
1706		 * We might be turning off write access to the page,
1707		 * so we go ahead and sense modify status.
1708		 */
1709		if (origpte.pte_ig & PTE_IG_MANAGED) {
1710			if (origpte.pte_d && pmap_track_modified(va)) {
1711				vm_page_t om;
1712				om = PHYS_TO_VM_PAGE(opa);
1713				vm_page_dirty(om);
1714			}
1715		}
1716
1717		managed = origpte.pte_ig & PTE_IG_MANAGED;
1718		goto validate;
1719	}
1720	/*
1721	 * Mapping has changed, invalidate old range and fall
1722	 * through to handle validating new mapping.
1723	 */
1724	if (opa) {
1725		int error;
1726		vm_page_lock_queues();
1727		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1728		vm_page_unlock_queues();
1729		if (error)
1730			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1731	}
1732
1733	/*
1734	 * Enter on the PV list if part of our managed memory.
1735	 */
1736	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1737		pmap_insert_entry(pmap, va, m);
1738		managed |= PTE_IG_MANAGED;
1739	}
1740
1741	/*
1742	 * Increment counters
1743	 */
1744	pmap->pm_stats.resident_count++;
1745	if (wired)
1746		pmap->pm_stats.wired_count++;
1747
1748validate:
1749
1750	/*
1751	 * Now validate mapping with desired protection/wiring. This
1752	 * adds the pte to the VHPT if necessary.
1753	 */
1754	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1755		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1756
1757	/*
1758	 * if the mapping or permission bits are different, we need
1759	 * to invalidate the page.
1760	 */
1761	if (!pmap_equal_pte(&origpte, pte))
1762		pmap_invalidate_page(pmap, va);
1763
1764	pmap_install(oldpmap);
1765}
1766
1767/*
1768 * this code makes some *MAJOR* assumptions:
1769 * 1. Current pmap & pmap exists.
1770 * 2. Not wired.
1771 * 3. Read access.
1772 * 4. No page table pages.
1773 * 5. Tlbflush is deferred to calling procedure.
1774 * 6. Page IS managed.
1775 * but is *MUCH* faster than pmap_enter...
1776 */
1777
1778static void
1779pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1780{
1781	struct ia64_lpte *pte;
1782	pmap_t oldpmap;
1783
1784	oldpmap = pmap_install(pmap);
1785
1786	pte = pmap_find_pte(va);
1787	if (pte->pte_p) {
		pmap_install(oldpmap);
1788		return;
	}
1789
1790	/*
1791	 * Enter on the PV list since it is part of our managed memory.
1792	 */
1793	pmap_insert_entry(pmap, va, m);
1794
1795	/*
1796	 * Increment counters
1797	 */
1798	pmap->pm_stats.resident_count++;
1799
1800	/*
1801	 * Initialise PTE with read-only protection and enter into VHPT.
1802	 */
1803	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1804		     PTE_IG_MANAGED,
1805		     PTE_PL_USER, PTE_AR_R);
1806
1807	pmap_install(oldpmap);
1808}
1809
1810/*
1811 * Make temporary mapping for a physical address. This is called
1812 * during dump.
1813 */
1814void *
1815pmap_kenter_temporary(vm_offset_t pa, int i)
1816{
1817	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1818}
1819
1820#define MAX_INIT_PT (96)
1821/*
1822 * pmap_object_init_pt preloads the ptes for a given object
1823 * into the specified pmap.  This eliminates the blast of soft
1824 * faults on process startup and immediately after an mmap.
1825 */
1826void
1827pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1828		    vm_object_t object, vm_pindex_t pindex,
1829		    vm_size_t size, int limit)
1830{
1831	pmap_t oldpmap;
1832	vm_offset_t tmpidx;
1833	int psize;
1834	vm_page_t p;
1835	int objpgs;
1836
1837	if (pmap == NULL || object == NULL)
1838		return;
1839
1840	oldpmap = pmap_install(pmap);
1841
1842	psize = ia64_btop(size);
1843
1844	if ((object->type != OBJT_VNODE) ||
1845		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1846			(object->resident_page_count > MAX_INIT_PT))) {
1847		pmap_install(oldpmap);
1848		return;
1849	}
1850
1851	if (psize + pindex > object->size) {
1852		if (object->size < pindex) {
			pmap_install(oldpmap);
1853			return;
		}
1854		psize = object->size - pindex;
1855	}
1856
1857	/*
1858	 * if we are processing a major portion of the object, then scan the
1859	 * entire thing.
1860	 */
1861	if (psize > (object->resident_page_count >> 2)) {
1862		objpgs = psize;
1863
1864		for (p = TAILQ_FIRST(&object->memq);
1865		    ((objpgs > 0) && (p != NULL));
1866		    p = TAILQ_NEXT(p, listq)) {
1867
1868			tmpidx = p->pindex;
1869			if (tmpidx < pindex) {
1870				continue;
1871			}
1872			tmpidx -= pindex;
1873			if (tmpidx >= psize) {
1874				continue;
1875			}
1876			/*
1877			 * don't allow an madvise to blow away our really
1878			 * free pages by allocating pv entries.
1879			 */
1880			if ((limit & MAP_PREFAULT_MADVISE) &&
1881			    cnt.v_free_count < cnt.v_free_reserved) {
1882				break;
1883			}
1884			vm_page_lock_queues();
1885			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1886				(p->busy == 0) &&
1887			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1888				if ((p->queue - p->pc) == PQ_CACHE)
1889					vm_page_deactivate(p);
1890				vm_page_busy(p);
1891				vm_page_unlock_queues();
1892				pmap_enter_quick(pmap,
1893						 addr + ia64_ptob(tmpidx), p);
1894				vm_page_lock_queues();
1895				vm_page_wakeup(p);
1896			}
1897			vm_page_unlock_queues();
1898			objpgs -= 1;
1899		}
1900	} else {
1901		/*
1902		 * else lookup the pages one-by-one.
1903		 */
1904		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1905			/*
1906			 * don't allow an madvise to blow away our really
1907			 * free pages by allocating pv entries.
1908			 */
1909			if ((limit & MAP_PREFAULT_MADVISE) &&
1910			    cnt.v_free_count < cnt.v_free_reserved) {
1911				break;
1912			}
1913			p = vm_page_lookup(object, tmpidx + pindex);
1914			if (p == NULL)
1915				continue;
1916			vm_page_lock_queues();
1917			if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
1918				(p->busy == 0) &&
1919			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1920				if ((p->queue - p->pc) == PQ_CACHE)
1921					vm_page_deactivate(p);
1922				vm_page_busy(p);
1923				vm_page_unlock_queues();
1924				pmap_enter_quick(pmap,
1925						 addr + ia64_ptob(tmpidx), p);
1926				vm_page_lock_queues();
1927				vm_page_wakeup(p);
1928			}
1929			vm_page_unlock_queues();
1930		}
1931	}
1932	pmap_install(oldpmap);
1933	return;
1934}
1935
1936/*
1937 * pmap_prefault provides a quick way of clustering
1938 * pagefaults into a process's address space.  It is a "cousin"
1939 * of pmap_object_init_pt, except it runs at page fault time instead
1940 * of mmap time.
1941 */
1942#define PFBAK 4
1943#define PFFOR 4
1944#define PAGEORDER_SIZE (PFBAK+PFFOR)
1945
1946static int pmap_prefault_pageorder[] = {
1947	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
1948	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1949	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1950	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1951};
1952
1953void
1954pmap_prefault(pmap, addra, entry)
1955	pmap_t pmap;
1956	vm_offset_t addra;
1957	vm_map_entry_t entry;
1958{
1959	int i;
1960	vm_offset_t starta;
1961	vm_offset_t addr;
1962	vm_pindex_t pindex;
1963	vm_page_t m, mpte;
1964	vm_object_t object;
1965
1966	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1967		return;
1968
1969	object = entry->object.vm_object;
1970
1971	starta = addra - PFBAK * PAGE_SIZE;
1972	if (starta < entry->start) {
1973		starta = entry->start;
1974	} else if (starta > addra) {
1975		starta = 0;
1976	}
1977
1978	mpte = NULL;
1979	for (i = 0; i < PAGEORDER_SIZE; i++) {
1980		vm_object_t lobject;
1981		struct ia64_lpte *pte;
1982
1983		addr = addra + pmap_prefault_pageorder[i];
1984		if (addr > addra + (PFFOR * PAGE_SIZE))
1985			addr = 0;
1986
1987		if (addr < starta || addr >= entry->end)
1988			continue;
1989
1990		pte = pmap_find_vhpt(addr);
1991		if (pte && pte->pte_p)
1992			continue;
1993
1994		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
1995		lobject = object;
1996		for (m = vm_page_lookup(lobject, pindex);
1997		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
1998		    lobject = lobject->backing_object) {
1999			if (lobject->backing_object_offset & PAGE_MASK)
2000				break;
2001			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2002			m = vm_page_lookup(lobject->backing_object, pindex);
2003		}
2004
2005		/*
2006		 * give up when a page is not in memory
2007		 */
2008		if (m == NULL)
2009			break;
2010		vm_page_lock_queues();
2011		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2012			(m->busy == 0) &&
2013		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2014
2015			if ((m->queue - m->pc) == PQ_CACHE) {
2016				vm_page_deactivate(m);
2017			}
2018			vm_page_busy(m);
2019			vm_page_unlock_queues();
2020			pmap_enter_quick(pmap, addr, m);
2021			vm_page_lock_queues();
2022			vm_page_wakeup(m);
2023		}
2024		vm_page_unlock_queues();
2025	}
2026}
2027
2028/*
2029 *	Routine:	pmap_change_wiring
2030 *	Function:	Change the wiring attribute for a map/virtual-address
2031 *			pair.
2032 *	In/out conditions:
2033 *			The mapping must already exist in the pmap.
2034 */
2035void
2036pmap_change_wiring(pmap, va, wired)
2037	register pmap_t pmap;
2038	vm_offset_t va;
2039	boolean_t wired;
2040{
2041	pmap_t oldpmap;
2042	struct ia64_lpte *pte;
2043
2044	if (pmap == NULL)
2045		return;
2046
2047	oldpmap = pmap_install(pmap);
2048
2049	pte = pmap_find_vhpt(va);
2050
2051	if (wired && !pmap_pte_w(pte))
2052		pmap->pm_stats.wired_count++;
2053	else if (!wired && pmap_pte_w(pte))
2054		pmap->pm_stats.wired_count--;
2055
2056	/*
2057	 * Wiring is not a hardware characteristic so there is no need to
2058	 * invalidate TLB.
2059	 */
2060	pmap_pte_set_w(pte, wired);
2061
2062	pmap_install(oldpmap);
2063}
2064
2065
2066
2067/*
2068 *	Copy the range specified by src_addr/len
2069 *	from the source map to the range dst_addr/len
2070 *	in the destination map.
2071 *
2072 *	This routine is only advisory and need not do anything.
2073 */
2074
2075void
2076pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2077	  vm_offset_t src_addr)
2078{
2079}
2080
2081
2082/*
2083 *	pmap_zero_page zeros the specified hardware page by
2084 *	mapping it into virtual memory and using bzero to clear
2085 *	its contents.
2086 */
2087
2088void
2089pmap_zero_page(vm_page_t m)
2090{
2091	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2092	bzero((caddr_t) va, PAGE_SIZE);
2093}
2094
2095
2096/*
2097 *	pmap_zero_page_area zeros part of the specified hardware page
2098 *	using the region 7 direct mapping of physical memory and bzero
2099 *	to clear its contents.
2100 *
2101 *	off and size must reside within a single page.
2102 */
2103
2104void
2105pmap_zero_page_area(vm_page_t m, int off, int size)
2106{
2107	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2108	bzero((char *)(caddr_t)va + off, size);
2109}
2110
2111
2112/*
2113 *	pmap_zero_page_idle zeros the specified hardware page using the
2114 *	region 7 direct mapping of physical memory and bzero.  This is
2115 *	for the vm_idlezero process.
2116 */
2117
2118void
2119pmap_zero_page_idle(vm_page_t m)
2120{
2121	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2122	bzero((caddr_t) va, PAGE_SIZE);
2123}
2124
2125
2126/*
2127 *	pmap_copy_page copies the specified (machine independent)
2128 *	page using the region 7 direct mapping of physical memory
2129 *	and bcopy to copy the contents of the source page to the
2130 *	destination page.
2131 */
2132void
2133pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2134{
2135	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2136	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2137	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2138}
2139
2140/*
2141 * Returns true if the pmap's pv is one of the first
2142 * 16 pvs linked to from this page.  This count may
2143 * be changed upwards or downwards in the future; it
2144 * is only necessary that true be returned for a small
2145 * subset of pmaps for proper page aging.
2146 */
2147boolean_t
2148pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2149{
2150	pv_entry_t pv;
2151	int loops = 0;
2152	int s;
2153
2154	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2155		return FALSE;
2156
2157	s = splvm();
2158
2159	/*
2160	 * Check the page's current mappings, returning TRUE as soon as this pmap is found.
2161	 */
2162	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2163		if (pv->pv_pmap == pmap) {
2164			splx(s);
2165			return TRUE;
2166		}
2167		loops++;
2168		if (loops >= 16)
2169			break;
2170	}
2171	splx(s);
2172	return (FALSE);
2173}
2174
2175#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2176/*
2177 * Remove all pages from the specified address space; this aids
2178 * process exit speeds.  Also, this code is special-cased for
2179 * the current process only, but the more generic (and slightly
2180 * slower) mode can be enabled.  This is much faster than
2181 * pmap_remove in the case of running down an entire address
2182 * space.
2183 */
2184void
2185pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2186{
2187	pv_entry_t pv, npv;
2188	int s;
2189
2190#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2191	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2192		printf("warning: pmap_remove_pages called with non-current pmap\n");
2193		return;
2194	}
2195#endif
2196
2197	s = splvm();
2198	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2199		pv;
2200		pv = npv) {
2201		struct ia64_lpte *pte;
2202
2203		npv = TAILQ_NEXT(pv, pv_plist);
2204
2205		if (pv->pv_va >= eva || pv->pv_va < sva) {
2206			continue;
2207		}
2208
2209		pte = pmap_find_vhpt(pv->pv_va);
2210		if (!pte)
2211			panic("pmap_remove_pages: page on pm_pvlist has no pte");
2212
2213		/*
2214		 * We cannot remove wired pages from a process' mapping
2215		 * at this time.
2216		 */
2217		if (pte->pte_ig & PTE_IG_WIRED) {
2218			continue;
2219		}
2220
2221		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2222	}
2223	splx(s);
2224
2225	pmap_invalidate_all(pmap);
2226}
2227
2228/*
2229 *	pmap_page_protect:
2230 *
2231 *	Lower the permission for all mappings to a given page.
2232 */
2233void
2234pmap_page_protect(vm_page_t m, vm_prot_t prot)
2235{
2236	pv_entry_t pv;
2237
2238	if ((prot & VM_PROT_WRITE) != 0)
2239		return;
2240	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2241		if ((m->flags & PG_WRITEABLE) == 0)
2242			return;
2243		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2244			int newprot = pte_prot(pv->pv_pmap, prot);
2245			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2246			struct ia64_lpte *pte;
2247			pte = pmap_find_vhpt(pv->pv_va);
2248			pmap_pte_set_prot(pte, newprot);
2249			pmap_update_vhpt(pte, pv->pv_va);
2250			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2251			pmap_install(oldpmap);
2252		}
2253		vm_page_flag_clear(m, PG_WRITEABLE);
2254	} else {
2255		pmap_remove_all(m);
2256	}
2257}
2258
2259/*
2260 *	pmap_ts_referenced:
2261 *
2262 *	Return a count of reference bits for a page, clearing those bits.
2263 *	It is not necessary for every reference bit to be cleared, but it
2264 *	is necessary that 0 only be returned when there are truly no
2265 *	reference bits set.
2266 *
2267 *	XXX: The exact number of bits to check and clear is a matter that
2268 *	should be tested and standardized at some point in the future for
2269 *	optimal aging of shared pages.
2270 */
2271int
2272pmap_ts_referenced(vm_page_t m)
2273{
2274	pv_entry_t pv;
2275	int count = 0;
2276
2277	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2278		return 0;
2279
2280	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2281		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2282		struct ia64_lpte *pte;
2283		pte = pmap_find_vhpt(pv->pv_va);
2284		if (pte->pte_a) {
2285			count++;
2286			pte->pte_a = 0;
2287			pmap_update_vhpt(pte, pv->pv_va);
2288			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2289		}
2290		pmap_install(oldpmap);
2291	}
2292
2293	return count;
2294}
2295
2296#if 0
2297/*
2298 *	pmap_is_referenced:
2299 *
2300 *	Return whether or not the specified physical page was referenced
2301 *	in any physical maps.
2302 */
2303static boolean_t
2304pmap_is_referenced(vm_page_t m)
2305{
2306	pv_entry_t pv;
2307
2308	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2309		return FALSE;
2310
2311	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2312		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2313		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2314		pmap_install(oldpmap);
2315		if (pte->pte_a)
2316			return 1;
2317	}
2318
2319	return 0;
2320}
2321#endif
2322
2323/*
2324 *	pmap_is_modified:
2325 *
2326 *	Return whether or not the specified physical page was modified
2327 *	in any physical maps.
2328 */
2329boolean_t
2330pmap_is_modified(vm_page_t m)
2331{
2332	pv_entry_t pv;
2333
2334	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2335		return FALSE;
2336
2337	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2338		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2339		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2340		pmap_install(oldpmap);
2341		if (pte->pte_d)
2342			return 1;
2343	}
2344
2345	return 0;
2346}
2347
2348/*
2349 *	Clear the modify bits on the specified physical page.
2350 */
2351void
2352pmap_clear_modify(vm_page_t m)
2353{
2354	pv_entry_t pv;
2355
2356	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2357		return;
2358
2359	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2360		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2361		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2362		if (pte->pte_d) {
2363			pte->pte_d = 0;
2364			pmap_update_vhpt(pte, pv->pv_va);
2365			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2366		}
2367		pmap_install(oldpmap);
2368	}
2369}
2370
2371/*
2372 *	pmap_clear_reference:
2373 *
2374 *	Clear the reference bit on the specified physical page.
2375 */
2376void
2377pmap_clear_reference(vm_page_t m)
2378{
2379	pv_entry_t pv;
2380
2381	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2382		return;
2383
2384	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2385		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2386		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2387		if (pte->pte_a) {
2388			pte->pte_a = 0;
2389			pmap_update_vhpt(pte, pv->pv_va);
2390			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2391		}
2392		pmap_install(oldpmap);
2393	}
2394}
2395
2396/*
2397 * Miscellaneous support routines follow
2398 */
2399
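/*
 * Build the protection_codes[][] table: row 0 holds the kernel
 * encodings and row 1 the user encodings, indexed by the 3-bit
 * VM_PROT_{READ,WRITE,EXECUTE} combination.  Each entry packs an ia64
 * access-rights value (PTE_AR_*) with a privilege level (PTE_PL_*);
 * pte_prot() presumably consults this table.  Note that the user entry
 * for VM_PROT_NONE uses PTE_PL_KERN so that the page is not accessible
 * from user level.
 */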
2400static void
2401ia64_protection_init(void)
2402{
2403	int prot, *kp, *up;
2404
2405	kp = protection_codes[0];
2406	up = protection_codes[1];
2407
2408	for (prot = 0; prot < 8; prot++) {
2409		switch (prot) {
2410		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2411			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2412			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2413			break;
2414
2415		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2416			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2417			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2418			break;
2419
2420		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2421			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2422			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2423			break;
2424
2425		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2426			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2427			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2428			break;
2429
2430		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2431			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2432			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2433			break;
2434
2435		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2436			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2437			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2438			break;
2439
2440		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2441			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2442			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2443			break;
2444
2445		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2446			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2447			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2448			break;
2449		}
2450	}
2451}
2452
2453/*
2454 * Map a set of physical memory pages into the kernel virtual
2455 * address space. Return a pointer to where it is mapped. This
2456 * routine is intended to be used for mapping device memory,
2457 * NOT real memory.
2458 */
2459void *
2460pmap_mapdev(vm_offset_t pa, vm_size_t size)
2461{
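	/*
	 * Region 6 provides an uncacheable identity mapping of physical
	 * addresses, which is appropriate for device memory, so no
	 * per-mapping setup is needed.
	 */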
2462	return (void *)IA64_PHYS_TO_RR6(pa);
2463}
2464
2465/*
2466 * 'Unmap' a range mapped by pmap_mapdev().
2467 */
2468void
2469pmap_unmapdev(vm_offset_t va, vm_size_t size)
2470{
2471	return;
2472}
2473
2474/*
2475 * Perform the pmap work for mincore.
2476 */
2477int
2478pmap_mincore(pmap_t pmap, vm_offset_t addr)
2479{
2480	pmap_t oldpmap;
2481	struct ia64_lpte *pte;
2482	int val = 0;
2483
2484	oldpmap = pmap_install(pmap);
2485	pte = pmap_find_vhpt(addr);
2486	pmap_install(oldpmap);
2487
2488	if (!pte)
2489		return 0;
2490
2491	if (pmap_pte_v(pte)) {
2492		vm_page_t m;
2493		vm_offset_t pa;
2494
2495		val = MINCORE_INCORE;
2496		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2497			return val;
2498
2499		pa = pmap_pte_pa(pte);
2500
2501		m = PHYS_TO_VM_PAGE(pa);
2502
2503		/*
2504		 * Modified by us
2505		 */
2506		if (pte->pte_d)
2507			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2508		/*
2509		 * Modified by someone
2510		 */
2511		else if (pmap_is_modified(m))
2512			val |= MINCORE_MODIFIED_OTHER;
2513		/*
2514		 * Referenced by us
2515		 */
2516		if (pte->pte_a)
2517			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2519		/*
2520		 * Referenced by someone
2521		 */
2522		else if (pmap_ts_referenced(m)) {
2523			val |= MINCORE_REFERENCED_OTHER;
2524			vm_page_flag_set(m, PG_REFERENCED);
2525		}
2526	}
2527	return val;
2528}
2529
2530void
2531pmap_activate(struct thread *td)
2532{
2533	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2534}
2535
2536pmap_t
2537pmap_install(pmap_t pmap)
2538{
2539	pmap_t oldpmap;
2540	int i;
2541
2542	critical_enter();
2543
2544	oldpmap = PCPU_GET(current_pmap);
2545	if (oldpmap == pmap) {
2546		critical_exit();
2547		return (oldpmap);
2548	}
2549
2550	if (oldpmap != NULL)
2551		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2552
2553	PCPU_SET(current_pmap, pmap);
2554
2555	if (pmap == NULL) {
2556		/* Invalidate regions 0-4. */
2557		ia64_set_rr(IA64_RR_BASE(0), (0 << 8)|(PAGE_SHIFT << 2)|1);
2558		ia64_set_rr(IA64_RR_BASE(1), (1 << 8)|(PAGE_SHIFT << 2)|1);
2559		ia64_set_rr(IA64_RR_BASE(2), (2 << 8)|(PAGE_SHIFT << 2)|1);
2560		ia64_set_rr(IA64_RR_BASE(3), (3 << 8)|(PAGE_SHIFT << 2)|1);
2561		ia64_set_rr(IA64_RR_BASE(4), (4 << 8)|(PAGE_SHIFT << 2)|1);
2562		critical_exit();
2563		return (oldpmap);
2564	}
2565
2566	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2567
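	/*
	 * Load this pmap's RIDs into region registers 0-4.  Each value
	 * packs the region ID in bits 8 and up, the preferred page size
	 * (PAGE_SHIFT) in bits 2-7, and sets bit 0 to enable the VHPT
	 * walker for the region.
	 */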
2568	for (i = 0; i < 5; i++) {
2569		ia64_set_rr(IA64_RR_BASE(i),
2570		    (pmap->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2571	}
2572
2573	critical_exit();
2574
2575	return (oldpmap);
2576}
2577
2578vm_offset_t
2579pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2580{
2581
2582	return addr;
2583}
2584
2585#include "opt_ddb.h"
2586
2587#ifdef DDB
2588
2589#include <ddb/ddb.h>
2590
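/*
 * Page-size names indexed by the translation's page-size field, which
 * holds log2 of the page size in bytes.
 */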
2591static const char*	psnames[] = {
2592	"1B",	"2B",	"4B",	"8B",
2593	"16B",	"32B",	"64B",	"128B",
2594	"256B",	"512B",	"1K",	"2K",
2595	"4K",	"8K",	"16K",	"32K",
2596	"64K",	"128K",	"256K",	"512K",
2597	"1M",	"2M",	"4M",	"8M",
2598	"16M",	"32M",	"64M",	"128M",
2599	"256M",	"512M",	"1G",	"2G"
2600};
2601
2602static void
2603print_trs(int type)
2604{
2605	struct ia64_pal_result	res;
2606	int			i, maxtr;
2607	struct {
2608		struct ia64_pte	pte;
2609		struct ia64_itir itir;
2610		struct ia64_ifa ifa;
2611		struct ia64_rr	rr;
2612	}			buf;
2613	static const char*	manames[] = {
2614		"WB",	"bad",	"bad",	"bad",
2615		"UC",	"UCE",	"WC",	"NaT",
2617	};
2618
2619	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2620	if (res.pal_status != 0) {
2621		db_printf("Can't get VM summary\n");
2622		return;
2623	}
2624
2625	if (type == 0)
2626		maxtr = (res.pal_result[0] >> 40) & 0xff;
2627	else
2628		maxtr = (res.pal_result[0] >> 32) & 0xff;
2629
2630	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2631	for (i = 0; i <= maxtr; i++) {
2632		bzero(&buf, sizeof(buf));
2633		res = ia64_call_pal_stacked_physical
2634			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
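		/*
		 * The low bits of the first PAL return value appear to
		 * indicate which of the access-rights, privilege-level,
		 * dirty-bit and memory-attribute fields are valid; clear
		 * any field that is not reported as valid.
		 */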
2635		if (!(res.pal_result[0] & 1))
2636			buf.pte.pte_ar = 0;
2637		if (!(res.pal_result[0] & 2))
2638			buf.pte.pte_pl = 0;
2639		if (!(res.pal_result[0] & 4))
2640			buf.pte.pte_d = 0;
2641		if (!(res.pal_result[0] & 8))
2642			buf.pte.pte_ma = 0;
2643		db_printf(
2644			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2645			buf.ifa.ifa_ig & 1,
2646			buf.rr.rr_rid,
2647			buf.ifa.ifa_vpn,
2648			buf.pte.pte_ppn,
2649			psnames[buf.itir.itir_ps],
2650			buf.pte.pte_ed,
2651			buf.pte.pte_ar,
2652			buf.pte.pte_pl,
2653			buf.pte.pte_d,
2654			buf.pte.pte_a,
2655			manames[buf.pte.pte_ma],
2656			buf.pte.pte_p,
2657			buf.itir.itir_key);
2658	}
2659}
2660
2661DB_COMMAND(itr, db_itr)
2662{
2663	print_trs(0);
2664}
2665
2666DB_COMMAND(dtr, db_dtr)
2667{
2668	print_trs(1);
2669}
2670
2671DB_COMMAND(rr, db_rr)
2672{
2673	int i;
2674	u_int64_t t;
2675	struct ia64_rr rr;
2676
2677	printf("RR RID    PgSz VE\n");
2678	for (i = 0; i < 8; i++) {
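		/*
		 * "mov %0=rr[%1]" reads the region register selected by
		 * the region bits of the virtual address in %1; passing
		 * IA64_RR_BASE(i) selects region register i.
		 */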
2679		__asm __volatile ("mov %0=rr[%1]"
2680				  : "=r"(t)
2681				  : "r"(IA64_RR_BASE(i)));
2682		*(u_int64_t *) &rr = t;
2683		printf("%d  %06x %4s %d\n",
2684		       i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2685	}
2686}
2687
2688DB_COMMAND(thash, db_thash)
2689{
2690	if (!have_addr)
2691		return;
2692
2693	db_printf("%p\n", (void *) ia64_thash(addr));
2694}
2695
2696DB_COMMAND(ttag, db_ttag)
2697{
2698	if (!have_addr)
2699		return;
2700
2701	db_printf("0x%lx\n", ia64_ttag(addr));
2702}
2703
2704#endif
2705