1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 * Copyright (c) 1998,2000 Doug Rabson
9 * All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * the Systems Programming Group of the University of Utah Computer
13 * Science Department and William Jolitz of UUNET Technologies Inc.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 *    notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in the
22 *    documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 *    must display the following acknowledgement:
25 *	This product includes software developed by the University of
26 *	California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 *    may be used to endorse or promote products derived from this software
29 *    without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * SUCH DAMAGE.
42 *
43 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
44 *	from:	i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
45 *		with some ideas from NetBSD's alpha pmap
46 * $FreeBSD: head/sys/ia64/ia64/pmap.c 115152 2003-05-19 08:02:36Z marcel $
47 */
48
49/*
50 *	Manages physical address maps.
51 *
52 *	In addition to hardware address maps, this
53 *	module is called upon to provide software-use-only
54 *	maps which may or may not be stored in the same
55 *	form as hardware maps.  These pseudo-maps are
56 *	used to store intermediate results from copy
57 *	operations to and from address spaces.
58 *
59 *	Since the information managed by this module is
60 *	also stored by the logical address mapping module,
61 *	this module may throw away valid virtual-to-physical
62 *	mappings at almost any time.  However, invalidations
63 *	of virtual-to-physical mappings must be done as
64 *	requested.
65 *
66 *	In order to cope with hardware architectures which
67 *	make virtual-to-physical map invalidates expensive,
68 *	this module may delay invalidation or protection-reduction
69 *	operations until such time as they are actually
70 *	necessary.  This module is given full information as
71 *	to which processors are currently using which maps,
72 *	and to when physical maps must be made correct.
73 */
74
75/*
76 * Following the Linux model, region IDs are allocated in groups of
77 * eight so that a single region ID can be used for as many RRs as we
78 * want by encoding the RR number into the low bits of the ID.
79 *
80 * We reserve region ID 0 for the kernel and allocate the remaining
81 * IDs for user pmaps.
82 *
83 * Region 0..4
84 *	User virtually mapped
85 *
86 * Region 5
87 *	Kernel virtually mapped
88 *
89 * Region 6
90 *	Kernel physically mapped uncacheable
91 *
92 * Region 7
93 *	Kernel physically mapped cacheable
94 */
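
/*
 * Illustrative sketch, not part of the original code: the value loaded
 * into a region register packs the region ID into bits 8..31, the
 * preferred page size (log2) into bits 2..7 and the VHPT-walker enable
 * bit into bit 0.  This mirrors how the ia64_set_rr() calls in
 * pmap_bootstrap() build their operands; the helper name below is
 * hypothetical.
 */
#if 0
static __inline u_int64_t
pmap_rr_value(u_int32_t rid, int log2pgsz, int vhpt_enable)
{

	return (((u_int64_t)rid << 8) | (log2pgsz << 2) |
	    (vhpt_enable ? 1 : 0));
}
#endif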
95
96#include <sys/param.h>
97#include <sys/kernel.h>
98#include <sys/lock.h>
99#include <sys/malloc.h>
100#include <sys/mman.h>
101#include <sys/msgbuf.h>
102#include <sys/mutex.h>
103#include <sys/proc.h>
104#include <sys/sx.h>
105#include <sys/systm.h>
106#include <sys/vmmeter.h>
107#include <sys/smp.h>
108#include <sys/sysctl.h>
109
110#include <vm/vm.h>
111#include <vm/vm_param.h>
112#include <vm/vm_kern.h>
113#include <vm/vm_page.h>
114#include <vm/vm_map.h>
115#include <vm/vm_object.h>
116#include <vm/vm_extern.h>
117#include <vm/vm_pageout.h>
118#include <vm/vm_pager.h>
119#include <vm/uma.h>
120#include <vm/uma_int.h>
121
122#include <sys/user.h>
123
124#include <machine/cpu.h>
125#include <machine/pal.h>
126#include <machine/md_var.h>
127
128/* XXX move to a header. */
129extern u_int64_t ia64_gateway_page[];
130
131MALLOC_DEFINE(M_PMAP, "PMAP", "PMAP Structures");
132
133#ifndef KSTACK_MAX_PAGES
134#define	KSTACK_MAX_PAGES 32
135#endif
136
137#ifndef PMAP_SHPGPERPROC
138#define PMAP_SHPGPERPROC 200
139#endif
140
141#if defined(DIAGNOSTIC)
142#define PMAP_DIAGNOSTIC
143#endif
144
145#define MINPV 2048	/* Preallocate at least this many */
146#define MAXPV 20480	/* But no more than this */
147
148#if 0
149#define PMAP_DIAGNOSTIC
150#define PMAP_DEBUG
151#endif
152
153#if !defined(PMAP_DIAGNOSTIC)
154#define PMAP_INLINE __inline
155#else
156#define PMAP_INLINE
157#endif
158
159/*
160 * Get PDEs and PTEs for user/kernel address space
161 */
162#define pmap_pte_w(pte)		((pte)->pte_ig & PTE_IG_WIRED)
163#define pmap_pte_managed(pte)	((pte)->pte_ig & PTE_IG_MANAGED)
164#define pmap_pte_v(pte)		((pte)->pte_p)
165#define pmap_pte_pa(pte)	(((pte)->pte_ppn) << 12)
166#define pmap_pte_prot(pte)	(((pte)->pte_ar << 2) | (pte)->pte_pl)
167
168#define pmap_pte_set_w(pte, v) ((v)?((pte)->pte_ig |= PTE_IG_WIRED) \
169				:((pte)->pte_ig &= ~PTE_IG_WIRED))
170#define pmap_pte_set_prot(pte, v) do {		\
171    (pte)->pte_ar = (v) >> 2;			\
172    (pte)->pte_pl = (v) & 3;			\
173} while (0)
174
175/*
176 * Given a map and a machine independent protection code,
177 * convert to an ia64 protection code.
178 */
179#define pte_prot(m, p)		(protection_codes[m == kernel_pmap ? 0 : 1][p])
180#define pte_prot_pl(m, p)	(pte_prot(m, p) & 3)
181#define pte_prot_ar(m, p)	(pte_prot(m, p) >> 2)
182int	protection_codes[2][8];
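
/*
 * Illustrative sketch, not part of the original code: each entry of
 * protection_codes[] (filled in by ia64_protection_init()) packs an
 * ia64 access-rights value above a 2-bit privilege level, which is
 * exactly what pte_prot_ar() and pte_prot_pl() unpack again.  The
 * function name below is hypothetical.
 */
#if 0
static __inline void
pmap_prot_example(pmap_t pmap, vm_prot_t prot)
{
	int code, pl, ar;

	code = pte_prot(pmap, prot);
	pl = code & 3;		/* same as pte_prot_pl(pmap, prot) */
	ar = code >> 2;		/* same as pte_prot_ar(pmap, prot) */
}
#endif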
183
184/*
185 * Return non-zero if this pmap is currently active
186 */
187#define pmap_isactive(pmap)	(pmap->pm_active)
188
189/*
190 * Statically allocated kernel pmap
191 */
192struct pmap kernel_pmap_store;
193
194vm_offset_t avail_start;	/* PA of first available physical page */
195vm_offset_t avail_end;		/* PA of last available physical page */
196vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
197vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
198static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
199
200vm_offset_t vhpt_base, vhpt_size;
201
202/*
203 * We use an object to own the kernel's 'page tables'. For simplicity,
204 * we use one page directory to index a set of pages containing
205 * ia64_lptes. This gives us up to 2GB of kernel virtual space.
206 */
207static int nkpt;
208struct ia64_lpte **ia64_kptdir;
209#define KPTE_DIR_INDEX(va) \
210	((va >> (2*PAGE_SHIFT-5)) & ((1<<(PAGE_SHIFT-3))-1))
211#define KPTE_PTE_INDEX(va) \
212	((va >> PAGE_SHIFT) & ((1<<(PAGE_SHIFT-5))-1))
213#define NKPTEPG		(PAGE_SIZE / sizeof(struct ia64_lpte))
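
/*
 * Illustrative sketch, not part of the original code: assuming the
 * default 8KB page size (PAGE_SHIFT == 13) and the 32-byte struct
 * ia64_lpte, a leaf page holds 256 PTEs and the directory page holds
 * 1024 leaf pointers, for the 2GB of kernel virtual space mentioned
 * above.  A region 5 address then decomposes as pmap_find_kpte() does
 * below; the function name here is hypothetical.
 */
#if 0
static __inline struct ia64_lpte *
pmap_kpte_example(vm_offset_t va)
{
	int dir = KPTE_DIR_INDEX(va);	/* bits 21..30 of the address */
	int idx = KPTE_PTE_INDEX(va);	/* bits 13..20 of the address */

	return (&ia64_kptdir[dir][idx]);
}
#endif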
214
215vm_offset_t kernel_vm_end;
216
217/* Values for ptc.e. XXX values for SKI. */
218static u_int64_t pmap_ptc_e_base = 0x100000000;
219static u_int64_t pmap_ptc_e_count1 = 3;
220static u_int64_t pmap_ptc_e_count2 = 2;
221static u_int64_t pmap_ptc_e_stride1 = 0x2000;
222static u_int64_t pmap_ptc_e_stride2 = 0x100000000;
223
224/*
225 * Data for the RID allocator
226 */
227static int pmap_ridcount;
228static int pmap_rididx;
229static int pmap_ridmapsz;
230static int pmap_ridmax;
231static u_int64_t *pmap_ridmap;
232struct mtx pmap_ridmutex;
233
234/*
235 * Data for the pv entry allocation mechanism
236 */
237static uma_zone_t pvzone;
238static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
239int pmap_pagedaemon_waken;
240static struct pv_entry *pvbootentries;
241static int pvbootnext, pvbootmax;
242
243/*
244 * Data for allocating PTEs for user processes.
245 */
246static uma_zone_t ptezone;
247
248/*
249 * VHPT instrumentation.
250 */
251static int pmap_vhpt_inserts;
252static int pmap_vhpt_collisions;
253static int pmap_vhpt_resident;
254SYSCTL_DECL(_vm_stats);
255SYSCTL_NODE(_vm_stats, OID_AUTO, vhpt, CTLFLAG_RD, 0, "");
256SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, inserts, CTLFLAG_RD,
257	   &pmap_vhpt_inserts, 0, "");
258SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, collisions, CTLFLAG_RD,
259	   &pmap_vhpt_collisions, 0, "");
260SYSCTL_INT(_vm_stats_vhpt, OID_AUTO, resident, CTLFLAG_RD,
261	   &pmap_vhpt_resident, 0, "");
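
/*
 * Usage note, not part of the original code: these counters should
 * show up under the vm.stats.vhpt sysctl tree, e.g. (assuming the
 * standard sysctl(8) utility):
 *
 *	sysctl vm.stats.vhpt.inserts vm.stats.vhpt.collisions \
 *	    vm.stats.vhpt.resident
 */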
262
263static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
264static pv_entry_t get_pv_entry(void);
265static void	ia64_protection_init(void);
266
267static pmap_t	pmap_install(pmap_t);
268static void	pmap_invalidate_all(pmap_t pmap);
269static void	pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m);
270
271vm_offset_t
272pmap_steal_memory(vm_size_t size)
273{
274	vm_size_t bank_size;
275	vm_offset_t pa, va;
276
277	size = round_page(size);
278
279	bank_size = phys_avail[1] - phys_avail[0];
280	while (size > bank_size) {
281		int i;
282		for (i = 0; phys_avail[i+2]; i+= 2) {
283			phys_avail[i] = phys_avail[i+2];
284			phys_avail[i+1] = phys_avail[i+3];
285		}
286		phys_avail[i] = 0;
287		phys_avail[i+1] = 0;
288		if (!phys_avail[0])
289			panic("pmap_steal_memory: out of memory");
290		bank_size = phys_avail[1] - phys_avail[0];
291	}
292
293	pa = phys_avail[0];
294	phys_avail[0] += size;
295
296	va = IA64_PHYS_TO_RR7(pa);
297	bzero((caddr_t) va, size);
298	return va;
299}
300
301/*
302 *	Bootstrap the system enough to run with virtual memory.
303 */
304void
305pmap_bootstrap()
306{
307	int i, j, count, ridbits;
308	struct ia64_pal_result res;
309
310	/*
311	 * Query the PAL Code to find the loop parameters for the
312	 * ptc.e instruction.
313	 */
314	res = ia64_call_pal_static(PAL_PTCE_INFO, 0, 0, 0);
315	if (res.pal_status != 0)
316		panic("Can't configure ptc.e parameters");
317	pmap_ptc_e_base = res.pal_result[0];
318	pmap_ptc_e_count1 = res.pal_result[1] >> 32;
319	pmap_ptc_e_count2 = res.pal_result[1] & ((1L<<32) - 1);
320	pmap_ptc_e_stride1 = res.pal_result[2] >> 32;
321	pmap_ptc_e_stride2 = res.pal_result[2] & ((1L<<32) - 1);
322	if (bootverbose)
323		printf("ptc.e base=0x%lx, count1=%ld, count2=%ld, "
324		       "stride1=0x%lx, stride2=0x%lx\n",
325		       pmap_ptc_e_base,
326		       pmap_ptc_e_count1,
327		       pmap_ptc_e_count2,
328		       pmap_ptc_e_stride1,
329		       pmap_ptc_e_stride2);
330
331	/*
332	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
333	 *
334	 * We currently need at least 19 bits in the RID because PID_MAX
335	 * can only be encoded in 17 bits and we need RIDs for 5 regions
336	 * per process. With PID_MAX equalling 99999 this means that we
337	 * need to be able to encode 499995 (=5*PID_MAX).
338	 * The Itanium processor only has 18 bits and the architected
339	 * minimum is exactly that. So, we cannot use a PID based scheme
340	 * in those cases. Enter pmap_ridmap...
341	 * We should avoid the map when running on a processor that has
342	 * implemented enough bits. This means that we should pass the
343	 * process/thread ID to pmap. This we currently don't do, so we
344	 * use the map anyway. However, we don't want to allocate a map
345	 * that is large enough to cover the range dictated by the number
346	 * of bits in the RID, because that may result in a RID map of
347	 * 2MB in size for a 24-bit RID. A 64KB map is enough.
348	 * The bottom line: we create a 32KB map when the processor only
349	 * implements 18 bits (or when we can't figure it out). Otherwise
350	 * we create a 64KB map.
351	 */
352	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
353	if (res.pal_status != 0) {
354		if (bootverbose)
355			printf("Can't read VM Summary - assuming 18 Region ID bits\n");
356		ridbits = 18; /* guaranteed minimum */
357	} else {
358		ridbits = (res.pal_result[1] >> 8) & 0xff;
359		if (bootverbose)
360			printf("Processor supports %d Region ID bits\n",
361			    ridbits);
362	}
363	if (ridbits > 19)
364		ridbits = 19;
365
366	pmap_ridmax = (1 << ridbits);
367	pmap_ridmapsz = pmap_ridmax / 64;
368	pmap_ridmap = (u_int64_t *)pmap_steal_memory(pmap_ridmax / 8);
369	pmap_ridmap[0] |= 0xff;
370	pmap_rididx = 0;
371	pmap_ridcount = 8;
372	mtx_init(&pmap_ridmutex, "RID allocator lock", NULL, MTX_DEF);
373
374	/*
375	 * Allocate some memory for initial kernel 'page tables'.
376	 */
377	ia64_kptdir = (void *)pmap_steal_memory(PAGE_SIZE);
378	for (i = 0; i < NKPT; i++) {
379		ia64_kptdir[i] = (void*)pmap_steal_memory(PAGE_SIZE);
380	}
381	nkpt = NKPT;
382	kernel_vm_end = NKPT * PAGE_SIZE * NKPTEPG + VM_MIN_KERNEL_ADDRESS -
383	    VM_GATEWAY_SIZE;
384
385	avail_start = phys_avail[0];
386	for (i = 0; phys_avail[i+2]; i+= 2) ;
387	avail_end = phys_avail[i+1];
388	count = i+2;
389
390	/*
391	 * Figure out a useful size for the VHPT, based on the size of
392	 * physical memory and try to locate a region which is large
393	 * enough to contain the VHPT (which must be a power of two in
394	 * size and aligned to a natural boundary).
395	 * Don't use the difference between avail_start and avail_end
396	 * as a measure of memory size. The address space is often
397	 * sparse enough to make us (try to) create a huge VHPT.
398	 */
399	vhpt_size = 15;
400	while ((1<<vhpt_size) < Maxmem * 32)
401		vhpt_size++;
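
	/*
	 * Worked example, not part of the original code: Maxmem is in
	 * pages, so Maxmem * 32 asks for one 32-byte VHPT entry per
	 * physical page.  Assuming 8KB pages, 1GB of memory gives
	 * Maxmem = 128K, Maxmem * 32 = 4MB and thus vhpt_size = 22.
	 */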
402
403	vhpt_base = 0;
404	while (!vhpt_base) {
405		vm_offset_t mask;
406		if (bootverbose)
407			printf("Trying VHPT size 0x%lx\n", (1L<<vhpt_size));
408		mask = (1L << vhpt_size) - 1;
409		for (i = 0; i < count; i += 2) {
410			vm_offset_t base, limit;
411			base = (phys_avail[i] + mask) & ~mask;
412			limit = base + (1L << vhpt_size);
413			if (limit <= phys_avail[i+1])
414				/*
415				 * VHPT can fit in this region
416				 */
417				break;
418		}
419		if (!phys_avail[i]) {
420			/*
421			 * Can't fit, try next smaller size.
422			 */
423			vhpt_size--;
424		} else {
425			vhpt_base = (phys_avail[i] + mask) & ~mask;
426		}
427	}
428	if (vhpt_size < 15)
429		panic("Can't find space for VHPT");
430
431	if (bootverbose)
432		printf("Putting VHPT at %p\n", (void *) vhpt_base);
433	if (vhpt_base != phys_avail[i]) {
434		/*
435		 * Split this region.
436		 */
437		if (bootverbose)
438			printf("Splitting [%p-%p]\n",
439			       (void *) phys_avail[i],
440			       (void *) phys_avail[i+1]);
441		for (j = count; j > i; j -= 2) {
442			phys_avail[j] = phys_avail[j-2];
443			phys_avail[j+1] = phys_avail[j-2+1];
444		}
445		phys_avail[count+2] = 0;
446		phys_avail[count+3] = 0;
447		phys_avail[i+1] = vhpt_base;
448		phys_avail[i+2] = vhpt_base + (1L << vhpt_size);
449	} else {
450		phys_avail[i] = vhpt_base + (1L << vhpt_size);
451	}
452
453	vhpt_base = IA64_PHYS_TO_RR7(vhpt_base);
454	bzero((void *) vhpt_base, (1L << vhpt_size));
455	__asm __volatile("mov cr.pta=%0;; srlz.i;;"
456			 :: "r" (vhpt_base + (1<<8) + (vhpt_size<<2) + 1));
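
	/*
	 * Illustrative note, not part of the original code: assuming the
	 * architected cr.pta layout, the operand above combines the
	 * (naturally aligned) VHPT base with the size in bits 2..7, the
	 * long-format bit (bit 8) and the walker-enable bit (bit 0):
	 *
	 *	pta = base | (1 << 8) | (vhpt_size << 2) | 1;
	 */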
457
458	virtual_avail = VM_MIN_KERNEL_ADDRESS;
459	virtual_end = VM_MAX_KERNEL_ADDRESS;
460
461	/*
462	 * Initialize protection array.
463	 */
464	ia64_protection_init();
465
466	/*
467	 * Initialize the kernel pmap (which is statically allocated).
468	 */
469	for (i = 0; i < 5; i++)
470		kernel_pmap->pm_rid[i] = 0;
471	kernel_pmap->pm_active = 1;
472	TAILQ_INIT(&kernel_pmap->pm_pvlist);
473	PCPU_SET(current_pmap, kernel_pmap);
474
475	/*
476	 * Region 5 is mapped via the vhpt.
477	 */
478	ia64_set_rr(IA64_RR_BASE(5),
479		    (5 << 8) | (PAGE_SHIFT << 2) | 1);
480
481	/*
482	 * Region 6 is direct mapped UC and region 7 is direct mapped
483	 * WB (cacheable). The details of this are controlled by the Alt
484	 * {I,D}TLB handlers. Here we just make sure that they have the largest
485	 * possible page size to minimise TLB usage.
486	 */
487	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (28 << 2));
488	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (28 << 2));
489
490	/*
491	 * Reserve some memory for allocating pvs while bootstrapping
492	 * the pv allocator. We need to have enough to cover mapping
493	 * the kmem_alloc region used to allocate the initial_pvs in
494	 * pmap_init. In general, the size of this region is
495	 * approximately (# physical pages) * (size of pv entry).
496	 */
497	pvbootmax = ((physmem * sizeof(struct pv_entry)) >> PAGE_SHIFT) + 128;
498	pvbootentries = (struct pv_entry *)
499		pmap_steal_memory(pvbootmax * sizeof(struct pv_entry));
500	pvbootnext = 0;
501
502	/*
503	 * Clear out any random TLB entries left over from booting.
504	 */
505	pmap_invalidate_all(kernel_pmap);
506
507	map_gateway_page();
508}
509
510void *
511uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
512{
513	static vm_pindex_t color;
514	vm_page_t m;
515	int pflags;
516	void *va;
517
518	*flags = UMA_SLAB_PRIV;
519	if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
520		pflags = VM_ALLOC_INTERRUPT;
521	else
522		pflags = VM_ALLOC_SYSTEM;
523	if (wait & M_ZERO)
524		pflags |= VM_ALLOC_ZERO;
525
526	for (;;) {
527		m = vm_page_alloc(NULL, color++, pflags | VM_ALLOC_NOOBJ);
528		if (m == NULL) {
529			if (wait & M_NOWAIT)
530				return (NULL);
531			else
532				VM_WAIT;
533		} else
534			break;
535	}
536
537	va = (void *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
538	if ((m->flags & PG_ZERO) == 0)
539		bzero(va, PAGE_SIZE);
540	return (va);
541}
542
543void
544uma_small_free(void *mem, int size, u_int8_t flags)
545{
546	vm_page_t m;
547
548	m = PHYS_TO_VM_PAGE(IA64_RR_MASK((u_int64_t)mem));
549	vm_page_lock_queues();
550	vm_page_free(m);
551	vm_page_unlock_queues();
552}
553
554/*
555 *	Initialize the pmap module.
556 *	Called by vm_init, to initialize any structures that the pmap
557 *	system needs to map virtual memory.
558 *	pmap_init has been enhanced to support discontiguous physical
559 *	memory in a fairly consistent way.
560 */
561void
562pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
563{
564	int i;
565	int initial_pvs;
566
567	/*
568	 * Allocate memory for random pmap data structures.  Includes the
569	 * pv_head_table.
570	 */
571
572	for(i = 0; i < vm_page_array_size; i++) {
573		vm_page_t m;
574
575		m = &vm_page_array[i];
576		TAILQ_INIT(&m->md.pv_list);
577		m->md.pv_list_count = 0;
578 	}
579
580	/*
581	 * Init the pv free list and the PTE free list.
582	 */
583	initial_pvs = vm_page_array_size;
584	if (initial_pvs < MINPV)
585		initial_pvs = MINPV;
586	if (initial_pvs > MAXPV)
587		initial_pvs = MAXPV;
588	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry),
589	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
590	uma_prealloc(pvzone, initial_pvs);
591
592	ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
593	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
594	uma_prealloc(ptezone, initial_pvs);
595
596	/*
597	 * Now it is safe to enable pv_table recording.
598	 */
599	pmap_initialized = TRUE;
600}
601
602/*
603 * Initialize the address space (zone) for the pv_entries.  Set a
604 * high water mark so that the system can recover from excessive
605 * numbers of pv entries.
606 */
607void
608pmap_init2()
609{
610	int shpgperproc = PMAP_SHPGPERPROC;
611
612	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
613	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
614	pv_entry_high_water = 9 * (pv_entry_max / 10);
615}
616
617
618/***************************************************
619 * Manipulate TLBs for a pmap
620 ***************************************************/
621
622static void
623pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
624{
625	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
626		("invalidating TLB for non-current pmap"));
627	ia64_ptc_g(va, PAGE_SHIFT << 2);
628}
629
630static void
631pmap_invalidate_all_1(void *arg)
632{
633	u_int64_t addr;
634	int i, j;
635	register_t psr;
636
637	psr = intr_disable();
638	addr = pmap_ptc_e_base;
639	for (i = 0; i < pmap_ptc_e_count1; i++) {
640		for (j = 0; j < pmap_ptc_e_count2; j++) {
641			ia64_ptc_e(addr);
642			addr += pmap_ptc_e_stride2;
643		}
644		addr += pmap_ptc_e_stride1;
645	}
646	intr_restore(psr);
647}
648
649static void
650pmap_invalidate_all(pmap_t pmap)
651{
652	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
653		("invalidating TLB for non-current pmap"));
654
655
656#ifdef SMP
657	smp_rendezvous(0, pmap_invalidate_all_1, 0, 0);
658#else
659	pmap_invalidate_all_1(0);
660#endif
661}
662
663static u_int32_t
664pmap_allocate_rid(void)
665{
666	uint64_t bit, bits;
667	int rid;
668
669	mtx_lock(&pmap_ridmutex);
670	if (pmap_ridcount == pmap_ridmax)
671		panic("pmap_allocate_rid: All Region IDs used");
672
673	/* Find an index with a free bit. */
674	while ((bits = pmap_ridmap[pmap_rididx]) == ~0UL) {
675		pmap_rididx++;
676		if (pmap_rididx == pmap_ridmapsz)
677			pmap_rididx = 0;
678	}
679	rid = pmap_rididx * 64;
680
681	/* Find a free bit. */
682	bit = 1UL;
683	while (bits & bit) {
684		rid++;
685		bit <<= 1;
686	}
687
688	pmap_ridmap[pmap_rididx] |= bit;
689	pmap_ridcount++;
690	mtx_unlock(&pmap_ridmutex);
691
692	return rid;
693}
694
695static void
696pmap_free_rid(u_int32_t rid)
697{
698	uint64_t bit;
699	int idx;
700
701	idx = rid / 64;
702	bit = ~(1UL << (rid & 63));
703
704	mtx_lock(&pmap_ridmutex);
705	pmap_ridmap[idx] &= bit;
706	pmap_ridcount--;
707	mtx_unlock(&pmap_ridmutex);
708}
709
710/***************************************************
711 * Low level helper routines.....
712 ***************************************************/
713
714/*
715 * Install a pte into the VHPT
716 */
717static PMAP_INLINE void
718pmap_install_pte(struct ia64_lpte *vhpte, struct ia64_lpte *pte)
719{
720	u_int64_t *vhp, *p;
721
722	vhp = (u_int64_t *)vhpte;
723	p = (u_int64_t *)pte;
724
725	critical_enter();
726
727	/* Invalidate the tag so the VHPT walker will not match this entry. */
728	vhp[2] = 1UL << 63;
729	ia64_mf();
730
731	vhp[0] = p[0];
732	vhp[1] = p[1];
733	ia64_mf();
734
735	/* Install a proper tag now that we're done. */
736	vhp[2] = p[2];
737	ia64_mf();
738
739	critical_exit();
740}
741
742/*
743 * Compare essential parts of pte.
744 */
745static PMAP_INLINE int
746pmap_equal_pte(struct ia64_lpte *pte1, struct ia64_lpte *pte2)
747{
748	return *(u_int64_t *) pte1 == *(u_int64_t *) pte2;
749}
750
751/*
752 * this routine defines the region(s) of memory that should
753 * not be tested for the modified bit.
754 */
755static PMAP_INLINE int
756pmap_track_modified(vm_offset_t va)
757{
758	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
759		return 1;
760	else
761		return 0;
762}
763
764/*
765 * Create the KSTACK for a new thread.
766 * This routine directly affects the fork perf for a process/thread.
767 */
768void
769pmap_new_thread(struct thread *td, int pages)
770{
771
772	/* Bounds check */
773	if (pages <= 1)
774		pages = KSTACK_PAGES;
775	else if (pages > KSTACK_MAX_PAGES)
776		pages = KSTACK_MAX_PAGES;
777	td->td_kstack = (vm_offset_t)malloc(pages * PAGE_SIZE, M_PMAP,
778	    M_WAITOK);
779	td->td_kstack_pages = pages;
780}
781
782/*
783 * Dispose the KSTACK for a thread that has exited.
784 * This routine directly impacts the exit perf of a process/thread.
785 */
786void
787pmap_dispose_thread(struct thread *td)
788{
789
790	free((void*)td->td_kstack, M_PMAP);
791	td->td_kstack = 0;
792	td->td_kstack_pages = 0;
793}
794
795/*
796 * Set up a variable sized alternate kstack.  This appears to be MI.
797 */
798void
799pmap_new_altkstack(struct thread *td, int pages)
800{
801
802	td->td_altkstack = td->td_kstack;
803	td->td_altkstack_obj = td->td_kstack_obj;
804	td->td_altkstack_pages = td->td_kstack_pages;
805	pmap_new_thread(td, pages);
806}
807
808void
809pmap_dispose_altkstack(struct thread *td)
810{
811
812	pmap_dispose_thread(td);
813	td->td_kstack = td->td_altkstack;
814	td->td_kstack_obj = td->td_altkstack_obj;
815	td->td_kstack_pages = td->td_altkstack_pages;
816	td->td_altkstack = 0;
817	td->td_altkstack_obj = NULL;
818	td->td_altkstack_pages = 0;
819}
820
821/*
822 * Allow the KSTACK for a thread to be prejudicially paged out.
823 */
824void
825pmap_swapout_thread(struct thread *td)
826{
827}
828
829/*
830 * Bring the KSTACK for a specified thread back in.
831 */
832void
833pmap_swapin_thread(struct thread *td)
834{
835}
836
837/***************************************************
838 * Page table page management routines.....
839 ***************************************************/
840
841void
842pmap_pinit0(struct pmap *pmap)
843{
844	/* kernel_pmap is the same as any other pmap. */
845	pmap_pinit(pmap);
846}
847
848/*
849 * Initialize a preallocated and zeroed pmap structure,
850 * such as one in a vmspace structure.
851 */
852void
853pmap_pinit(struct pmap *pmap)
854{
855	int i;
856
857	pmap->pm_flags = 0;
858	for (i = 0; i < 5; i++)
859		pmap->pm_rid[i] = 0;
860	pmap->pm_ptphint = NULL;
861	pmap->pm_active = 0;
862	TAILQ_INIT(&pmap->pm_pvlist);
863	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
864}
865
866/*
867 * Wire in kernel global address entries.  To avoid a race condition
868 * between pmap initialization and pmap_growkernel, this procedure
869 * should be called after the vmspace is attached to the process
870 * but before this pmap is activated.
871 */
872void
873pmap_pinit2(struct pmap *pmap)
874{
875	int i;
876
877	for (i = 0; i < 5; i++)
878		pmap->pm_rid[i] = pmap_allocate_rid();
879}
880
881/***************************************************
882 * Pmap allocation/deallocation routines.
883 ***************************************************/
884
885/*
886 * Release any resources held by the given physical map.
887 * Called when a pmap initialized by pmap_pinit is being released.
888 * Should only be called if the map contains no valid mappings.
889 */
890void
891pmap_release(pmap_t pmap)
892{
893	int i;
894
895	for (i = 0; i < 5; i++)
896		if (pmap->pm_rid[i])
897			pmap_free_rid(pmap->pm_rid[i]);
898}
899
900/*
901 * grow the number of kernel page table entries, if needed
902 */
903void
904pmap_growkernel(vm_offset_t addr)
905{
906	struct ia64_lpte *ptepage;
907	vm_page_t nkpg;
908
909	if (kernel_vm_end >= addr)
910		return;
911
912	critical_enter();
913
914	while (kernel_vm_end < addr) {
915		/* We could handle more by increasing the size of kptdir. */
916		if (nkpt == MAXKPT)
917			panic("pmap_growkernel: out of kernel address space");
918
919		nkpg = vm_page_alloc(NULL, nkpt,
920		    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
921		if (!nkpg)
922			panic("pmap_growkernel: no memory to grow kernel");
923
924		ptepage = (struct ia64_lpte *)
925		    IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(nkpg));
926		bzero(ptepage, PAGE_SIZE);
927		ia64_kptdir[KPTE_DIR_INDEX(kernel_vm_end)] = ptepage;
928
929		nkpt++;
930		kernel_vm_end += PAGE_SIZE * NKPTEPG;
931	}
932
933	critical_exit();
934}
935
936/***************************************************
937 * page management routines.
938 ***************************************************/
939
940/*
941 * free the pv_entry back to the free list
942 */
943static PMAP_INLINE void
944free_pv_entry(pv_entry_t pv)
945{
946	pv_entry_count--;
947	uma_zfree(pvzone, pv);
948}
949
950/*
951 * get a new pv_entry, allocating a block from the system
952 * when needed.
953 * the memory allocation is performed bypassing the malloc code
954 * because of the possibility of allocations at interrupt time.
955 */
956static pv_entry_t
957get_pv_entry(void)
958{
959	pv_entry_count++;
960	if (pv_entry_high_water &&
961		(pv_entry_count > pv_entry_high_water) &&
962		(pmap_pagedaemon_waken == 0)) {
963		pmap_pagedaemon_waken = 1;
964		wakeup (&vm_pages_needed);
965	}
966	return uma_zalloc(pvzone, M_NOWAIT);
967}
968
969/*
970 * Add an ia64_lpte to the VHPT.
971 */
972static void
973pmap_enter_vhpt(struct ia64_lpte *pte, vm_offset_t va)
974{
975	struct ia64_lpte *vhpte;
976
977	pmap_vhpt_inserts++;
978	pmap_vhpt_resident++;
979
980	vhpte = (struct ia64_lpte *) ia64_thash(va);
981
982	if (vhpte->pte_chain)
983		pmap_vhpt_collisions++;
984
985	pte->pte_chain = vhpte->pte_chain;
986	vhpte->pte_chain = ia64_tpa((vm_offset_t) pte);
987
988	if (!vhpte->pte_p && pte->pte_p)
989		pmap_install_pte(vhpte, pte);
990	else
991		ia64_mf();
992}
993
994/*
995 * Update VHPT after a pte has changed.
996 */
997static void
998pmap_update_vhpt(struct ia64_lpte *pte, vm_offset_t va)
999{
1000	struct ia64_lpte *vhpte;
1001
1002	vhpte = (struct ia64_lpte *) ia64_thash(va);
1003
1004	if ((!vhpte->pte_p || vhpte->pte_tag == pte->pte_tag)
1005	    && pte->pte_p)
1006		pmap_install_pte(vhpte, pte);
1007}
1008
1009/*
1010 * Remove the ia64_lpte matching va from the VHPT. Return zero if it
1011 * worked or an appropriate error code otherwise.
1012 */
1013static int
1014pmap_remove_vhpt(vm_offset_t va)
1015{
1016	struct ia64_lpte *pte;
1017	struct ia64_lpte *lpte;
1018	struct ia64_lpte *vhpte;
1019	u_int64_t tag;
1020	int error = ENOENT;
1021
1022	vhpte = (struct ia64_lpte *) ia64_thash(va);
1023
1024	/*
1025	 * If the VHPTE is invalid, there can't be a collision chain.
1026	 */
1027	if (!vhpte->pte_p) {
1028		KASSERT(!vhpte->pte_chain, ("bad vhpte"));
1029		printf("can't remove vhpt entry for 0x%lx\n", va);
1030		goto done;
1031	}
1032
1033	lpte = vhpte;
1034	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(vhpte->pte_chain);
1035	tag = ia64_ttag(va);
1036
1037	while (pte->pte_tag != tag) {
1038		lpte = pte;
1039		if (pte->pte_chain)
1040			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1041		else {
1042			printf("can't remove vhpt entry for 0x%lx\n", va);
1043			goto done;
1044		}
1045	}
1046
1047	/*
1048	 * Snip this pv_entry out of the collision chain.
1049	 */
1050	lpte->pte_chain = pte->pte_chain;
1051
1052	/*
1053	 * If the VHPTE matches as well, change it to map the first
1054	 * element from the chain if there is one.
1055	 */
1056	if (vhpte->pte_tag == tag) {
1057		if (vhpte->pte_chain) {
1058			pte = (struct ia64_lpte *)
1059				IA64_PHYS_TO_RR7(vhpte->pte_chain);
1060			pmap_install_pte(vhpte, pte);
1061		} else {
1062			vhpte->pte_p = 0;
1063			ia64_mf();
1064		}
1065	}
1066
1067	pmap_vhpt_resident--;
1068	error = 0;
1069 done:
1070	return error;
1071}
1072
1073/*
1074 * Find the ia64_lpte for the given va, if any.
1075 */
1076static struct ia64_lpte *
1077pmap_find_vhpt(vm_offset_t va)
1078{
1079	struct ia64_lpte *pte;
1080	u_int64_t tag;
1081
1082	pte = (struct ia64_lpte *) ia64_thash(va);
1083	if (!pte->pte_chain) {
1084		pte = 0;
1085		goto done;
1086	}
1087
1088	tag = ia64_ttag(va);
1089	pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1090
1091	while (pte->pte_tag != tag) {
1092		if (pte->pte_chain) {
1093			pte = (struct ia64_lpte *) IA64_PHYS_TO_RR7(pte->pte_chain);
1094		} else {
1095			pte = 0;
1096			break;
1097		}
1098	}
1099
1100 done:
1101	return pte;
1102}
1103
1104/*
1105 * Remove an entry from the list of managed mappings.
1106 */
1107static int
1108pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
1109{
1110	if (!pv) {
1111		if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1112			TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1113				if (pmap == pv->pv_pmap && va == pv->pv_va)
1114					break;
1115			}
1116		} else {
1117			TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1118				if (va == pv->pv_va)
1119					break;
1120			}
1121		}
1122	}
1123
1124	if (pv) {
1125		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1126		m->md.pv_list_count--;
1127		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1128			vm_page_flag_clear(m, PG_WRITEABLE);
1129
1130		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1131		free_pv_entry(pv);
1132		return 0;
1133	} else {
1134		return ENOENT;
1135	}
1136}
1137
1138/*
1139 * Create a pv entry for page at pa for
1140 * (pmap, va).
1141 */
1142static void
1143pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
1144{
1145	pv_entry_t pv;
1146
1147	pv = get_pv_entry();
1148	pv->pv_pmap = pmap;
1149	pv->pv_va = va;
1150
1151	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1152	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1153	m->md.pv_list_count++;
1154}
1155
1156/*
1157 *	Routine:	pmap_extract
1158 *	Function:
1159 *		Extract the physical page address associated
1160 *		with the given map/virtual_address pair.
1161 */
1162vm_offset_t
1163pmap_extract(pmap, va)
1164	register pmap_t pmap;
1165	vm_offset_t va;
1166{
1167	struct ia64_lpte *pte;
1168	pmap_t oldpmap;
1169
1170	if (!pmap)
1171		return 0;
1172
1173	oldpmap = pmap_install(pmap);
1174	pte = pmap_find_vhpt(va);
1175	pmap_install(oldpmap);
1176
1177	if (!pte)
1178		return 0;
1179
1180	return pmap_pte_pa(pte);
1181}
1182
1183/***************************************************
1184 * Low level mapping routines.....
1185 ***************************************************/
1186
1187/*
1188 * Find the kernel lpte for mapping the given virtual address, which
1189 * must be in the part of region 5 which we can cover with our kernel
1190 * 'page tables'.
1191 */
1192static struct ia64_lpte *
1193pmap_find_kpte(vm_offset_t va)
1194{
1195	KASSERT((va >> 61) == 5,
1196		("kernel mapping 0x%lx not in region 5", va));
1197	KASSERT(IA64_RR_MASK(va) < (nkpt * PAGE_SIZE * NKPTEPG),
1198		("kernel mapping 0x%lx out of range", va));
1199	return (&ia64_kptdir[KPTE_DIR_INDEX(va)][KPTE_PTE_INDEX(va)]);
1200}
1201
1202/*
1203 * Find a pte suitable for mapping a user-space address. If one exists
1204 * in the VHPT, that one will be returned, otherwise a new pte is
1205 * allocated.
1206 */
1207static struct ia64_lpte *
1208pmap_find_pte(vm_offset_t va)
1209{
1210	struct ia64_lpte *pte;
1211
1212	if (va >= VM_MAXUSER_ADDRESS)
1213		return pmap_find_kpte(va);
1214
1215	pte = pmap_find_vhpt(va);
1216	if (!pte) {
1217		pte = uma_zalloc(ptezone, M_WAITOK);
1218		pte->pte_p = 0;
1219	}
1220	return pte;
1221}
1222
1223/*
1224 * Free a pte which is now unused. This simply returns it to the zone
1225 * allocator if it is a user mapping. For kernel mappings, clear the
1226 * valid bit to make it clear that the mapping is not currently used.
1227 */
1228static void
1229pmap_free_pte(struct ia64_lpte *pte, vm_offset_t va)
1230{
1231	if (va < VM_MAXUSER_ADDRESS)
1232		uma_zfree(ptezone, pte);
1233	else
1234		pte->pte_p = 0;
1235}
1236
1237/*
1238 * Set a pte to contain a valid mapping and enter it in the VHPT. If
1239 * the pte was originally valid, then it's assumed to already be in the
1240 * VHPT.
1241 */
1242static void
1243pmap_set_pte(struct ia64_lpte *pte, vm_offset_t va, vm_offset_t pa,
1244	     int ig, int pl, int ar)
1245{
1246	int wasvalid = pte->pte_p;
1247
1248	pte->pte_p = 1;
1249	pte->pte_ma = PTE_MA_WB;
1250	if (ig & PTE_IG_MANAGED) {
1251		pte->pte_a = 0;
1252		pte->pte_d = 0;
1253	} else {
1254		pte->pte_a = 1;
1255		pte->pte_d = 1;
1256	}
1257	pte->pte_pl = pl;
1258	pte->pte_ar = ar;
1259	pte->pte_ppn = pa >> 12;
1260	pte->pte_ed = 0;
1261	pte->pte_ig = ig;
1262
1263	pte->pte_ps = PAGE_SHIFT;
1264	pte->pte_key = 0;
1265
1266	pte->pte_tag = ia64_ttag(va);
1267
1268	if (wasvalid) {
1269		pmap_update_vhpt(pte, va);
1270	} else {
1271		pmap_enter_vhpt(pte, va);
1272	}
1273}
1274
1275/*
1276 * If a pte contains a valid mapping, clear it and update the VHPT.
1277 */
1278static void
1279pmap_clear_pte(struct ia64_lpte *pte, vm_offset_t va)
1280{
1281	if (pte->pte_p) {
1282		pmap_remove_vhpt(va);
1283		ia64_ptc_g(va, PAGE_SHIFT << 2);
1284		pte->pte_p = 0;
1285	}
1286}
1287
1288/*
1289 * Remove the (possibly managed) mapping represented by pte from the
1290 * given pmap.
1291 */
1292static int
1293pmap_remove_pte(pmap_t pmap, struct ia64_lpte *pte, vm_offset_t va,
1294		pv_entry_t pv, int freepte)
1295{
1296	int error;
1297	vm_page_t m;
1298
1299	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1300		("removing pte for non-current pmap"));
1301
1302	/*
1303	 * First remove from the VHPT.
1304	 */
1305	error = pmap_remove_vhpt(va);
1306	if (error)
1307		return error;
1308
1309	/*
1310	 * Make sure pmap_set_pte() knows it isn't in the VHPT.
1311	 */
1312	pte->pte_p = 0;
1313
1314	if (pte->pte_ig & PTE_IG_WIRED)
1315		pmap->pm_stats.wired_count -= 1;
1316
1317	pmap->pm_stats.resident_count -= 1;
1318	if (pte->pte_ig & PTE_IG_MANAGED) {
1319		m = PHYS_TO_VM_PAGE(pmap_pte_pa(pte));
1320		if (pte->pte_d)
1321			if (pmap_track_modified(va))
1322				vm_page_dirty(m);
1323		if (pte->pte_a)
1324			vm_page_flag_set(m, PG_REFERENCED);
1325
1326		if (freepte)
1327			pmap_free_pte(pte, va);
1328		return pmap_remove_entry(pmap, m, va, pv);
1329	} else {
1330		if (freepte)
1331			pmap_free_pte(pte, va);
1332		return 0;
1333	}
1334}
1335
1336/*
1337 * Extract the physical page address associated with a kernel
1338 * virtual address.
1339 */
1340vm_paddr_t
1341pmap_kextract(vm_offset_t va)
1342{
1343	struct ia64_lpte *pte;
1344	vm_offset_t gwpage;
1345
1346	KASSERT(va >= IA64_RR_BASE(5), ("Must be kernel VA"));
1347
1348	/* Regions 6 and 7 are direct mapped. */
1349	if (va >= IA64_RR_BASE(6))
1350		return (IA64_RR_MASK(va));
1351
1352	/* EPC gateway page? */
1353	gwpage = (vm_offset_t)ia64_get_k5();
1354	if (va >= gwpage && va < gwpage + VM_GATEWAY_SIZE)
1355		return (IA64_RR_MASK((vm_offset_t)ia64_gateway_page));
1356
1357	/* Bail out if the virtual address is beyond our limits. */
1358	if (IA64_RR_MASK(va) >= nkpt * PAGE_SIZE * NKPTEPG)
1359		return (0);
1360
1361	pte = pmap_find_kpte(va);
1362	if (!pte->pte_p)
1363		return (0);
1364	return ((pte->pte_ppn << 12) | (va & PAGE_MASK));
1365}
1366
1367/*
1368 * Add a list of wired pages to the kva
1369 * this routine is only used for temporary
1370 * kernel mappings that do not need to have
1371 * page modification or references recorded.
1372 * Note that old mappings are simply written
1373 * over.  The page *must* be wired.
1374 */
1375void
1376pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1377{
1378	int i;
1379	struct ia64_lpte *pte;
1380
1381	for (i = 0; i < count; i++) {
1382		vm_offset_t tva = va + i * PAGE_SIZE;
1383		int wasvalid;
1384		pte = pmap_find_kpte(tva);
1385		wasvalid = pte->pte_p;
1386		pmap_set_pte(pte, tva, VM_PAGE_TO_PHYS(m[i]),
1387			     0, PTE_PL_KERN, PTE_AR_RWX);
1388		if (wasvalid)
1389			ia64_ptc_g(tva, PAGE_SHIFT << 2);
1390	}
1391}
1392
1393/*
1394 * this routine jerks page mappings from the
1395 * kernel -- it is meant only for temporary mappings.
1396 */
1397void
1398pmap_qremove(vm_offset_t va, int count)
1399{
1400	int i;
1401	struct ia64_lpte *pte;
1402
1403	for (i = 0; i < count; i++) {
1404		pte = pmap_find_kpte(va);
1405		pmap_clear_pte(pte, va);
1406		va += PAGE_SIZE;
1407	}
1408}
1409
1410/*
1411 * Add a wired page to the kva.
1412 */
1413void
1414pmap_kenter(vm_offset_t va, vm_offset_t pa)
1415{
1416	struct ia64_lpte *pte;
1417	int wasvalid;
1418
1419	pte = pmap_find_kpte(va);
1420	wasvalid = pte->pte_p;
1421	pmap_set_pte(pte, va, pa, 0, PTE_PL_KERN, PTE_AR_RWX);
1422	if (wasvalid)
1423		ia64_ptc_g(va, PAGE_SHIFT << 2);
1424}
1425
1426/*
1427 * Remove a page from the kva
1428 */
1429void
1430pmap_kremove(vm_offset_t va)
1431{
1432	struct ia64_lpte *pte;
1433
1434	pte = pmap_find_kpte(va);
1435	pmap_clear_pte(pte, va);
1436}
1437
1438/*
1439 *	Used to map a range of physical addresses into kernel
1440 *	virtual address space.
1441 *
1442 *	The value passed in '*virt' is a suggested virtual address for
1443 *	the mapping. Architectures which can support a direct-mapped
1444 *	physical to virtual region can return the appropriate address
1445 *	within that region, leaving '*virt' unchanged. Other
1446 *	architectures should map the pages starting at '*virt' and
1447 *	update '*virt' with the first usable address after the mapped
1448 *	region.
1449 */
1450vm_offset_t
1451pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1452{
1453	return IA64_PHYS_TO_RR7(start);
1454}
1455
1456/*
1457 * Remove a single page from a process address space
1458 */
1459static void
1460pmap_remove_page(pmap_t pmap, vm_offset_t va)
1461{
1462	struct ia64_lpte *pte;
1463
1464	KASSERT((pmap == kernel_pmap || pmap == PCPU_GET(current_pmap)),
1465		("removing page for non-current pmap"));
1466
1467	pte = pmap_find_vhpt(va);
1468	if (pte) {
1469		pmap_remove_pte(pmap, pte, va, 0, 1);
1470		pmap_invalidate_page(pmap, va);
1471	}
1472	return;
1473}
1474
1475/*
1476 *	Remove the given range of addresses from the specified map.
1477 *
1478 *	It is assumed that the start and end are properly
1479 *	rounded to the page size.
1480 */
1481void
1482pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1483{
1484	pmap_t oldpmap;
1485	vm_offset_t va;
1486	pv_entry_t pv;
1487	struct ia64_lpte *pte;
1488
1489	if (pmap == NULL)
1490		return;
1491
1492	if (pmap->pm_stats.resident_count == 0)
1493		return;
1494
1495	oldpmap = pmap_install(pmap);
1496
1497	/*
1498	 * special handling of removing one page.  a very
1499	 * common operation and easy to short circuit some
1500	 * code.
1501	 */
1502	if (sva + PAGE_SIZE == eva) {
1503		pmap_remove_page(pmap, sva);
1504		pmap_install(oldpmap);
1505		return;
1506	}
1507
1508	if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
1509		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1510			va = pv->pv_va;
1511			if (va >= sva && va < eva) {
1512				pte = pmap_find_vhpt(va);
1513				pmap_remove_pte(pmap, pte, va, pv, 1);
1514				pmap_invalidate_page(pmap, va);
1515			}
1516		}
1517
1518	} else {
1519		for (va = sva; va < eva; va += PAGE_SIZE) {
1520			pte = pmap_find_vhpt(va);
1521			if (pte) {
1522				pmap_remove_pte(pmap, pte, va, 0, 1);
1523				pmap_invalidate_page(pmap, va);
1524			}
1525		}
1526	}
1527
1528	pmap_install(oldpmap);
1529}
1530
1531/*
1532 *	Routine:	pmap_remove_all
1533 *	Function:
1534 *		Removes this physical page from
1535 *		all physical maps in which it resides.
1536 *		Reflects back modify bits to the pager.
1537 *
1538 *	Notes:
1539 *		Original versions of this routine were very
1540 *		inefficient because they iteratively called
1541 *		pmap_remove (slow...)
1542 */
1543
1544void
1545pmap_remove_all(vm_page_t m)
1546{
1547	pmap_t oldpmap;
1548	pv_entry_t pv;
1549	int s;
1550
1551#if defined(PMAP_DIAGNOSTIC)
1552	/*
1553	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
1554	 * pages!
1555	 */
1556	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1557		panic("pmap_remove_all: illegal for unmanaged page, pa: 0x%lx", VM_PAGE_TO_PHYS(m));
1558	}
1559#endif
1560
1561	s = splvm();
1562
1563	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1564		struct ia64_lpte *pte;
1565		pmap_t pmap = pv->pv_pmap;
1566		vm_offset_t va = pv->pv_va;
1567
1568		oldpmap = pmap_install(pmap);
1569		pte = pmap_find_vhpt(va);
1570		if (pmap_pte_pa(pte) != VM_PAGE_TO_PHYS(m))
1571			panic("pmap_remove_all: pv_table for %lx is inconsistent", VM_PAGE_TO_PHYS(m));
1572		pmap_remove_pte(pmap, pte, va, pv, 1);
1573		pmap_invalidate_page(pmap, va);
1574		pmap_install(oldpmap);
1575	}
1576
1577	vm_page_flag_clear(m, PG_WRITEABLE);
1578
1579	splx(s);
1580	return;
1581}
1582
1583/*
1584 *	Set the physical protection on the
1585 *	specified range of this map as requested.
1586 */
1587void
1588pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1589{
1590	pmap_t oldpmap;
1591	struct ia64_lpte *pte;
1592	int newprot;
1593
1594	if (pmap == NULL)
1595		return;
1596
1597	oldpmap = pmap_install(pmap);
1598
1599	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1600		pmap_remove(pmap, sva, eva);
1601		pmap_install(oldpmap);
1602		return;
1603	}
1604
1605	if (prot & VM_PROT_WRITE) {
1606		pmap_install(oldpmap);
1607		return;
1608	}
1609
1610	newprot = pte_prot(pmap, prot);
1611
1612	if ((sva & PAGE_MASK) || (eva & PAGE_MASK))
1613		panic("pmap_protect: unaligned addresses");
1614
1615	while (sva < eva) {
1616		/*
1617		 * If page is invalid, skip this page
1618		 */
1619		pte = pmap_find_vhpt(sva);
1620		if (!pte) {
1621			sva += PAGE_SIZE;
1622			continue;
1623		}
1624
1625		if (pmap_pte_prot(pte) != newprot) {
1626			if (pte->pte_ig & PTE_IG_MANAGED) {
1627				vm_offset_t pa = pmap_pte_pa(pte);
1628				vm_page_t m = PHYS_TO_VM_PAGE(pa);
1629				if (pte->pte_d) {
1630					if (pmap_track_modified(sva))
1631						vm_page_dirty(m);
1632					pte->pte_d = 0;
1633				}
1634				if (pte->pte_a) {
1635					vm_page_flag_set(m, PG_REFERENCED);
1636					pte->pte_a = 0;
1637				}
1638			}
1639			pmap_pte_set_prot(pte, newprot);
1640			pmap_update_vhpt(pte, sva);
1641			pmap_invalidate_page(pmap, sva);
1642		}
1643
1644		sva += PAGE_SIZE;
1645	}
1646	pmap_install(oldpmap);
1647}
1648
1649/*
1650 *	Insert the given physical page (p) at
1651 *	the specified virtual address (v) in the
1652 *	target physical map with the protection requested.
1653 *
1654 *	If specified, the page will be wired down, meaning
1655 *	that the related pte can not be reclaimed.
1656 *
1657 *	NB:  This is the only routine which MAY NOT lazy-evaluate
1658 *	or lose information.  That is, this routine must actually
1659 *	insert this page into the given map NOW.
1660 */
1661void
1662pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1663	   boolean_t wired)
1664{
1665	pmap_t oldpmap;
1666	vm_offset_t pa;
1667	vm_offset_t opa;
1668	struct ia64_lpte origpte;
1669	struct ia64_lpte *pte;
1670	int managed;
1671
1672	if (pmap == NULL)
1673		return;
1674
1675	oldpmap = pmap_install(pmap);
1676
1677	va &= ~PAGE_MASK;
1678#ifdef PMAP_DIAGNOSTIC
1679	if (va > VM_MAX_KERNEL_ADDRESS)
1680		panic("pmap_enter: toobig");
1681#endif
1682
1683	/*
1684	 * Find (or create) a pte for the given mapping.
1685	 */
1686	pte = pmap_find_pte(va);
1687	origpte = *pte;
1688
1689	if (origpte.pte_p)
1690		opa = pmap_pte_pa(&origpte);
1691	else
1692		opa = 0;
1693	managed = 0;
1694
1695	pa = VM_PAGE_TO_PHYS(m) & ~PAGE_MASK;
1696
1697	/*
1698	 * Mapping has not changed, must be protection or wiring change.
1699	 */
1700	if (origpte.pte_p && (opa == pa)) {
1701		/*
1702		 * Wiring change, just update stats. We don't worry about
1703		 * wiring PT pages as they remain resident as long as there
1704		 * are valid mappings in them. Hence, if a user page is wired,
1705		 * the PT page will be also.
1706		 */
1707		if (wired && ((origpte.pte_ig & PTE_IG_WIRED) == 0))
1708			pmap->pm_stats.wired_count++;
1709		else if (!wired && (origpte.pte_ig & PTE_IG_WIRED))
1710			pmap->pm_stats.wired_count--;
1711
1712		/*
1713		 * We might be turning off write access to the page,
1714		 * so we go ahead and sense modify status.
1715		 */
1716		if (origpte.pte_ig & PTE_IG_MANAGED) {
1717			if (origpte.pte_d && pmap_track_modified(va)) {
1718				vm_page_t om;
1719				om = PHYS_TO_VM_PAGE(opa);
1720				vm_page_dirty(om);
1721			}
1722		}
1723
1724		managed = origpte.pte_ig & PTE_IG_MANAGED;
1725		goto validate;
1726	}
1727	/*
1728	 * Mapping has changed, invalidate old range and fall
1729	 * through to handle validating new mapping.
1730	 */
1731	if (opa) {
1732		int error;
1733		vm_page_lock_queues();
1734		error = pmap_remove_pte(pmap, pte, va, 0, 0);
1735		vm_page_unlock_queues();
1736		if (error)
1737			panic("pmap_enter: pte vanished, va: 0x%lx", va);
1738	}
1739
1740	/*
1741	 * Enter on the PV list if part of our managed memory.
1742	 */
1743	if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
1744		pmap_insert_entry(pmap, va, m);
1745		managed |= PTE_IG_MANAGED;
1746	}
1747
1748	/*
1749	 * Increment counters
1750	 */
1751	pmap->pm_stats.resident_count++;
1752	if (wired)
1753		pmap->pm_stats.wired_count++;
1754
1755validate:
1756
1757	/*
1758	 * Now validate mapping with desired protection/wiring. This
1759	 * adds the pte to the VHPT if necessary.
1760	 */
1761	pmap_set_pte(pte, va, pa, managed | (wired ? PTE_IG_WIRED : 0),
1762		     pte_prot_pl(pmap, prot), pte_prot_ar(pmap, prot));
1763
1764	/*
1765	 * if the mapping or permission bits are different, we need
1766	 * to invalidate the page.
1767	 */
1768	if (!pmap_equal_pte(&origpte, pte))
1769		pmap_invalidate_page(pmap, va);
1770
1771	pmap_install(oldpmap);
1772}
1773
1774/*
1775 * this code makes some *MAJOR* assumptions:
1776 * 1. Current pmap & pmap exists.
1777 * 2. Not wired.
1778 * 3. Read access.
1779 * 4. No page table pages.
1780 * 5. Tlbflush is deferred to calling procedure.
1781 * 6. Page IS managed.
1782 * but is *MUCH* faster than pmap_enter...
1783 */
1784
1785static void
1786pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m)
1787{
1788	struct ia64_lpte *pte;
1789	pmap_t oldpmap;
1790
1791	oldpmap = pmap_install(pmap);
1792
1793	pte = pmap_find_pte(va);
1794	if (pte->pte_p) {
		pmap_install(oldpmap);
1795		return;
	}
1796
1797	/*
1798	 * Enter on the PV list since it's part of our managed memory.
1799	 */
1800	pmap_insert_entry(pmap, va, m);
1801
1802	/*
1803	 * Increment counters
1804	 */
1805	pmap->pm_stats.resident_count++;
1806
1807	/*
1808	 * Initialise PTE with read-only protection and enter into VHPT.
1809	 */
1810	pmap_set_pte(pte, va, VM_PAGE_TO_PHYS(m),
1811		     PTE_IG_MANAGED,
1812		     PTE_PL_USER, PTE_AR_R);
1813
1814	pmap_install(oldpmap);
1815}
1816
1817/*
1818 * Make temporary mapping for a physical address. This is called
1819 * during dump.
1820 */
1821void *
1822pmap_kenter_temporary(vm_offset_t pa, int i)
1823{
1824	return (void *) IA64_PHYS_TO_RR7(pa - (i * PAGE_SIZE));
1825}
1826
1827#define MAX_INIT_PT (96)
1828/*
1829 * pmap_object_init_pt preloads the ptes for a given object
1830 * into the specified pmap.  This eliminates the blast of soft
1831 * faults on process startup and immediately after an mmap.
1832 */
1833void
1834pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
1835		    vm_object_t object, vm_pindex_t pindex,
1836		    vm_size_t size, int limit)
1837{
1838	pmap_t oldpmap;
1839	vm_offset_t tmpidx;
1840	int psize;
1841	vm_page_t p;
1842	int objpgs;
1843
1844	if (pmap == NULL || object == NULL)
1845		return;
1846
1847	oldpmap = pmap_install(pmap);
1848
1849	psize = ia64_btop(size);
1850
1851	if ((object->type != OBJT_VNODE) ||
1852		((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
1853			(object->resident_page_count > MAX_INIT_PT))) {
1854		pmap_install(oldpmap);
1855		return;
1856	}
1857
1858	if (psize + pindex > object->size) {
1859		if (object->size < pindex) {
			pmap_install(oldpmap);
1860			return;
		}
1861		psize = object->size - pindex;
1862	}
1863
1864	/*
1865	 * if we are processing a major portion of the object, then scan the
1866	 * entire thing.
1867	 */
1868	if (psize > (object->resident_page_count >> 2)) {
1869		objpgs = psize;
1870
1871		for (p = TAILQ_FIRST(&object->memq);
1872		    ((objpgs > 0) && (p != NULL));
1873		    p = TAILQ_NEXT(p, listq)) {
1874
1875			tmpidx = p->pindex;
1876			if (tmpidx < pindex) {
1877				continue;
1878			}
1879			tmpidx -= pindex;
1880			if (tmpidx >= psize) {
1881				continue;
1882			}
1883			/*
1884			 * don't allow an madvise to blow away our really
1885			 * free pages allocating pv entries.
1886			 */
1887			if ((limit & MAP_PREFAULT_MADVISE) &&
1888			    cnt.v_free_count < cnt.v_free_reserved) {
1889				break;
1890			}
1891			vm_page_lock_queues();
1892			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
1893				(p->busy == 0) &&
1894			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1895				if ((p->queue - p->pc) == PQ_CACHE)
1896					vm_page_deactivate(p);
1897				vm_page_busy(p);
1898				vm_page_unlock_queues();
1899				pmap_enter_quick(pmap,
1900						 addr + ia64_ptob(tmpidx), p);
1901				vm_page_lock_queues();
1902				vm_page_wakeup(p);
1903			}
1904			vm_page_unlock_queues();
1905			objpgs -= 1;
1906		}
1907	} else {
1908		/*
1909		 * else lookup the pages one-by-one.
1910		 */
1911		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
1912			/*
1913			 * don't allow an madvise to blow away our really
1914			 * free pages allocating pv entries.
1915			 */
1916			if ((limit & MAP_PREFAULT_MADVISE) &&
1917			    cnt.v_free_count < cnt.v_free_reserved) {
1918				break;
1919			}
1920			p = vm_page_lookup(object, tmpidx + pindex);
1921			if (p == NULL)
1922				continue;
1923			vm_page_lock_queues();
1924			if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
1925				(p->busy == 0) &&
1926			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
1927				if ((p->queue - p->pc) == PQ_CACHE)
1928					vm_page_deactivate(p);
1929				vm_page_busy(p);
1930				vm_page_unlock_queues();
1931				pmap_enter_quick(pmap,
1932						 addr + ia64_ptob(tmpidx), p);
1933				vm_page_lock_queues();
1934				vm_page_wakeup(p);
1935			}
1936			vm_page_unlock_queues();
1937		}
1938	}
1939	pmap_install(oldpmap);
1940	return;
1941}
1942
1943/*
1944 * pmap_prefault provides a quick way of clustering
1945 * pagefaults into a processes address space.  It is a "cousin"
1946 * of pmap_object_init_pt, except it runs at page fault time instead
1947 * of mmap time.
1948 */
1949#define PFBAK 4
1950#define PFFOR 4
1951#define PAGEORDER_SIZE (PFBAK+PFFOR)
1952
1953static int pmap_prefault_pageorder[] = {
1954	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
1955	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
1956	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
1957	-4 * PAGE_SIZE, 4 * PAGE_SIZE
1958};
1959
1960void
1961pmap_prefault(pmap, addra, entry)
1962	pmap_t pmap;
1963	vm_offset_t addra;
1964	vm_map_entry_t entry;
1965{
1966	int i;
1967	vm_offset_t starta;
1968	vm_offset_t addr;
1969	vm_pindex_t pindex;
1970	vm_page_t m, mpte;
1971	vm_object_t object;
1972
1973	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
1974		return;
1975
1976	object = entry->object.vm_object;
1977
1978	starta = addra - PFBAK * PAGE_SIZE;
1979	if (starta < entry->start) {
1980		starta = entry->start;
1981	} else if (starta > addra) {
1982		starta = 0;
1983	}
1984
1985	mpte = NULL;
1986	for (i = 0; i < PAGEORDER_SIZE; i++) {
1987		vm_object_t lobject;
1988		struct ia64_lpte *pte;
1989
1990		addr = addra + pmap_prefault_pageorder[i];
1991		if (addr > addra + (PFFOR * PAGE_SIZE))
1992			addr = 0;
1993
1994		if (addr < starta || addr >= entry->end)
1995			continue;
1996
1997		pte = pmap_find_vhpt(addr);
1998		if (pte && pte->pte_p)
1999			continue;
2000
2001		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2002		lobject = object;
2003		for (m = vm_page_lookup(lobject, pindex);
2004		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2005		    lobject = lobject->backing_object) {
2006			if (lobject->backing_object_offset & PAGE_MASK)
2007				break;
2008			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2009			m = vm_page_lookup(lobject->backing_object, pindex);
2010		}
2011
2012		/*
2013		 * give-up when a page is not in memory
2014		 */
2015		if (m == NULL)
2016			break;
2017		vm_page_lock_queues();
2018		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2019			(m->busy == 0) &&
2020		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2021
2022			if ((m->queue - m->pc) == PQ_CACHE) {
2023				vm_page_deactivate(m);
2024			}
2025			vm_page_busy(m);
2026			vm_page_unlock_queues();
2027			pmap_enter_quick(pmap, addr, m);
2028			vm_page_lock_queues();
2029			vm_page_wakeup(m);
2030		}
2031		vm_page_unlock_queues();
2032	}
2033}
2034
2035/*
2036 *	Routine:	pmap_change_wiring
2037 *	Function:	Change the wiring attribute for a map/virtual-address
2038 *			pair.
2039 *	In/out conditions:
2040 *			The mapping must already exist in the pmap.
2041 */
2042void
2043pmap_change_wiring(pmap, va, wired)
2044	register pmap_t pmap;
2045	vm_offset_t va;
2046	boolean_t wired;
2047{
2048	pmap_t oldpmap;
2049	struct ia64_lpte *pte;
2050
2051	if (pmap == NULL)
2052		return;
2053
2054	oldpmap = pmap_install(pmap);
2055
2056	pte = pmap_find_vhpt(va);
2057
2058	if (wired && !pmap_pte_w(pte))
2059		pmap->pm_stats.wired_count++;
2060	else if (!wired && pmap_pte_w(pte))
2061		pmap->pm_stats.wired_count--;
2062
2063	/*
2064	 * Wiring is not a hardware characteristic so there is no need to
2065	 * invalidate TLB.
2066	 */
2067	pmap_pte_set_w(pte, wired);
2068
2069	pmap_install(oldpmap);
2070}
2071
2072
2073
2074/*
2075 *	Copy the range specified by src_addr/len
2076 *	from the source map to the range dst_addr/len
2077 *	in the destination map.
2078 *
2079 *	This routine is only advisory and need not do anything.
2080 */
2081
2082void
2083pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2084	  vm_offset_t src_addr)
2085{
2086}
2087
2088
2089/*
2090 *	pmap_zero_page zeros the specified hardware page by
2091 *	mapping it into virtual memory and using bzero to clear
2092 *	its contents.
2093 */
2094
2095void
2096pmap_zero_page(vm_page_t m)
2097{
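	/*
	 * Region 7 provides a cacheable direct mapping of physical memory,
	 * so the page can be cleared through that mapping without setting
	 * up a temporary one.
	 */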
2098	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2099	bzero((caddr_t) va, PAGE_SIZE);
2100}
2101
2102
2103/*
2104 *	pmap_zero_page_area zeros the specified hardware page by
2105 *	mapping it into virtual memory and using bzero to clear
2106 *	its contents.
2107 *
2108 *	off and size must reside within a single page.
2109 */
2110
2111void
2112pmap_zero_page_area(vm_page_t m, int off, int size)
2113{
2114	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2115	bzero((char *)va + off, size);
2116}
2117
2118
2119/*
2120 *	pmap_zero_page_idle zeros the specified hardware page by
2121 *	mapping it into virtual memory and using bzero to clear
2122 *	its contents.  This is for the vm_idlezero process.
2123 */
2124
2125void
2126pmap_zero_page_idle(vm_page_t m)
2127{
2128	vm_offset_t va = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
2129	bzero((caddr_t) va, PAGE_SIZE);
2130}
2131
2132
2133/*
2134 *	pmap_copy_page copies the specified (machine independent)
2135 *	page by mapping the page into virtual memory and using
2136 *	bcopy to copy the page, one machine dependent page at a
2137 *	time.
2138 */
2139void
2140pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2141{
2142	vm_offset_t src = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(msrc));
2143	vm_offset_t dst = IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(mdst));
2144	bcopy((caddr_t) src, (caddr_t) dst, PAGE_SIZE);
2145}
2146
2147/*
2148 * Returns true if the pmap's pv is one of the first
2149 * 16 pvs linked to from this page.  This count may
2150 * be changed upwards or downwards in the future; it
2151 * is only necessary that true be returned for a small
2152 * subset of pmaps for proper page aging.
2153 */
2154boolean_t
2155pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2156{
2157	pv_entry_t pv;
2158	int loops = 0;
2159	int s;
2160
2161	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2162		return FALSE;
2163
2164	s = splvm();
2165
2166	/*
2167	 * Check the page's mappings, returning immediately if this pmap is found.
2168	 */
2169	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2170		if (pv->pv_pmap == pmap) {
2171			splx(s);
2172			return TRUE;
2173		}
2174		loops++;
2175		if (loops >= 16)
2176			break;
2177	}
2178	splx(s);
2179	return (FALSE);
2180}
2181
2182#define PMAP_REMOVE_PAGES_CURPROC_ONLY
2183/*
2184 * Remove all pages from the specified address space; this aids
2185 * process exit speeds.  Also, this code is special cased for the
2186 * current process only, but can have the more generic (and
2187 * slightly slower) mode enabled.  This is much faster than
2188 * pmap_remove in the case of running down an entire address
2189 * space.
2190 */
2191void
2192pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2193{
2194	pv_entry_t pv, npv;
2195	int s;
2196
2197#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2198	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2199		printf("warning: pmap_remove_pages called with non-current pmap\n");
2200		return;
2201	}
2202#endif
2203
2204	s = splvm();
2205	for (pv = TAILQ_FIRST(&pmap->pm_pvlist);
2206		pv;
2207		pv = npv) {
2208		struct ia64_lpte *pte;
2209
2210		npv = TAILQ_NEXT(pv, pv_plist);
2211
2212		if (pv->pv_va >= eva || pv->pv_va < sva) {
2213			continue;
2214		}
2215
2216		pte = pmap_find_vhpt(pv->pv_va);
2217		if (!pte)
2218		panic("pmap_remove_pages: page on pm_pvlist has no pte");
2219
2220		/*
2221		 * We cannot remove wired pages from a process' mapping
2222		 * at this time.
2223		 */
2224		if (pte->pte_ig & PTE_IG_WIRED) {
2225			continue;
2226		}
2227
2228		pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
2229	}
2230	splx(s);
2231
2232	pmap_invalidate_all(pmap);
2233}
2234
2235/*
2236 *	pmap_page_protect:
2237 *
2238 *	Lower the permission for all mappings to a given page.
2239 */
2240void
2241pmap_page_protect(vm_page_t m, vm_prot_t prot)
2242{
2243	pv_entry_t pv;
2244
2245	if ((prot & VM_PROT_WRITE) != 0)
2246		return;
2247	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
2248		if ((m->flags & PG_WRITEABLE) == 0)
2249			return;
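		/*
		 * Downgrade each remaining mapping of the page: change the
		 * protection in the pte, propagate the updated pte to the
		 * VHPT and purge the stale translation from the TLB.
		 */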
2250		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2251			int newprot = pte_prot(pv->pv_pmap, prot);
2252			pmap_t oldpmap = pmap_install(pv->pv_pmap);
2253			struct ia64_lpte *pte;
2254			pte = pmap_find_vhpt(pv->pv_va);
2255			pmap_pte_set_prot(pte, newprot);
2256			pmap_update_vhpt(pte, pv->pv_va);
2257			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2258			pmap_install(oldpmap);
2259		}
2260		vm_page_flag_clear(m, PG_WRITEABLE);
2261	} else {
2262		pmap_remove_all(m);
2263	}
2264}
2265
2266/*
2267 *	pmap_ts_referenced:
2268 *
2269 *	Return a count of reference bits for a page, clearing those bits.
2270 *	It is not necessary for every reference bit to be cleared, but it
2271 *	is necessary that 0 only be returned when there are truly no
2272 *	reference bits set.
2273 *
2274 *	XXX: The exact number of bits to check and clear is a matter that
2275 *	should be tested and standardized at some point in the future for
2276 *	optimal aging of shared pages.
2277 */
2278int
2279pmap_ts_referenced(vm_page_t m)
2280{
2281	pv_entry_t pv;
2282	int count = 0;
2283
2284	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2285		return 0;
2286
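	/*
	 * For each mapping of the page, count and clear the accessed (A)
	 * bit, propagating the change to the VHPT and purging the
	 * translation so the bit is set again on the next reference.
	 */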
2287	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2288		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2289		struct ia64_lpte *pte;
2290		pte = pmap_find_vhpt(pv->pv_va);
2291		if (pte->pte_a) {
2292			count++;
2293			pte->pte_a = 0;
2294			pmap_update_vhpt(pte, pv->pv_va);
2295			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2296		}
2297		pmap_install(oldpmap);
2298	}
2299
2300	return count;
2301}
2302
2303#if 0
2304/*
2305 *	pmap_is_referenced:
2306 *
2307 *	Return whether or not the specified physical page was referenced
2308 *	in any physical maps.
2309 */
2310static boolean_t
2311pmap_is_referenced(vm_page_t m)
2312{
2313	pv_entry_t pv;
2314
2315	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2316		return FALSE;
2317
2318	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2319		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2320		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2321		pmap_install(oldpmap);
2322		if (pte->pte_a)
2323			return 1;
2324	}
2325
2326	return 0;
2327}
2328#endif
2329
2330/*
2331 *	pmap_is_modified:
2332 *
2333 *	Return whether or not the specified physical page was modified
2334 *	in any physical maps.
2335 */
2336boolean_t
2337pmap_is_modified(vm_page_t m)
2338{
2339	pv_entry_t pv;
2340
2341	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2342		return FALSE;
2343
2344	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2345		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2346		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2347		pmap_install(oldpmap);
2348		if (pte->pte_d)
2349			return 1;
2350	}
2351
2352	return 0;
2353}
2354
2355/*
2356 *	Clear the modify bits on the specified physical page.
2357 */
2358void
2359pmap_clear_modify(vm_page_t m)
2360{
2361	pv_entry_t pv;
2362
2363	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2364		return;
2365
2366	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2367		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2368		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2369		if (pte->pte_d) {
2370			pte->pte_d = 0;
2371			pmap_update_vhpt(pte, pv->pv_va);
2372			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2373		}
2374		pmap_install(oldpmap);
2375	}
2376}
2377
2378/*
2379 *	pmap_clear_reference:
2380 *
2381 *	Clear the reference bit on the specified physical page.
2382 */
2383void
2384pmap_clear_reference(vm_page_t m)
2385{
2386	pv_entry_t pv;
2387
2388	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2389		return;
2390
2391	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2392		pmap_t oldpmap = pmap_install(pv->pv_pmap);
2393		struct ia64_lpte *pte = pmap_find_vhpt(pv->pv_va);
2394		if (pte->pte_a) {
2395			pte->pte_a = 0;
2396			pmap_update_vhpt(pte, pv->pv_va);
2397			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
2398		}
2399		pmap_install(oldpmap);
2400	}
2401}
2402
2403/*
2404 * Miscellaneous support routines follow
2405 */
2406
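/*
 * Build the protection_codes[][] table that translates a VM_PROT_*
 * combination (read, write, execute) into the access-rights (AR) and
 * privilege-level (PL) bits of a pte: row 0 holds the codes used for
 * kernel mappings, row 1 the codes used for user mappings (see
 * pte_prot()).
 */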
2407static void
2408ia64_protection_init(void)
2409{
2410	int prot, *kp, *up;
2411
2412	kp = protection_codes[0];
2413	up = protection_codes[1];
2414
2415	for (prot = 0; prot < 8; prot++) {
2416		switch (prot) {
2417		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2418			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2419			*up++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2420			break;
2421
2422		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2423			*kp++ = (PTE_AR_X_RX << 2) | PTE_PL_KERN;
2424			*up++ = (PTE_AR_X_RX << 2) | PTE_PL_USER;
2425			break;
2426
2427		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2428			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2429			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2430			break;
2431
2432		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2433			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2434			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2435			break;
2436
2437		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2438			*kp++ = (PTE_AR_R << 2) | PTE_PL_KERN;
2439			*up++ = (PTE_AR_R << 2) | PTE_PL_USER;
2440			break;
2441
2442		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2443			*kp++ = (PTE_AR_RX << 2) | PTE_PL_KERN;
2444			*up++ = (PTE_AR_RX << 2) | PTE_PL_USER;
2445			break;
2446
2447		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2448			*kp++ = (PTE_AR_RW << 2) | PTE_PL_KERN;
2449			*up++ = (PTE_AR_RW << 2) | PTE_PL_USER;
2450			break;
2451
2452		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2453			*kp++ = (PTE_AR_RWX << 2) | PTE_PL_KERN;
2454			*up++ = (PTE_AR_RWX << 2) | PTE_PL_USER;
2455			break;
2456		}
2457	}
2458}
2459
2460/*
2461 * Map a set of physical memory pages into the kernel virtual
2462 * address space. Return a pointer to where it is mapped. This
2463 * routine is intended to be used for mapping device memory,
2464 * NOT real memory.
2465 */
2466void *
2467pmap_mapdev(vm_offset_t pa, vm_size_t size)
2468{
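	/*
	 * Device memory is accessed through region 6, the uncacheable
	 * direct-mapped region, so no page table entries need to be
	 * created here.
	 */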
2469	return (void*) IA64_PHYS_TO_RR6(pa);
2470}
2471
2472/*
2473 * 'Unmap' a range mapped by pmap_mapdev().
2474 */
2475void
2476pmap_unmapdev(vm_offset_t va, vm_size_t size)
2477{
2478	return;
2479}
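/*
 * Example usage (illustrative sketch only): a caller maps a device's
 * registers and later releases the mapping with
 *
 *	regs = pmap_mapdev(pa, size);
 *	...
 *	pmap_unmapdev((vm_offset_t)regs, size);
 *
 * Both calls are trivial here because no page table state is created
 * for device mappings.
 */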
2480
2481/*
2482 * perform the pmap work for mincore
2483 */
2484int
2485pmap_mincore(pmap_t pmap, vm_offset_t addr)
2486{
2487	pmap_t oldpmap;
2488	struct ia64_lpte *pte;
2489	int val = 0;
2490
2491	oldpmap = pmap_install(pmap);
2492	pte = pmap_find_vhpt(addr);
2493	pmap_install(oldpmap);
2494
2495	if (!pte)
2496		return 0;
2497
2498	if (pmap_pte_v(pte)) {
2499		vm_page_t m;
2500		vm_offset_t pa;
2501
2502		val = MINCORE_INCORE;
2503		if ((pte->pte_ig & PTE_IG_MANAGED) == 0)
2504			return val;
2505
2506		pa = pmap_pte_pa(pte);
2507
2508		m = PHYS_TO_VM_PAGE(pa);
2509
2510		/*
2511		 * Modified by us
2512		 */
2513		if (pte->pte_d)
2514			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
2515		/*
2516		 * Modified by someone
2517		 */
2518		else if (pmap_is_modified(m))
2519			val |= MINCORE_MODIFIED_OTHER;
2520		/*
2521		 * Referenced by us
2522		 */
2523		if (pte->pte_a)
2524			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
2525
2526		/*
2527		 * Referenced by someone
2528		 */
2529		else if (pmap_ts_referenced(m)) {
2530			val |= MINCORE_REFERENCED_OTHER;
2531			vm_page_flag_set(m, PG_REFERENCED);
2532		}
2533	}
2534	return val;
2535}
2536
2537void
2538pmap_activate(struct thread *td)
2539{
2540	pmap_install(vmspace_pmap(td->td_proc->p_vmspace));
2541}
2542
2543pmap_t
2544pmap_switch(pmap_t pm)
2545{
2546	pmap_t prevpm;
2547	int i;
2548
2549	mtx_assert(&sched_lock, MA_OWNED);
2550
2551	prevpm = PCPU_GET(current_pmap);
2552	if (prevpm == pm)
2553		return (prevpm);
2554	if (prevpm != NULL)
2555		atomic_clear_32(&prevpm->pm_active, PCPU_GET(cpumask));
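	/*
	 * Load the region registers for regions 0-4.  Each value carries
	 * the region id in bits 8 and up, the preferred page size
	 * (PAGE_SHIFT) in bits 2-7, and bit 0 set to enable the VHPT
	 * walker.  With a NULL pmap, region i simply gets region id i.
	 */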
2556	if (pm == NULL) {
2557		for (i = 0; i < 5; i++) {
2558			ia64_set_rr(IA64_RR_BASE(i),
2559			    (i << 8)|(PAGE_SHIFT << 2)|1);
2560		}
2561	} else {
2562		for (i = 0; i < 5; i++) {
2563			ia64_set_rr(IA64_RR_BASE(i),
2564			    (pm->pm_rid[i] << 8)|(PAGE_SHIFT << 2)|1);
2565		}
2566		atomic_set_32(&pm->pm_active, PCPU_GET(cpumask));
2567	}
2568	PCPU_SET(current_pmap, pm);
2569	__asm __volatile("srlz.d");
2570	return (prevpm);
2571}
2572
2573static pmap_t
2574pmap_install(pmap_t pm)
2575{
2576	pmap_t prevpm;
2577
2578	mtx_lock_spin(&sched_lock);
2579	prevpm = pmap_switch(pm);
2580	mtx_unlock_spin(&sched_lock);
2581	return (prevpm);
2582}
2583
2584vm_offset_t
2585pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
2586{
2587
2588	return addr;
2589}
2590
2591#include "opt_ddb.h"
2592
2593#ifdef DDB
2594
2595#include <ddb/ddb.h>
2596
2597static const char*	psnames[] = {
2598	"1B",	"2B",	"4B",	"8B",
2599	"16B",	"32B",	"64B",	"128B",
2600	"256B",	"512B",	"1K",	"2K",
2601	"4K",	"8K",	"16K",	"32K",
2602	"64K",	"128K",	"256K",	"512K",
2603	"1M",	"2M",	"4M",	"8M",
2604	"16M",	"32M",	"64M",	"128M",
2605	"256M",	"512M",	"1G",	"2G"
2606};
2607
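/*
 * Dump the instruction (type 0) or data (type 1) translation registers.
 * PAL_VM_SUMMARY supplies the highest implemented TR index of each kind;
 * PAL_VM_TR_READ then reads each TR into a local buffer.  Bits 0-3 of
 * its first return value indicate whether the AR, PL, D and MA fields
 * of the result are valid.
 */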
2608static void
2609print_trs(int type)
2610{
2611	struct ia64_pal_result	res;
2612	int			i, maxtr;
2613	struct {
2614		struct ia64_pte	pte;
2615		struct ia64_itir itir;
2616		struct ia64_ifa ifa;
2617		struct ia64_rr	rr;
2618	}			buf;
2619	static const char*	manames[] = {
2620		"WB",	"bad",	"bad",	"bad",
2621		"UC",	"UCE",	"WC",	"NaT",
2623	};
2624
2625	res = ia64_call_pal_static(PAL_VM_SUMMARY, 0, 0, 0);
2626	if (res.pal_status != 0) {
2627		db_printf("Can't get VM summary\n");
2628		return;
2629	}
2630
2631	if (type == 0)
2632		maxtr = (res.pal_result[0] >> 40) & 0xff;
2633	else
2634		maxtr = (res.pal_result[0] >> 32) & 0xff;
2635
2636	db_printf("V RID    Virtual Page  Physical Page PgSz ED AR PL D A MA  P KEY\n");
2637	for (i = 0; i <= maxtr; i++) {
2638		bzero(&buf, sizeof(buf));
2639		res = ia64_call_pal_stacked_physical
2640			(PAL_VM_TR_READ, i, type, ia64_tpa((u_int64_t) &buf));
2641		if (!(res.pal_result[0] & 1))
2642			buf.pte.pte_ar = 0;
2643		if (!(res.pal_result[0] & 2))
2644			buf.pte.pte_pl = 0;
2645		if (!(res.pal_result[0] & 4))
2646			buf.pte.pte_d = 0;
2647		if (!(res.pal_result[0] & 8))
2648			buf.pte.pte_ma = 0;
2649		db_printf(
2650			"%d %06x %013lx %013lx %4s %d  %d  %d  %d %d %-3s %d %06x\n",
2651			buf.ifa.ifa_ig & 1,
2652			buf.rr.rr_rid,
2653			buf.ifa.ifa_vpn,
2654			buf.pte.pte_ppn,
2655			psnames[buf.itir.itir_ps],
2656			buf.pte.pte_ed,
2657			buf.pte.pte_ar,
2658			buf.pte.pte_pl,
2659			buf.pte.pte_d,
2660			buf.pte.pte_a,
2661			manames[buf.pte.pte_ma],
2662			buf.pte.pte_p,
2663			buf.itir.itir_key);
2664	}
2665}
2666
2667DB_COMMAND(itr, db_itr)
2668{
2669	print_trs(0);
2670}
2671
2672DB_COMMAND(dtr, db_dtr)
2673{
2674	print_trs(1);
2675}
2676
2677DB_COMMAND(rr, db_rr)
2678{
2679	int i;
2680	u_int64_t t;
2681	struct ia64_rr rr;
2682
2683	db_printf("RR RID    PgSz VE\n");
2684	for (i = 0; i < 8; i++) {
2685		__asm __volatile ("mov %0=rr[%1]"
2686				  : "=r"(t)
2687				  : "r"(IA64_RR_BASE(i)));
2688		*(u_int64_t *) &rr = t;
2689		db_printf("%d  %06x %4s %d\n",
2690		    i, rr.rr_rid, psnames[rr.rr_ps], rr.rr_ve);
2691	}
2692}
2693
2694DB_COMMAND(thash, db_thash)
2695{
2696	if (!have_addr)
2697		return;
2698
2699	db_printf("%p\n", (void *) ia64_thash(addr));
2700}
2701
2702DB_COMMAND(ttag, db_ttag)
2703{
2704	if (!have_addr)
2705		return;
2706
2707	db_printf("0x%lx\n", ia64_ttag(addr));
2708}
2709
2710#endif
2711