1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58
59/*
60 *	File:	pmap.h
61 *
62 *	Authors:  Avadis Tevanian, Jr., Michael Wayne Young
63 *	Date:	1985
64 *
65 *	Machine-dependent structures for the physical map module.
66 */
67#ifdef KERNEL_PRIVATE
68#ifndef	_PMAP_MACHINE_
69#define _PMAP_MACHINE_	1
70
71#ifndef	ASSEMBLER
72
73#include <platforms.h>
74
75#include <mach/kern_return.h>
76#include <mach/machine/vm_types.h>
77#include <mach/vm_prot.h>
78#include <mach/vm_statistics.h>
79#include <mach/machine/vm_param.h>
80#include <kern/kern_types.h>
81#include <kern/thread.h>
82#include <kern/lock.h>
83#include <mach/branch_predicates.h>
84
85#include <i386/mp.h>
86#include <i386/proc_reg.h>
87
88#include <i386/pal_routines.h>
89
90/*
91 *	Define the generic in terms of the specific
92 */
93
/*
 * Each INTEL_/intel_ name below is a direct alias for the I386_/i386_
 * definition of the same shape; no arithmetic is added here.
 */
#define INTEL_PGBYTES		I386_PGBYTES
#define INTEL_PGSHIFT		I386_PGSHIFT

#define intel_btop(x)		i386_btop(x)
#define intel_ptob(x)		i386_ptob(x)

#define intel_round_page(x)	i386_round_page(x)
#define intel_trunc_page(x)	i386_trunc_page(x)

#define trunc_intel_to_vm(x)	trunc_i386_to_vm(x)
#define round_intel_to_vm(x)	round_i386_to_vm(x)
#define vm_to_intel(x)		vm_to_i386(x)
103
104/*
105 *	i386/i486/i860 Page Table Entry
106 */
107
108#endif	/* ASSEMBLER */
109
/*
 * Shift counts and masks used to split a virtual address into
 * page-directory and page-table indices.
 */
#define NPGPTD		4ULL
#define PDESHIFT	21ULL
#define PTEMASK		0x1ffULL
#define PTEINDX		3ULL

#define PTESHIFT	12ULL

#if defined(__i386__)
#define INITPT_SEG_BASE	0x100000
#endif

#if defined(__x86_64__)
/* Mask that keeps only the low 32 bits of an address. */
#define LOW_4GB_MASK	((vm_offset_t)0x00000000FFFFFFFFUL)
#endif

/* Byte size of one page-directory / page-table entry. */
#define PDESIZE		sizeof(pd_entry_t)	/* for assembly files */
#define PTESIZE		sizeof(pt_entry_t)	/* for assembly files */

/* All-ones masks just below the base-page and large-page sizes. */
#define INTEL_OFFMASK	(I386_PGBYTES - 1)
#define INTEL_LOFFMASK	(I386_LPGBYTES - 1)

/* Bits 12 through 51 set; INTEL_PTE_PFN is defined as this value. */
#define PG_FRAME	0x000FFFFFFFFFF000ULL

/* Entries that fit in one page, by entry type. */
#define NPTEPG		(PAGE_SIZE/(sizeof (pt_entry_t)))
#define NPTDPG		(PAGE_SIZE/(sizeof (pd_entry_t)))

/* NBPTD is NPGPTD pages expressed in bytes; NPDEPTD is how many PDEs fit in it. */
#define NBPTD		(NPGPTD << PAGE_SHIFT)
#define NPDEPTD		(NBPTD / (sizeof (pd_entry_t)))
#define NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))

/* 2^PDESHIFT bytes, and the matching byte-offset mask. */
#define NBPDE		(1ULL << PDESHIFT)
#define PDEMASK		(NBPDE - 1)
139
#define PTE_PER_PAGE	512	/* number of PTE's per page on any level */

/*
 * One group per paging level.  Each group supplies the entry type, the
 * number of entries per page, the address shift for that level, the byte
 * span of one entry (NB*), the matching offset mask, and a typed null
 * pointer.
 */

/* Level 4 (PML4). */
typedef uint64_t	pml4_entry_t;
#define NPML4PG		(PAGE_SIZE/(sizeof (pml4_entry_t)))
#define PML4SHIFT	39
#define PML4PGSHIFT	9
#define NBPML4		(1ULL << PML4SHIFT)
#define PML4MASK	(NBPML4-1)
#define PML4_ENTRY_NULL	((pml4_entry_t *) 0)

/* Level 3 (PDPT). */
typedef uint64_t	pdpt_entry_t;
#define NPDPTPG		(PAGE_SIZE/(sizeof (pdpt_entry_t)))
#define PDPTSHIFT	30
#define PDPTPGSHIFT	9
#define NBPDPT		(1ULL << PDPTSHIFT)
#define PDPTMASK	(NBPDPT-1)
#define PDPT_ENTRY_NULL	((pdpt_entry_t *) 0)

/* Level 2 (page directory). */
typedef uint64_t	pd_entry_t;
#define NPDPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define PDSHIFT		21
#define PDPGSHIFT	9
#define NBPD		(1ULL << PDSHIFT)
#define PDMASK		(NBPD-1)
#define PD_ENTRY_NULL	((pd_entry_t *) 0)

/* Level 1 (page table). */
typedef uint64_t	pt_entry_t;
#define NPTPG		(PAGE_SIZE/(sizeof (pt_entry_t)))
#define PTSHIFT		12
#define PTPGSHIFT	9
#define NBPT		(1ULL << PTSHIFT)
#define PTMASK		(NBPT-1)
#define PT_ENTRY_NULL	((pt_entry_t *) 0)

/* Physical address as stored by the pmap layer. */
typedef uint64_t	pmap_paddr_t;
176
/* DEBUG builds turn the pmap assertions on unless already configured. */
#if	DEBUG
#define PMAP_ASSERT 1
#endif

#if PMAP_ASSERT
/*
 * pmap_assert(ex): call Assert() with file, line and the expression text
 * when ex is false.
 */
#define	pmap_assert(ex)	((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex))

/*
 * pmap_assert2(ex, fmt, args...): when ex is false, emit the message via
 * kprintf() and then panic() with the same text.  The variadic arguments
 * appear in both calls, so they are evaluated twice on failure.
 */
#define pmap_assert2(ex, fmt, args...)					\
	do {								\
		if (ex)							\
			break;						\
		kprintf("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0),  ##args); \
		panic("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0),  ##args); \
	} while(0)
#else
/* Assertions disabled: both macros expand to nothing; ex is not evaluated. */
#define pmap_assert(ex)
#define pmap_assert2(ex, fmt, args...)
#endif
194
/* superpages: count of base pages making up one superpage */
#if defined(__x86_64__)
#define SUPERPAGE_NBASEPAGES 512
#else
#define SUPERPAGE_NBASEPAGES 1	/* we don't support superpages on i386 */
#endif
201
/*
 * Atomic 64-bit store of a page table entry.
 *
 *	entryp	address of the entry to overwrite
 *	value	new 64-bit contents for *entryp
 *
 * On i386 the store is performed with a cmpxchg8b retry loop; on other
 * builds it is a plain assignment through entryp.
 *
 * NOTE(review): the i386 cmpxchg8b carries no lock prefix, and the 64-bit
 * path relies on the compiler emitting one 8-byte store for a non-volatile
 * object -- confirm both are acceptable for concurrent updaters.
 */
static inline void
pmap_store_pte(pt_entry_t *entryp, pt_entry_t value)
{
#ifdef __i386__
	/*
	 * Load the new value into %ecx:%ebx
	 * Load the old value into %edx:%eax
	 * Compare-exchange-8bytes at address entryp (loaded in %edi)
	 * If the compare succeeds, the new value will have been stored.
	 * Otherwise, the old value changed and reloaded, so try again.
	 */
	__asm__ volatile(
		"	movl	(%0), %%eax	\n\t"
		"	movl	4(%0), %%edx	\n\t"
		"1:				\n\t"
		"	cmpxchg8b (%0)		\n\t"
		"	jnz 1b"
		:
		: "D" (entryp),
		  "b" ((uint32_t)value),
		  "c" ((uint32_t)(value >> 32))
		: "eax", "edx", "memory");
#else
	/*
	 * In the 32-bit kernel a compare-and-exchange loop was
	 * required to provide atomicity. For K64, life is easier:
	 */
	*entryp = value;
#endif
}
235
/*
 * In 64 bit spaces, the number of each type of page in the page tables.
 * Each level multiplies the level above by the entries-per-page of its
 * own entry type; the 1ULL factor forces 64-bit arithmetic throughout.
 */
#define NPML4PGS	((PAGE_SIZE / (sizeof (pml4_entry_t))) * 1ULL)
#define NPDPTPGS	((PAGE_SIZE / (sizeof (pdpt_entry_t))) * NPML4PGS)
#define NPDEPGS		((PAGE_SIZE / (sizeof (pd_entry_t))) * NPDPTPGS)
#define NPTEPGS		((PAGE_SIZE / (sizeof (pt_entry_t))) * NPDEPGS)
241
#if defined(__i386__)
/*
 * The 64-bit kernel is remapped in uber-space, which is at the base of
 * the highest 4th-level directory (KERNEL_UBER_PML4_INDEX). That is,
 * 512GB from the top of virtual space (or zero).
 */
#define KERNEL_UBER_PML4_INDEX	511
#define KERNEL_UBER_BASE	(0ULL - NBPML4)
#define KERNEL_UBER_BASE_HI32	((uint32_t)(KERNEL_UBER_BASE >> 32))
#else
/*
 * PML4 slots used by the 64-bit kernel, counted down from the top slot,
 * and the base addresses derived from them: KERNEL_BASE is one NBPML4
 * span below the top of the address space, KERNEL_BASEMENT one span
 * below that.
 */
#define KERNEL_PML4_INDEX		511
#define KERNEL_KEXTS_INDEX		510	/* Home of KEXTs - the basement */
#define KERNEL_PHYSMAP_PML4_INDEX	509	/* virtual to physical map */
#define KERNEL_BASE			(0ULL - NBPML4)
#define KERNEL_BASEMENT			(KERNEL_BASE - NBPML4)
#endif
258
/*
 * Cache-attribute combinations, each expressed as an OR of VM_MEM_* flags.
 */
#define VM_WIMG_DEFAULT		VM_MEM_COHERENT
#define VM_WIMG_COPYBACK	VM_MEM_COHERENT
#define VM_WIMG_COPYBACKLW	VM_WIMG_COPYBACK
#define VM_WIMG_INNERWBACK	VM_MEM_COHERENT

/* ?? intel ?? */
#define VM_WIMG_IO		(VM_MEM_COHERENT | 	\
				VM_MEM_NOT_CACHEABLE | VM_MEM_GUARDED)
#define VM_WIMG_WTHRU		(VM_MEM_WRITE_THROUGH | VM_MEM_COHERENT | VM_MEM_GUARDED)

/* write combining mode, aka store gather */
#define VM_WIMG_WCOMB		(VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT)
/*
 * Pte related macros
 *
 * Each macro assembles a virtual address by shifting page-table indices
 * into position and OR-ing the pieces together.
 */
#ifdef __i386__
#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDESHIFT)|((pti)<<PTESHIFT)))
/*
 * pmi is widened to 64 bits before the shift because PML4SHIFT exceeds
 * the width of int.
 * NOTE(review): the result is still cast to vm_offset_t as before; if
 * vm_offset_t is 32 bits wide here the PML4 contribution is truncated --
 * confirm the intended result type with any caller.
 */
#define VADDR64(pmi, pdi, pti) \
	((vm_offset_t)((((uint64_t)(pmi))<<PML4SHIFT)|((pdi)<<PDESHIFT)|((pti)<<PTESHIFT)))
#else
/*
 * Kernel virtual address from its four table indices.  Bits 47..63 are
 * always set by the leading ((uint64_t)-1 << 47) term.
 */
#define KVADDR(pmi, pdpi, pdi, pti)		  \
	 ((vm_offset_t)			  \
		((uint64_t) -1    << 47)        | \
		((uint64_t)(pmi)  << PML4SHIFT) | \
		((uint64_t)(pdpi) << PDPTSHIFT) | \
		((uint64_t)(pdi)  << PDESHIFT)  | \
		((uint64_t)(pti)  << PTESHIFT))
#endif
284
/*
 * Size of Kernel address space.  This is the number of page table pages
 * (4MB each) to use for the kernel.  256 pages == 1 Gigabyte.
 * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
 *
 * Every value below may be overridden by defining it before this point.
 */
#if !defined(KVA_PAGES)
#define KVA_PAGES	1024
#endif

#if !defined(NKPT)
#define NKPT		500	/* actual number of kernel page tables */
#endif

#if !defined(NKPDE)
#define NKPDE		(KVA_PAGES - 1)	/* addressable number of page tables/pde's */
#endif
300
301
302#ifdef __i386__
303enum high_cpu_types {
304  HIGH_CPU_ISS0,
305  HIGH_CPU_ISS1,
306  HIGH_CPU_DESC,
307  HIGH_CPU_LDT_BEGIN,
308  HIGH_CPU_LDT_END = HIGH_CPU_LDT_BEGIN + (LDTSZ / 512) - 1,
309  HIGH_CPU_END
310};
311
312enum  high_fixed_addresses {
313  HIGH_FIXED_TRAMPS,  /* must be first */
314  HIGH_FIXED_TRAMPS_END,
315  HIGH_FIXED_GDT,
316  HIGH_FIXED_IDT,
317  HIGH_FIXED_LDT_BEGIN,
318  HIGH_FIXED_LDT_END = HIGH_FIXED_LDT_BEGIN + (LDTSZ / 512) - 1,
319  HIGH_FIXED_KTSS,
320  HIGH_FIXED_DFTSS,
321  HIGH_FIXED_DBTSS,
322  HIGH_FIXED_CPUS_BEGIN,
323  HIGH_FIXED_CPUS_END = HIGH_FIXED_CPUS_BEGIN + (HIGH_CPU_END * MAX_CPUS) - 1,
324};
325
326
/* XXX64  below PTDI values need cleanup */
/*
 * The *PTDI values control the layout of virtual memory
 */
#define KPTDI		(0x000)	/* start of kernel virtual pde's */
#define PTDPTDI		(0x7F4)	/* ptd entry that points to ptd! */
#define APTDPTDI	(0x7F8)	/* alt ptd entry that points to APTD */
#define UMAXPTDI	(0x7F8)	/* ptd entry for user space end */
#define UMAXPTEOFF	(NPTEPG)	/* pte entry for user space end */

/* Virtual address formed from directory index KPTDI, table index 0. */
#define KERNBASE	VADDR(KPTDI,0)

/*
 *	Convert address offset to directory address
 *	containing the page table pointer - legacy
 */
/*#define pmap_pde(m,v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))*/

/* The top NBPDE bytes of the 32-bit address space. */
#define HIGH_MEM_BASE	((uint32_t)( -NBPDE) )  /* shared gdt etc seg addr */ /* XXX64 ?? */

/* Address of page number x counted from HIGH_MEM_BASE. */
#define pmap_index_to_virt(x)	(HIGH_MEM_BASE | ((unsigned)(x) << PAGE_SHIFT))
348#endif
349
/*
 *	Convert address offset to page descriptor index
 *
 *	The pmap argument is accepted but not used by any of these macros.
 *	NOTE(review): pdptnum/pdenum mask with PDPTMASK/PDEMASK, which are
 *	byte-offset masks (NB* - 1) rather than 9-bit index masks -- confirm
 *	callers only pass addresses for which that makes no difference.
 */
#define pdptnum(pmap, a)	(PDPTMASK & ((vm_offset_t)(a) >> PDPTSHIFT))
#define pdenum(pmap, a)		(PDEMASK & ((vm_offset_t)(a) >> PDESHIFT))
#define PMAP_INVALID_PDPTNUM	(~0ULL)

/*
 *	Linear index of the entry covering address a at each level, taken
 *	from the low 48 bits of the address.
 */
#define pdeidx(pmap, a)		((((1ULL<<(48 - PDSHIFT)) -1)) & ((a) >> PDSHIFT))
#define pdptidx(pmap, a)	((((1ULL<<(48 - PDPTSHIFT)) -1)) & ((a) >> PDPTSHIFT))
#define pml4idx(pmap, a)	((((1ULL<<(48 - PML4SHIFT)) -1)) & ((a) >> PML4SHIFT))


/*
 *	Convert page descriptor index to user virtual address
 */
#define pdetova(a)		((vm_offset_t)(a) << PDESHIFT)

/*
 *	Convert address offset to page table index
 */
#define ptenum(a)		(PTEMASK & ((vm_offset_t)(a) >> PTESHIFT))
371
/*
 *	Hardware pte bit definitions (to be used directly on the ptes
 *	without using the bit fields).  Written as single-bit shifts; the
 *	number after each definition is the bit position.
 */

#define INTEL_PTE_VALID		(1ULL << 0)
#define INTEL_PTE_WRITE		(1ULL << 1)
#define INTEL_PTE_RW		(1ULL << 1)	/* same bit as INTEL_PTE_WRITE */
#define INTEL_PTE_USER		(1ULL << 2)
#define INTEL_PTE_WTHRU		(1ULL << 3)
#define INTEL_PTE_NCACHE 	(1ULL << 4)
#define INTEL_PTE_REF		(1ULL << 5)
#define INTEL_PTE_MOD		(1ULL << 6)
#define INTEL_PTE_PS		(1ULL << 7)
#define INTEL_PTE_PTA		(1ULL << 7)	/* same bit as INTEL_PTE_PS */
#define INTEL_PTE_GLOBAL	(1ULL << 8)
#define INTEL_PTE_WIRED		(1ULL << 9)
#define INTEL_PDPTE_NESTED	(1ULL << 10)

/* Mask selecting the frame-address bits of an entry. */
#define INTEL_PTE_PFN		PG_FRAME

#define INTEL_PTE_NX		(1ULL << 63)

#define INTEL_PTE_INVALID       0

/* This is conservative, but suffices */
#define INTEL_PTE_RSVD		((1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))
397
/*
 * Conversions between a physical address and the frame bits of an entry.
 * Both directions simply mask with INTEL_PTE_PFN.
 */
#define	pa_to_pte(a)		((a) & INTEL_PTE_PFN) /* XXX */
#define	pte_to_pa(p)		((p) & INTEL_PTE_PFN) /* XXX */
/* Advance p in place by one base page (INTEL_OFFMASK + 1 bytes). */
#define	pte_increment_pa(p)	((p) += INTEL_OFFMASK+1)

/*
 * Build a valid entry for physical address p.  "kernel" variants leave
 * INTEL_PTE_USER clear; "rw" variants add INTEL_PTE_RW.
 */
#define pte_kernel_rw(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_RW))
#define pte_kernel_ro(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID))
#define pte_user_rw(p)            ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW))
#define pte_user_ro(p)            ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER))
406
/* Cache-mode selectors: zero for the default, otherwise distinct single bits. */
#define PMAP_DEFAULT_CACHE	0
#define PMAP_INHIBIT_CACHE	1	/* bit 0 */
#define PMAP_GUARDED_CACHE	2	/* bit 1 */
#define PMAP_ACTIVATE_CACHE	4	/* bit 2 */
#define PMAP_NO_GUARD_CACHE	8	/* bit 3 */
412
413
414#ifndef	ASSEMBLER
415
416#include <sys/queue.h>
417
418/*
419 * Address of current and alternate address space page table maps
420 * and directories.
421 */
422
423#ifdef __i386__
424extern pt_entry_t	PTmap[], APTmap[], Upte;
425extern pd_entry_t	PTD[], APTD[], PTDpde[], APTDpde[], Upde;
426extern pmap_paddr_t	lo_kernel_cr3;
427extern pdpt_entry_t	*IdlePDPT64;
428extern pdpt_entry_t	IdlePDPT[];
429extern pml4_entry_t	IdlePML4[];
430#else
431extern pt_entry_t	*PTmap;
432extern pdpt_entry_t	*IdlePDPT;
433extern pml4_entry_t	*IdlePML4;
434#endif
435extern boolean_t	no_shared_cr3;
436extern addr64_t		kernel64_cr3;
437extern pd_entry_t	*IdlePTD;	/* physical addr of "Idle" state PTD */
438
439extern uint64_t		pmap_pv_hashlist_walks;
440extern uint64_t		pmap_pv_hashlist_cnts;
441extern uint32_t		pmap_pv_hashlist_max;
442extern uint32_t		pmap_kernel_text_ps;
443
#ifdef __i386__
/*
 * ** i386 **
 * virtual address to page table entry and
 * to physical address. Likewise for alternate address space.
 * Note: these work recursively, thus vtopte of a pte will give
 * the corresponding pde that in turn maps it.
 *
 * The argument is parenthesized so that an expression argument is cast
 * to vm_offset_t as a whole rather than only its first operand.
 */

#define	vtopte(va)	(PTmap + i386_btop((vm_offset_t)(va)))
#endif
455
456
457#ifdef __x86_64__
458#define ID_MAP_VTOP(x)	((void *)(((uint64_t)(x)) & LOW_4GB_MASK))
459
460extern	uint64_t physmap_base, physmap_max;
461
462#define NPHYSMAP (MAX(K64_MAXMEM/GB + 4, 4))
463
464static inline boolean_t physmap_enclosed(addr64_t a) {
465	return (a < (NPHYSMAP * GB));
466}
467
468static	inline void * PHYSMAP_PTOV_check(void *paddr) {
469	uint64_t pvaddr = (uint64_t)paddr + physmap_base;
470
471	if (__improbable(pvaddr >= physmap_max))
472		panic("PHYSMAP_PTOV bounds exceeded, 0x%qx, 0x%qx, 0x%qx",
473		      pvaddr, physmap_base, physmap_max);
474
475	return (void *)pvaddr;
476}
477
478#define PHYSMAP_PTOV(x)	(PHYSMAP_PTOV_check((void*) (x)))
479
/*
 * For KASLR, we alias the master processor's IDT and GDT at fixed
 * virtual addresses to defeat SIDT/SGDT address leakage.
 * The low global vector page is mapped at a fixed alias also.
 *
 * The three aliases sit at consecutive 0x1000-byte offsets from
 * VM_MIN_KERNEL_ADDRESS.
 */
#define MASTER_IDT_ALIAS	(VM_MIN_KERNEL_ADDRESS + 0x0000)
#define MASTER_GDT_ALIAS	(VM_MIN_KERNEL_ADDRESS + 0x1000)
#define LOWGLOBAL_ALIAS		(VM_MIN_KERNEL_ADDRESS + 0x2000)
491
492#endif /*__x86_64__ */
493
/*
 * Bitmask type with one bit per CPU.  The volatile qualifier reflects
 * that other processors modify values of this type.
 * NOTE(review): the "<= 32" limit below presumably refers to the number
 * of CPUs representable -- confirm.
 */
typedef	volatile long	cpu_set;	/* set of CPUs - must be <= 32 */
					/* changed by other processors */
496#include <vm/vm_page.h>
497
498/*
499 *	For each vm_page_t, there is a list of all currently
500 *	valid virtual mappings of that page.  An entry is
501 *	a pv_entry_t; the list is the pv_table.
502 */
503
504struct pmap {
505	decl_simple_lock_data(,lock)	/* lock on map */
506	pmap_paddr_t    pm_cr3;         /* physical addr */
507	boolean_t       pm_shared;
508        pd_entry_t      *dirbase;        /* page directory pointer */
509#ifdef __i386__
510	pmap_paddr_t    pdirbase;        /* phys. address of dirbase */
511	vm_offset_t     pm_hold;        /* true pdpt zalloc addr */
512#endif
513        vm_object_t     pm_obj;         /* object to hold pde's */
514        task_map_t      pm_task_map;
515        pdpt_entry_t    *pm_pdpt;       /* KVA of 3rd level page */
516	pml4_entry_t    *pm_pml4;       /* VKA of top level */
517	vm_object_t     pm_obj_pdpt;    /* holds pdpt pages */
518	vm_object_t     pm_obj_pml4;    /* holds pml4 pages */
519#define	PMAP_PCID_MAX_CPUS	(48)	/* Must be a multiple of 8 */
520	pcid_t		pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];
521	volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];
522	struct pmap_statistics	stats;	/* map statistics */
523	int		ref_count;	/* reference count */
524        int		nx_enabled;
525	ledger_t	ledger;		/* ledger tracking phys mappings */
526};
527
528
529#if NCOPY_WINDOWS > 0
/*
 * Layout of the per-cpu map-window array: three consecutive groups
 * (PDPT, PDE, PTE) of four windows each, followed by PMAP_WINDOW_SIZE
 * further windows starting at PMAP_NWINDOWS_FIRSTFREE.
 */
#define PMAP_PDPT_FIRST_WINDOW	0
#define PMAP_PDPT_NWINDOWS	4

#define PMAP_PDE_FIRST_WINDOW	(PMAP_PDPT_NWINDOWS)
#define PMAP_PDE_NWINDOWS	4

#define PMAP_PTE_FIRST_WINDOW	(PMAP_PDE_FIRST_WINDOW + PMAP_PDE_NWINDOWS)
#define PMAP_PTE_NWINDOWS	4

#define PMAP_NWINDOWS_FIRSTFREE	(PMAP_PTE_FIRST_WINDOW + PMAP_PTE_NWINDOWS)
#define PMAP_WINDOW_SIZE	8
#define PMAP_NWINDOWS		(PMAP_NWINDOWS_FIRSTFREE + PMAP_WINDOW_SIZE)
540
541typedef struct {
542	pt_entry_t	*prv_CMAP;
543	caddr_t		prv_CADDR;
544} mapwindow_t;
545
546typedef struct cpu_pmap {
547        int                     pdpt_window_index;
548        int                     pde_window_index;
549        int                     pte_window_index;
550	mapwindow_t		mapwindow[PMAP_NWINDOWS];
551} cpu_pmap_t;
552
553
554extern mapwindow_t *pmap_get_mapwindow(pt_entry_t pentry);
555extern void         pmap_put_mapwindow(mapwindow_t *map);
556#endif
557
558typedef struct pmap_memory_regions {
559	ppnum_t base;
560	ppnum_t end;
561	ppnum_t alloc;
562	uint32_t type;
563	uint64_t attribute;
564} pmap_memory_region_t;
565
566extern unsigned pmap_memory_region_count;
567extern unsigned pmap_memory_region_current;
568
569#define PMAP_MEMORY_REGIONS_SIZE 128
570
571extern pmap_memory_region_t pmap_memory_regions[];
572#include <i386/pmap_pcid.h>
573
574static inline void
575set_dirbase(pmap_t tpmap, __unused thread_t thread) {
576	int ccpu = cpu_number();
577	cpu_datap(ccpu)->cpu_task_cr3 = tpmap->pm_cr3;
578	cpu_datap(ccpu)->cpu_task_map = tpmap->pm_task_map;
579#ifndef __i386__
580	/*
581	 * Switch cr3 if necessary
582	 * - unless running with no_shared_cr3 debugging mode
583	 *   and we're not on the kernel's cr3 (after pre-empted copyio)
584	 */
585	if (__probable(!no_shared_cr3)) {
586		if (get_cr3_base() != tpmap->pm_cr3) {
587			if (pmap_pcid_ncpus) {
588				pmap_pcid_activate(tpmap, ccpu);
589			}
590			else
591				set_cr3_raw(tpmap->pm_cr3);
592		}
593	} else {
594		if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3)
595			set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
596	}
597#endif
598}
599
600/*
601 *	External declarations for PMAP_ACTIVATE.
602 */
603
604extern void		process_pmap_updates(void);
605extern void		pmap_update_interrupt(void);
606
607/*
608 *	Machine dependent routines that are used only for i386/i486/i860.
609 */
610
611extern addr64_t		(kvtophys)(
612				vm_offset_t	addr);
613
614extern kern_return_t	pmap_expand(
615				pmap_t		pmap,
616				vm_map_offset_t	addr,
617				unsigned int options);
#if	!defined(__x86_64__)
/*
 * Entry lookups for a (pmap, address) pair at the PTE, PDE and PDPT
 * levels; each returns a pointer to the entry type named in its
 * return type.
 */
extern pt_entry_t	*pmap_pte(struct pmap *pmap, vm_map_offset_t addr);
extern pd_entry_t	*pmap_pde(struct pmap *pmap, vm_map_offset_t addr);
extern pd_entry_t	*pmap64_pde(struct pmap *pmap, vm_map_offset_t addr);
extern pdpt_entry_t	*pmap64_pdpt(struct pmap *pmap, vm_map_offset_t addr);
#endif
635extern vm_offset_t	pmap_map(
636				vm_offset_t	virt,
637				vm_map_offset_t	start,
638				vm_map_offset_t	end,
639				vm_prot_t	prot,
640				unsigned int	flags);
641
642extern vm_offset_t	pmap_map_bd(
643				vm_offset_t	virt,
644				vm_map_offset_t	start,
645				vm_map_offset_t	end,
646				vm_prot_t	prot,
647				unsigned int	flags);
648
649extern void		pmap_bootstrap(
650				vm_offset_t	load_start,
651				boolean_t	IA32e);
652
653extern boolean_t	pmap_valid_page(
654				ppnum_t	pn);
655
656extern int		pmap_list_resident_pages(
657				struct pmap	*pmap,
658				vm_offset_t	*listp,
659				int		space);
660extern void		x86_filter_TLB_coherency_interrupts(boolean_t);
#if defined(__i386__)
/* Commpage set-up for 32-bit and 64-bit user address spaces. */
extern void		pmap_commpage32_init(vm_offset_t kernel,
					     vm_offset_t user,
					     int count);
extern void		pmap_commpage64_init(vm_offset_t kernel,
					     vm_map_offset_t user,
					     int count);

#endif
/*
 * Get cache attributes (as pagetable bits) for the specified phys page
 */
extern	unsigned	pmap_get_cache_attributes(ppnum_t);
#if NCOPY_WINDOWS > 0
/* Allocate / release the per-cpu window state (struct cpu_pmap). */
extern struct cpu_pmap	*pmap_cpu_alloc(boolean_t is_boot_cpu);
extern void		pmap_cpu_free(struct cpu_pmap *cp);
#endif
682
683extern void		pmap_map_block(
684				pmap_t pmap,
685				addr64_t va,
686				ppnum_t pa,
687				uint32_t size,
688				vm_prot_t prot,
689				int attr,
690				unsigned int flags);
691
692extern void invalidate_icache(vm_offset_t addr, unsigned cnt, int phys);
693extern void flush_dcache(vm_offset_t addr, unsigned count, int phys);
694extern ppnum_t          pmap_find_phys(pmap_t map, addr64_t va);
695
696extern void pmap_cpu_init(void);
697extern void pmap_disable_NX(pmap_t pmap);
698#ifdef __i386__
699extern void pmap_set_4GB_pagezero(pmap_t pmap);
700extern void pmap_clear_4GB_pagezero(pmap_t pmap);
701extern void pmap_load_kernel_cr3(void);
702extern vm_offset_t pmap_cpu_high_map_vaddr(int, enum high_cpu_types);
703extern vm_offset_t pmap_high_map_vaddr(enum high_cpu_types);
704extern vm_offset_t pmap_high_map(pt_entry_t, enum high_cpu_types);
705extern vm_offset_t pmap_cpu_high_shared_remap(int, enum high_cpu_types, vm_offset_t, int);
706extern vm_offset_t pmap_high_shared_remap(enum high_fixed_addresses, vm_offset_t, int);
707#endif
708
709extern void pt_fake_zone_init(int);
710extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
711			      uint64_t *, int *, int *, int *);
712extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1,2));
713
714/*
715 *	Macros for speed.
716 */
717
718
719#include <kern/spl.h>
720
721
/*
 * Look up the pmap behind `map` and make it the current cpu's directory
 * base via set_dirbase().  Expands to a braced block.
 */
#define PMAP_ACTIVATE_MAP(map, thread)	{				\
	register pmap_t		tpmap = vm_map_pmap(map);		\
									\
	set_dirbase(tpmap, thread);					\
}
728
/*
 * PMAP_DEACTIVATE_MAP(map, thread)
 *
 * i386:   reload the kernel cr3 when the map's pmap is of type
 *         TASK_MAP_64BIT_SHARED.
 * x86_64: with PCIDs in use, assert that the PCID recorded for this
 *         (pmap, cpu) pair matches the low 12 bits of cr3.
 * other:  no-op.
 *
 * The thread argument is unused.  The expansion keeps the terminating
 * semicolon it has always supplied, so existing call sites (with or
 * without their own ';') are unaffected.  The i386 body is wrapped in
 * do/while so that a caller's `else` cannot attach to the inner `if`.
 */
#ifdef __i386__
#define PMAP_DEACTIVATE_MAP(map, thread)				\
	do {								\
		if (vm_map_pmap(map)->pm_task_map == TASK_MAP_64BIT_SHARED) \
			pmap_load_kernel_cr3();				\
	} while (0);
#elif defined(__x86_64__)
#define PMAP_DEACTIVATE_MAP(map, thread)				\
	pmap_assert(pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple((map)->pmap, cpu_number()) == (get_cr3_raw() & 0xFFF)) : TRUE);
#else
#define PMAP_DEACTIVATE_MAP(map, thread)
#endif
739
#if   defined(__i386__)

/*
 * PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) -- i386 variant.
 *
 * Steps, in order:
 *  1. At splhigh: if the two threads differ in map or task, deactivate
 *     the old map and activate the new one.
 *  2. Still at splhigh: for each of the NCOPY_WINDOWS copy windows of
 *     new_th whose user_base is not (user_addr_t)-1, look up the PDE for
 *     that address with pmap_pde() and store it (or 0 when the lookup
 *     returns NULL) into the matching slot of this cpu's
 *     cpu_copywindow_pdp array.
 *  3. After splx: if new_th's copyio_state is WINDOWS_OPENED a full TLB
 *     flush is scheduled; otherwise the state is set to WINDOWS_DIRTY.
 *  4. If new_th has a physwindow_pte, install it in this cpu's
 *     cpu_physwindow_ptep and, when no full flush is pending, invalidate
 *     just cpu_physwindow_base with invlpg.
 *  5. Perform the full flush_tlb() if one was scheduled.
 *
 * my_cpu is not referenced.  old_th and new_th are expanded several
 * times and without parentheses, so pass plain identifiers.
 */
#define	PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) {			\
	spl_t		spl;						\
	pt_entry_t	*kpdp;						\
	pt_entry_t	*updp;						\
        int		i;						\
        int		need_flush;					\
                                                                        \
        need_flush = 0;							\
        spl = splhigh();						\
	if ((old_th->map != new_th->map) || (new_th->task != old_th->task)) {	\
		PMAP_DEACTIVATE_MAP(old_th->map, old_th);		\
		PMAP_ACTIVATE_MAP(new_th->map, new_th);			\
	}								\
        kpdp = current_cpu_datap()->cpu_copywindow_pdp;			\
        for (i = 0; i < NCOPY_WINDOWS; i++) {				\
                if (new_th->machine.copy_window[i].user_base != (user_addr_t)-1) {	\
	                updp = pmap_pde(new_th->map->pmap,		\
                              new_th->machine.copy_window[i].user_base);\
                        pmap_store_pte(kpdp, updp ? *updp : 0);		\
                }							\
                kpdp++;							\
        }								\
	splx(spl);							\
        if (new_th->machine.copyio_state == WINDOWS_OPENED)		\
                need_flush = 1;						\
        else								\
                new_th->machine.copyio_state = WINDOWS_DIRTY;		\
        if (new_th->machine.physwindow_pte) {				\
	  pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep),	\
			       new_th->machine.physwindow_pte);	        \
                if (need_flush == 0)					\
                        invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);\
        }								\
        if (need_flush)							\
                flush_tlb();						\
}

#else /* __x86_64__ */
/*
 * PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) -- x86_64 variant.
 *
 * Asserts (when pmap assertions are enabled) that interrupts are off,
 * then deactivates the old map and activates the new one if the two
 * threads use different maps.  my_cpu is not referenced.
 */
#define	PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) {			\
                                                                        \
	pmap_assert(ml_get_interrupts_enabled() == FALSE);		\
	if (old_th->map != new_th->map) {				\
		PMAP_DEACTIVATE_MAP(old_th->map, old_th);		\
		PMAP_ACTIVATE_MAP(new_th->map, new_th);			\
	}								\
}
#endif /* __i386__ */
789
/*
 * PMAP_SWITCH_USER(th, new_map, my_cpu)
 *
 * At splhigh: deactivate th's current map, assign new_map to th->map,
 * and activate it; then restore the previous spl.  When copy windows
 * are configured (NCOPY_WINDOWS > 0) the thread's copy windows are
 * additionally invalidated after the spl is restored.
 *
 * my_cpu is not referenced.  th is expanded several times and without
 * parentheses, so pass a plain identifier.
 */
#if NCOPY_WINDOWS > 0
#define	PMAP_SWITCH_USER(th, new_map, my_cpu) {				\
	spl_t		spl;						\
									\
	spl = splhigh();						\
	PMAP_DEACTIVATE_MAP(th->map, th);				\
	th->map = new_map;						\
	PMAP_ACTIVATE_MAP(th->map, th);					\
	splx(spl);							\
	inval_copy_windows(th);						\
}
#else
#define	PMAP_SWITCH_USER(th, new_map, my_cpu) {				\
	spl_t		spl;						\
									\
	spl = splhigh();						\
	PMAP_DEACTIVATE_MAP(th->map, th);				\
	th->map = new_map;						\
	PMAP_ACTIVATE_MAP(th->map, th);					\
	splx(spl);							\
}
#endif
812
/*
 * Marking the current cpu's cr3 inactive is achieved by setting its lsb.
 * Marking the current cpu's cr3 active once more involves clearing this bit.
 * Note that valid page tables are page-aligned and so the bottom 12 bits
 * are normally zero, modulo PCID.
 * We can only mark the current cpu active/inactive but we can test any cpu.
 *
 * Every expansion is parenthesized as a whole so it composes safely
 * inside a larger expression.
 */

/* Set bit 0 of the current cpu's cpu_active_cr3. */
#define CPU_CR3_MARK_INACTIVE()						\
	(current_cpu_datap()->cpu_active_cr3 |= 1)

/* Clear bit 0 of the current cpu's cpu_active_cr3. */
#define CPU_CR3_MARK_ACTIVE()	 					\
	(current_cpu_datap()->cpu_active_cr3 &= ~1)

/* True when bit 0 of the given cpu's cpu_active_cr3 is clear. */
#define CPU_CR3_IS_ACTIVE(cpu)						\
	((cpu_datap(cpu)->cpu_active_cr3 & 1) == 0)

/* The given cpu's cpu_active_cr3 with the inactive marker masked off. */
#define CPU_GET_ACTIVE_CR3(cpu)						\
	(cpu_datap(cpu)->cpu_active_cr3 & ~1)

/* The given cpu's cpu_task_cr3, as last published by set_dirbase(). */
#define CPU_GET_TASK_CR3(cpu)						\
	(cpu_datap(cpu)->cpu_task_cr3)
834
/*
 *	Mark this cpu idle, and remove it from the active set,
 *	since it is not actively using any pmap.  Signal_cpus
 *	will notice that it is idle, and avoid signaling it,
 *	but will queue the update request for when the cpu
 *	becomes active.
 *
 *	Both variants assert that interrupts are disabled, set the
 *	inactive marker with CPU_CR3_MARK_INACTIVE(), and then issue an
 *	mfence.  The i386 variant first either processes pending pmap
 *	updates (when not in 64-bit mode, or when no_shared_cr3 is set)
 *	or reloads the kernel cr3.  my_cpu is not referenced.
 */
#if   defined(__x86_64__)
#define MARK_CPU_IDLE(my_cpu)	{					\
	assert(ml_get_interrupts_enabled() == FALSE);			\
	CPU_CR3_MARK_INACTIVE();					\
	__asm__ volatile("mfence");					\
}
#else /* __i386__ native */
#define MARK_CPU_IDLE(my_cpu)	{					\
	assert(ml_get_interrupts_enabled() == FALSE);			\
	/*								\
	 *	Mark this cpu idle, and remove it from the active set,	\
	 *	since it is not actively using any pmap.  Signal_cpus	\
	 *	will notice that it is idle, and avoid signaling it,	\
	 *	but will queue the update request for when the cpu	\
	 *	becomes active.						\
	 */								\
	if (!cpu_mode_is64bit() || no_shared_cr3)			\
		process_pmap_updates();					\
	else								\
		pmap_load_kernel_cr3();					\
	CPU_CR3_MARK_INACTIVE();					\
	__asm__ volatile("mfence");					\
}
#endif /* __i386__ */
866
/*
 * MARK_CPU_ACTIVE(my_cpu)
 *
 * Asserts that interrupts are disabled, clears the inactive marker with
 * CPU_CR3_MARK_ACTIVE(), issues an mfence, and only then checks the
 * current cpu's cpu_tlb_invalid flag, calling process_pmap_updates()
 * if it is set.  my_cpu is not referenced.
 */
#define MARK_CPU_ACTIVE(my_cpu) {					\
	assert(ml_get_interrupts_enabled() == FALSE);			\
	/*								\
	 *	If a kernel_pmap update was requested while this cpu	\
	 *	was idle, process it as if we got the interrupt.	\
	 *	Before doing so, remove this cpu from the idle set.	\
	 *	Since we do not grab any pmap locks while we flush	\
	 *	our TLB, another cpu may start an update operation	\
	 *	before we finish.  Removing this cpu from the idle	\
	 *	set assures that we will receive another update		\
	 *	interrupt if this happens.				\
	 */								\
	CPU_CR3_MARK_ACTIVE();						\
	__asm__ volatile("mfence");					\
									\
	if (current_cpu_datap()->cpu_tlb_invalid)			\
	    process_pmap_updates();					\
}
885
/* Expands to nothing on this architecture. */
#define PMAP_CONTEXT(pmap, thread)

/*
 * True when VA lies in the closed interval
 * [vm_min_kernel_address, vm_max_kernel_address].  VA is evaluated twice.
 */
#define pmap_kernel_va(VA)	\
	((vm_min_kernel_address <= ((vm_offset_t) (VA))) &&	\
	 (vm_max_kernel_address >= ((vm_offset_t) (VA))))


/* Accessors for the resident-page counters kept in the pmap's stats. */
#define pmap_resident_count(pmap)	((pmap)->stats.resident_count)
#define pmap_resident_max(pmap)		((pmap)->stats.resident_max)

/* Expands to nothing on this architecture. */
#define	pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr)

/* Both attribute operations always yield KERN_INVALID_ADDRESS here. */
#define	pmap_attribute(pmap,addr,size,attr,value) \
					(KERN_INVALID_ADDRESS)
#define	pmap_attribute_cache_sync(addr,size,attr,value) \
					(KERN_INVALID_ADDRESS)
900
901#define MACHINE_PMAP_IS_EMPTY	1
902extern boolean_t pmap_is_empty(pmap_t		pmap,
903			       vm_map_offset_t	start,
904			       vm_map_offset_t	end);
905
906#define MACHINE_BOOTSTRAPPTD	1	/* Static bootstrap page-tables */
907
908kern_return_t
909pmap_permissions_verify(pmap_t, vm_map_t, vm_offset_t, vm_offset_t);
910
911#endif	/* ASSEMBLER */
912
913
914#endif	/* _PMAP_MACHINE_ */
915
916
917#endif  /* KERNEL_PRIVATE */
918