/*	$NetBSD: pte.h,v 1.22 2010/04/06 20:43:57 jld Exp $	*/
2
3/*
4 * Copyright (c) 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Frank van der Linden for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed for the NetBSD Project by
20 *      Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38/*
39 *
40 * Copyright (c) 1997 Charles D. Cranor and Washington University.
41 * All rights reserved.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgment:
53 *      This product includes software developed by Charles D. Cranor and
54 *      Washington University.
55 * 4. The name of the author may not be used to endorse or promote products
56 *    derived from this software without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
59 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
60 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
61 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
63 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
64 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
65 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
66 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
67 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68 */
69
70/*
71 * pte.h rewritten by chuck based on the jolitz version, plus random
72 * info on the pentium and other processors found on the net.   the
73 * goal of this rewrite is to provide enough documentation on the MMU
74 * hardware that the reader will be able to understand it without having
75 * to refer to a hardware manual.
76 */
77
78#ifndef _I386_PTE_H_
79#define _I386_PTE_H_
80#ifdef _KERNEL_OPT
81#include "opt_xen.h"
82#endif
83
84/*
85 * i386 MMU hardware structure (without PAE extension):
86 *
87 * the i386 MMU is a two-level MMU which maps 4GB of virtual memory.
88 * the pagesize is 4K (4096 [0x1000] bytes), although newer pentium
89 * processors can support a 4MB pagesize as well.
90 *
91 * the first level table (segment table?) is called a "page directory"
92 * and it contains 1024 page directory entries (PDEs).   each PDE is
93 * 4 bytes (an int), so a PD fits in a single 4K page.   this page is
94 * the page directory page (PDP).  each PDE in a PDP maps 4MB of space
95 * (1024 * 4MB = 4GB).   a PDE contains the physical address of the
96 * second level table: the page table.   or, if 4MB pages are being used,
97 * then the PDE contains the PA of the 4MB page being mapped.
98 *
99 * a page table consists of 1024 page table entries (PTEs).  each PTE is
100 * 4 bytes (an int), so a page table also fits in a single 4K page.  a
101 * 4K page being used as a page table is called a page table page (PTP).
102 * each PTE in a PTP maps one 4K page (1024 * 4K = 4MB).   a PTE contains
103 * the physical address of the page it maps and some flag bits (described
104 * below).
105 *
 * the processor has a special register, "cr3", which points to the
 * PDP which is currently controlling the mappings of the virtual
 * address space.
109 *
110 * the following picture shows the translation process for a 4K page:
111 *
112 * %cr3 register [PA of PDP]
113 *      |
114 *      |
115 *      |   bits <31-22> of VA         bits <21-12> of VA   bits <11-0>
116 *      |   index the PDP (0 - 1023)   index the PTP        are the page offset
117 *      |         |                           |                  |
118 *      |         v                           |                  |
119 *      +--->+----------+                     |                  |
120 *           | PD Page  |   PA of             v                  |
121 *           |          |---PTP-------->+------------+           |
122 *           | 1024 PDE |               | page table |--PTE--+   |
123 *           | entries  |               | (aka PTP)  |       |   |
124 *           +----------+               | 1024 PTE   |       |   |
125 *                                      | entries    |       |   |
126 *                                      +------------+       |   |
127 *                                                           |   |
128 *                                                bits <31-12>   bits <11-0>
129 *                                                p h y s i c a l  a d d r
130 *
 * the i386 caches PTEs in a TLB.   it is important to flush out old
 * TLB mappings when making a change to a mapping.   writing to the
 * %cr3 will flush the entire TLB.    newer processors also have an
 * instruction that will invalidate the mapping of a single page (which
 * is useful if you are changing a single mapping because it preserves
 * all the other cached TLB entries).
137 *
 * as shown, bits 31-12 of the PTE contain the PA of the page being mapped.
 * the rest of the PTE is defined as follows:
140 *   bit#	name	use
141 *   11		n/a	available for OS use, hardware ignores it
142 *   10		n/a	available for OS use, hardware ignores it
143 *   9		n/a	available for OS use, hardware ignores it
144 *   8		G	global bit (see discussion below)
145 *   7		PS	page size [for PDEs] (0=4k, 1=4M <if supported>)
146 *   6		D	dirty (modified) page
147 *   5		A	accessed (referenced) page
148 *   4		PCD	cache disable
 *   3		PWT	page write-through (1=write-through caching, 0=write-back)
150 *   2		U/S	user/supervisor bit (0=supervisor only, 1=both u&s)
151 *   1		R/W	read/write bit (0=read only, 1=read-write)
152 *   0		P	present (valid)
153 *
154 * notes:
155 *  - PS is only supported on newer processors
156 *  - PTEs with the G bit are global in the sense that they are not
157 *    flushed from the TLB when %cr3 is written (to flush, use the
158 *    "flush single page" instruction).   this is only supported on
159 *    newer processors.    this bit can be used to keep the kernel's
160 *    TLB entries around while context switching.   since the kernel
161 *    is mapped into all processes at the same place it does not make
162 *    sense to flush these entries when switching from one process'
163 *    pmap to another.
164 *
 * The PAE extension extends the size of the PTE to 64 bits (52-bit physical
 * address) and is compatible with the amd64 PTE format. A first level table
 * then maps only 2MB and a second level table 1GB, so a third level page
 * table is introduced to map the 4GB virtual address space. This third
 * level table has only 4 entries.
169 * We can't use recursive mapping at level 3 to map the PD pages, as this
170 * would eat one GB of address space. In addition, Xen imposes restrictions
171 * on the entries we put in the L3 page (for example, the page pointed to by
172 * the last slot can't be shared among different L3 pages), which makes
173 * handling this L3 page in the same way we do for L2 on i386 (or L4 on amd64)
174 * difficult. For most things we'll just pretend to have only 2 levels,
175 * with the 2 high bits of the L2 index being in fact the index in the
176 * L3.
177 */
178
#ifndef _LOCORE

/*
 * C types holding one page directory entry (PDE) and one page table
 * entry (PTE).  both are 32 bits wide, or 64 bits wide when PAE is
 * defined.  they are only declared when _LOCORE is not defined.
 */
#ifndef PAE
typedef uint32_t pt_entry_t;		/* PTE */
typedef uint32_t pd_entry_t;		/* PDE */
#else /* PAE */
typedef uint64_t pt_entry_t;		/* PTE */
typedef uint64_t pd_entry_t;		/* PDE */
#endif /* !PAE */

#endif /* !_LOCORE */
193
/*
 * now we define various constants and macros for playing with virtual
 * addresses:
 *   Ln_SHIFT	bit position in a VA where the level-n index starts
 *   NBPD_Ln	number of bytes mapped by one level-n entry
 *   Ln_MASK	VA bits that form the level-n index
 *   Ln_FRAME	all index bits from the top level down to level n
 */

#define	L1_SHIFT	12		/* same value with and without PAE */

#ifndef PAE

#define	L2_SHIFT	22
#define	NBPD_L1		(1UL << L1_SHIFT) /* # bytes mapped by L1 ent (4K) */
#define	NBPD_L2		(1UL << L2_SHIFT) /* # bytes mapped by L2 ent (4MB) */

#define	L2_MASK		0xffc00000	/* VA bits <31-22> */
#define	L1_MASK		0x003ff000	/* VA bits <21-12> */

#define	L2_FRAME	(L2_MASK)

#define	PG_FRAME	0xfffff000	/* page frame mask */
#define	PG_LGFRAME	0xffc00000	/* large (4MB) page frame mask */

#else /* PAE */

#define	L2_SHIFT	21
#define	L3_SHIFT	30
#define	NBPD_L1		(1ULL << L1_SHIFT) /* # bytes mapped by L1 ent (4K) */
#define	NBPD_L2		(1ULL << L2_SHIFT) /* # bytes mapped by L2 ent (2MB) */
#define	NBPD_L3		(1ULL << L3_SHIFT) /* # bytes mapped by L3 ent (1GB) */

#define	L3_MASK		0xc0000000	/* VA bits <31-30> */
#define	L2_REALMASK	0x3fe00000	/* VA bits <29-21> */
#define	L2_MASK		(L2_REALMASK | L3_MASK)	/* "extended" L2 index */
#define	L1_MASK		0x001ff000	/* VA bits <20-12> */

#define	L3_FRAME	(L3_MASK)
#define	L2_FRAME	(L3_FRAME | L2_MASK)

#define	PG_FRAME	0x000ffffffffff000ULL /* page frame mask */
#define	PG_LGFRAME	0x000fffffffe00000ULL /* large (2MB) page frame mask */

/*
 * split an "extended" L2 index (L3 index in its high bits, real L2 index
 * below) back into its two parts.
 */
#define	l2tol3(idx)	((idx) >> (L3_SHIFT - L2_SHIFT))
#define	l2tol2(idx)	((idx) & (L2_REALMASK >> L2_SHIFT))

#endif /* !PAE */

/* written the same way for both layouts; uses the L2_FRAME/L1_MASK above */
#define	L1_FRAME	(L2_FRAME|L1_MASK)
237/*
238 * here we define the bits of the PDE/PTE, as described above:
239 *
240 * XXXCDC: need to rename these (PG_u == ugly).
241 */
242
/* access control and caching */
#define	PG_V		0x001	/* entry is valid (present) */
#define	PG_RO		0x000	/* read-only: the read/write bit stays clear */
#define	PG_RW		0x002	/* read-write page */
#define	PG_u		0x004	/* page is accessible from user mode */
#define	PG_N		0x018	/* non-cacheable (bits 3 and 4 both set) */

/* status bits: used / modified */
#define	PG_U		0x020	/* page has been used */
#define	PG_M		0x040	/* page has been modified */

/* mapping attributes */
#define	PG_PS		0x080	/* large page size (4MB; 2MB with PAE) */
#define	PG_G		0x100	/* global, not flushed when %cr3 is written */

/* bits the hardware ignores */
#define	PG_AVAIL1	0x200	/* ignored by hardware */
#define	PG_AVAIL2	0x400	/* ignored by hardware */
#define	PG_AVAIL3	0x800	/* ignored by hardware */

/*
 * all protection bits.  numerically this is PG_RW | PG_u | PG_AVAIL3.
 */
#define	PG_PROT		0x806
256
/*
 * short-hand protection codes for kernel mappings.  neither value has
 * the user-accessible bit (0x004) set; they differ only in the
 * read/write bit (0x002).
 */

#define	PG_KR		0x000	/* kernel read-only */
#define	PG_KW		0x002	/* kernel read-write */
263
/*
 * No-execute bit.  It is bit 63 of an entry, so it only fits in the 64-bit
 * PAE entry format; without PAE it is a dummy 0, which leaves an entry
 * unchanged when ORed into it.
 *
 * The ULL suffix gives the constant an explicit 64-bit unsigned type, in
 * line with the other 64-bit PAE masks in this file, instead of whichever
 * integer type the value happens to fit first.
 */
#ifdef PAE
#define	PG_NX		0x8000000000000000ULL /* No-execute */
#else
#define	PG_NX		0		/* dummy */
#endif
269
/*
 * page protection exception bits: flags describing the access that
 * raised the exception.
 */

#define	PGEX_P		0x1	/* set: protection violation; clear: no mapping */
#define	PGEX_W		0x2	/* exception happened during a write cycle */
#define	PGEX_U		0x4	/* exception happened in user mode (upl) */
277
278#endif /* _I386_PTE_H_ */
279