/* ept.c revision 241147 */
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/types.h> 33#include <sys/errno.h> 34#include <sys/systm.h> 35#include <sys/malloc.h> 36#include <sys/smp.h> 37 38#include <vm/vm.h> 39#include <vm/pmap.h> 40 41#include <machine/param.h> 42#include <machine/cpufunc.h> 43#include <machine/pmap.h> 44#include <machine/vmparam.h> 45 46#include <machine/vmm.h> 47#include "vmx_cpufunc.h" 48#include "vmx_msr.h" 49#include "vmx.h" 50#include "ept.h" 51 52#define EPT_PWL4(cap) ((cap) & (1UL << 6)) 53#define EPT_MEMORY_TYPE_WB(cap) ((cap) & (1UL << 14)) 54#define EPT_PDE_SUPERPAGE(cap) ((cap) & (1UL << 16)) /* 2MB pages */ 55#define EPT_PDPTE_SUPERPAGE(cap) ((cap) & (1UL << 17)) /* 1GB pages */ 56#define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32)) 57#define INVEPT_SUPPORTED(cap) ((cap) & (1UL << 20)) 58 59#define INVVPID_ALL_TYPES_MASK 0xF0000000000UL 60#define INVVPID_ALL_TYPES_SUPPORTED(cap) \ 61 (((cap) & INVVPID_ALL_TYPES_MASK) == INVVPID_ALL_TYPES_MASK) 62 63#define INVEPT_ALL_TYPES_MASK 0x6000000UL 64#define INVEPT_ALL_TYPES_SUPPORTED(cap) \ 65 (((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK) 66 67#define EPT_PG_RD (1 << 0) 68#define EPT_PG_WR (1 << 1) 69#define EPT_PG_EX (1 << 2) 70#define EPT_PG_MEMORY_TYPE(x) ((x) << 3) 71#define EPT_PG_IGNORE_PAT (1 << 6) 72#define EPT_PG_SUPERPAGE (1 << 7) 73 74#define EPT_ADDR_MASK ((uint64_t)-1 << 12) 75 76MALLOC_DECLARE(M_VMX); 77 78static uint64_t page_sizes_mask; 79 80int 81ept_init(void) 82{ 83 int page_shift; 84 uint64_t cap; 85 86 cap = rdmsr(MSR_VMX_EPT_VPID_CAP); 87 88 /* 89 * Verify that: 90 * - page walk length is 4 steps 91 * - extended page tables can be laid out in write-back memory 92 * - invvpid instruction with all possible types is supported 93 * - invept instruction with all possible types is supported 94 */ 95 if (!EPT_PWL4(cap) || 96 !EPT_MEMORY_TYPE_WB(cap) || 97 !INVVPID_SUPPORTED(cap) || 98 !INVVPID_ALL_TYPES_SUPPORTED(cap) || 99 
!INVEPT_SUPPORTED(cap) || 100 !INVEPT_ALL_TYPES_SUPPORTED(cap)) 101 return (EINVAL); 102 103 /* Set bits in 'page_sizes_mask' for each valid page size */ 104 page_shift = PAGE_SHIFT; 105 page_sizes_mask = 1UL << page_shift; /* 4KB page */ 106 107 page_shift += 9; 108 if (EPT_PDE_SUPERPAGE(cap)) 109 page_sizes_mask |= 1UL << page_shift; /* 2MB superpage */ 110 111 page_shift += 9; 112 if (EPT_PDPTE_SUPERPAGE(cap)) 113 page_sizes_mask |= 1UL << page_shift; /* 1GB superpage */ 114 115 return (0); 116} 117 118#if 0 119static void 120ept_dump(uint64_t *ptp, int nlevels) 121{ 122 int i, t, tabs; 123 uint64_t *ptpnext, ptpval; 124 125 if (--nlevels < 0) 126 return; 127 128 tabs = 3 - nlevels; 129 for (t = 0; t < tabs; t++) 130 printf("\t"); 131 printf("PTP = %p\n", ptp); 132 133 for (i = 0; i < 512; i++) { 134 ptpval = ptp[i]; 135 136 if (ptpval == 0) 137 continue; 138 139 for (t = 0; t < tabs; t++) 140 printf("\t"); 141 printf("%3d 0x%016lx\n", i, ptpval); 142 143 if (nlevels != 0 && (ptpval & EPT_PG_SUPERPAGE) == 0) { 144 ptpnext = (uint64_t *) 145 PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK); 146 ept_dump(ptpnext, nlevels); 147 } 148 } 149} 150#endif 151 152static size_t 153ept_create_mapping(uint64_t *ptp, vm_paddr_t gpa, vm_paddr_t hpa, size_t length, 154 vm_memattr_t attr, vm_prot_t prot, boolean_t spok) 155{ 156 int spshift, ptpshift, ptpindex, nlevels; 157 158 /* 159 * Compute the size of the mapping that we can accomodate. 
160 * 161 * This is based on three factors: 162 * - super page sizes supported by the processor 163 * - alignment of the region starting at 'gpa' and 'hpa' 164 * - length of the region 'len' 165 */ 166 spshift = PAGE_SHIFT; 167 if (spok) 168 spshift += (EPT_PWLEVELS - 1) * 9; 169 while (spshift >= PAGE_SHIFT) { 170 uint64_t spsize = 1UL << spshift; 171 if ((page_sizes_mask & spsize) != 0 && 172 (gpa & (spsize - 1)) == 0 && 173 (hpa & (spsize - 1)) == 0 && 174 length >= spsize) { 175 break; 176 } 177 spshift -= 9; 178 } 179 180 if (spshift < PAGE_SHIFT) { 181 panic("Invalid spshift for gpa 0x%016lx, hpa 0x%016lx, " 182 "length 0x%016lx, page_sizes_mask 0x%016lx", 183 gpa, hpa, length, page_sizes_mask); 184 } 185 186 nlevels = EPT_PWLEVELS; 187 while (--nlevels >= 0) { 188 ptpshift = PAGE_SHIFT + nlevels * 9; 189 ptpindex = (gpa >> ptpshift) & 0x1FF; 190 191 /* We have reached the leaf mapping */ 192 if (spshift >= ptpshift) 193 break; 194 195 /* 196 * We are working on a non-leaf page table page. 197 * 198 * Create the next level page table page if necessary and point 199 * to it from the current page table. 
200 */ 201 if (ptp[ptpindex] == 0) { 202 void *nlp = malloc(PAGE_SIZE, M_VMX, M_WAITOK | M_ZERO); 203 ptp[ptpindex] = vtophys(nlp); 204 ptp[ptpindex] |= EPT_PG_RD | EPT_PG_WR | EPT_PG_EX; 205 } 206 207 /* Work our way down to the next level page table page */ 208 ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & EPT_ADDR_MASK); 209 } 210 211 if ((gpa & ((1UL << ptpshift) - 1)) != 0) { 212 panic("ept_create_mapping: gpa 0x%016lx and ptpshift %d " 213 "mismatch\n", gpa, ptpshift); 214 } 215 216 if (prot != VM_PROT_NONE) { 217 /* Do the mapping */ 218 ptp[ptpindex] = hpa; 219 220 /* Apply the access controls */ 221 if (prot & VM_PROT_READ) 222 ptp[ptpindex] |= EPT_PG_RD; 223 if (prot & VM_PROT_WRITE) 224 ptp[ptpindex] |= EPT_PG_WR; 225 if (prot & VM_PROT_EXECUTE) 226 ptp[ptpindex] |= EPT_PG_EX; 227 228 /* 229 * XXX should we enforce this memory type by setting the 230 * ignore PAT bit to 1. 231 */ 232 ptp[ptpindex] |= EPT_PG_MEMORY_TYPE(attr); 233 234 if (nlevels > 0) 235 ptp[ptpindex] |= EPT_PG_SUPERPAGE; 236 } else { 237 /* Remove the mapping */ 238 ptp[ptpindex] = 0; 239 } 240 241 return (1UL << ptpshift); 242} 243 244static vm_paddr_t 245ept_lookup_mapping(uint64_t *ptp, vm_paddr_t gpa) 246{ 247 int nlevels, ptpshift, ptpindex; 248 uint64_t ptpval, hpabase, pgmask; 249 250 nlevels = EPT_PWLEVELS; 251 while (--nlevels >= 0) { 252 ptpshift = PAGE_SHIFT + nlevels * 9; 253 ptpindex = (gpa >> ptpshift) & 0x1FF; 254 255 ptpval = ptp[ptpindex]; 256 257 /* Cannot make progress beyond this point */ 258 if ((ptpval & (EPT_PG_RD | EPT_PG_WR | EPT_PG_EX)) == 0) 259 break; 260 261 if (nlevels == 0 || (ptpval & EPT_PG_SUPERPAGE)) { 262 pgmask = (1UL << ptpshift) - 1; 263 hpabase = ptpval & ~pgmask; 264 return (hpabase | (gpa & pgmask)); 265 } 266 267 /* Work our way down to the next level page table page */ 268 ptp = (uint64_t *)PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK); 269 } 270 271 return ((vm_paddr_t)-1); 272} 273 274static void 275ept_free_pt_entry(pt_entry_t pte) 276{ 277 if (pte 
== 0) 278 return; 279 280 /* sanity check */ 281 if ((pte & EPT_PG_SUPERPAGE) != 0) 282 panic("ept_free_pt_entry: pte cannot have superpage bit"); 283 284 return; 285} 286 287static void 288ept_free_pd_entry(pd_entry_t pde) 289{ 290 pt_entry_t *pt; 291 int i; 292 293 if (pde == 0) 294 return; 295 296 if ((pde & EPT_PG_SUPERPAGE) == 0) { 297 pt = (pt_entry_t *)PHYS_TO_DMAP(pde & EPT_ADDR_MASK); 298 for (i = 0; i < NPTEPG; i++) 299 ept_free_pt_entry(pt[i]); 300 free(pt, M_VMX); /* free the page table page */ 301 } 302} 303 304static void 305ept_free_pdp_entry(pdp_entry_t pdpe) 306{ 307 pd_entry_t *pd; 308 int i; 309 310 if (pdpe == 0) 311 return; 312 313 if ((pdpe & EPT_PG_SUPERPAGE) == 0) { 314 pd = (pd_entry_t *)PHYS_TO_DMAP(pdpe & EPT_ADDR_MASK); 315 for (i = 0; i < NPDEPG; i++) 316 ept_free_pd_entry(pd[i]); 317 free(pd, M_VMX); /* free the page directory page */ 318 } 319} 320 321static void 322ept_free_pml4_entry(pml4_entry_t pml4e) 323{ 324 pdp_entry_t *pdp; 325 int i; 326 327 if (pml4e == 0) 328 return; 329 330 if ((pml4e & EPT_PG_SUPERPAGE) == 0) { 331 pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4e & EPT_ADDR_MASK); 332 for (i = 0; i < NPDPEPG; i++) 333 ept_free_pdp_entry(pdp[i]); 334 free(pdp, M_VMX); /* free the page directory ptr page */ 335 } 336} 337 338void 339ept_vmcleanup(struct vmx *vmx) 340{ 341 int i; 342 343 for (i = 0; i < NPML4EPG; i++) 344 ept_free_pml4_entry(vmx->pml4ept[i]); 345} 346 347int 348ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t len, 349 vm_memattr_t attr, int prot, boolean_t spok) 350{ 351 size_t n; 352 struct vmx *vmx = arg; 353 354 while (len > 0) { 355 n = ept_create_mapping(vmx->pml4ept, gpa, hpa, len, attr, 356 prot, spok); 357 len -= n; 358 gpa += n; 359 hpa += n; 360 } 361 362 return (0); 363} 364 365vm_paddr_t 366ept_vmmmap_get(void *arg, vm_paddr_t gpa) 367{ 368 vm_paddr_t hpa; 369 struct vmx *vmx; 370 371 vmx = arg; 372 hpa = ept_lookup_mapping(vmx->pml4ept, gpa); 373 return (hpa); 374} 375 376static void 
377invept_single_context(void *arg) 378{ 379 struct invept_desc desc = *(struct invept_desc *)arg; 380 381 invept(INVEPT_TYPE_SINGLE_CONTEXT, desc); 382} 383 384void 385ept_invalidate_mappings(u_long pml4ept) 386{ 387 struct invept_desc invept_desc = { 0 }; 388 389 invept_desc.eptp = EPTP(pml4ept); 390 391 smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc); 392} 393