vmm_mem.c revision 221828
1/*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/param.h> 33#include <sys/lock.h> 34#include <sys/mutex.h> 35#include <sys/linker.h> 36#include <sys/systm.h> 37#include <sys/malloc.h> 38#include <sys/kernel.h> 39 40#include <vm/vm.h> 41#include <vm/pmap.h> 42 43#include <machine/md_var.h> 44#include <machine/metadata.h> 45#include <machine/pc/bios.h> 46#include <machine/vmparam.h> 47#include <machine/pmap.h> 48 49#include "vmm_util.h" 50#include "vmm_mem.h" 51 52static MALLOC_DEFINE(M_VMM_MEM, "vmm memory", "vmm memory"); 53 54#define MB (1024 * 1024) 55#define GB (1024 * MB) 56 57#define VMM_MEM_MAXSEGS 64 58 59/* protected by vmm_mem_mtx */ 60static struct { 61 vm_paddr_t base; 62 vm_size_t length; 63} vmm_mem_avail[VMM_MEM_MAXSEGS]; 64 65static int vmm_mem_nsegs; 66 67static vm_paddr_t maxaddr; 68 69static struct mtx vmm_mem_mtx; 70 71/* 72 * Steal any memory that was deliberately hidden from FreeBSD either by 73 * the use of MAXMEM kernel config option or the hw.physmem loader tunable. 74 */ 75static int 76vmm_mem_steal_memory(void) 77{ 78 int nsegs; 79 caddr_t kmdp; 80 uint32_t smapsize; 81 uint64_t base, length; 82 struct bios_smap *smapbase, *smap, *smapend; 83 84 /* 85 * Borrowed from hammer_time() and getmemsize() in machdep.c 86 */ 87 kmdp = preload_search_by_type("elf kernel"); 88 if (kmdp == NULL) 89 kmdp = preload_search_by_type("elf64 kernel"); 90 91 smapbase = (struct bios_smap *)preload_search_info(kmdp, 92 MODINFO_METADATA | MODINFOMD_SMAP); 93 if (smapbase == NULL) 94 panic("No BIOS smap info from loader!"); 95 96 smapsize = *((uint32_t *)smapbase - 1); 97 smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); 98 99 nsegs = 0; 100 for (smap = smapbase; smap < smapend; smap++) { 101 /* 102 * XXX 103 * Assuming non-overlapping, monotonically increasing 104 * memory segments. 105 */ 106 if (smap->type != SMAP_TYPE_MEMORY) 107 continue; 108 if (smap->length == 0) 109 break; 110 111 base = roundup(smap->base, NBPDR); 112 length = rounddown(smap->length, NBPDR); 113 114 /* Skip this segment if FreeBSD is using all of it. */ 115 if (base + length <= ptoa(Maxmem)) 116 continue; 117 118 /* 119 * If FreeBSD is using part of this segment then adjust 120 * 'base' and 'length' accordingly. 121 */ 122 if (base < ptoa(Maxmem)) { 123 uint64_t used; 124 used = roundup(ptoa(Maxmem), NBPDR) - base; 125 base += used; 126 length -= used; 127 } 128 129 if (length == 0) 130 continue; 131 132 vmm_mem_avail[nsegs].base = base; 133 vmm_mem_avail[nsegs].length = length; 134 135 if (base + length > maxaddr) 136 maxaddr = base + length; 137 138 if (0 && bootverbose) { 139 printf("vmm_mem_populate: index %d, base 0x%0lx, " 140 "length %ld\n", 141 nsegs, vmm_mem_avail[nsegs].base, 142 vmm_mem_avail[nsegs].length); 143 } 144 145 nsegs++; 146 if (nsegs >= VMM_MEM_MAXSEGS) { 147 printf("vmm_mem_populate: maximum number of vmm memory " 148 "segments reached!\n"); 149 return (ENOSPC); 150 } 151 } 152 153 vmm_mem_nsegs = nsegs; 154 155 return (0); 156} 157 158static void 159vmm_mem_direct_map(vm_paddr_t start, vm_paddr_t end) 160{ 161 vm_paddr_t addr, remaining; 162 int pdpi, pdi, superpage_size; 163 pml4_entry_t *pml4p; 164 pdp_entry_t *pdp; 165 pd_entry_t *pd; 166 uint64_t page_attr_bits; 167 168 if (end >= NBPML4) 169 panic("Cannot map memory beyond %ldGB", NBPML4 / GB); 170 171 /* XXX FreeBSD 8.1 does not use 1G superpages in the direct map */ 172 if (0 && vmm_supports_1G_pages()) 173 superpage_size = NBPDP; 174 else 175 superpage_size = NBPDR; 176 177 /* 178 * Get the page directory pointer page that contains the direct 179 * map address mappings. 180 */ 181 pml4p = kernel_pmap->pm_pml4; 182 pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4p[DMPML4I] & ~PAGE_MASK); 183 184 page_attr_bits = PG_RW | PG_V | PG_PS | PG_G; 185 addr = start; 186 while (addr < end) { 187 remaining = end - addr; 188 pdpi = addr / NBPDP; 189 if (superpage_size == NBPDP && 190 remaining >= NBPDP && 191 addr % NBPDP == 0) { 192 /* 193 * If there isn't a mapping for this address then 194 * create one but if there is one already make sure 195 * it matches what we expect it to be. 196 */ 197 if (pdp[pdpi] == 0) { 198 pdp[pdpi] = addr | page_attr_bits; 199 if (0 && bootverbose) { 200 printf("vmm_mem_populate: mapping " 201 "0x%lx with 1GB page at " 202 "pdpi %d\n", addr, pdpi); 203 } 204 } else { 205 pdp_entry_t pdpe = pdp[pdpi]; 206 if ((pdpe & ~PAGE_MASK) != addr || 207 (pdpe & page_attr_bits) != page_attr_bits) { 208 panic("An invalid mapping 0x%016lx " 209 "already exists for 0x%016lx\n", 210 pdpe, addr); 211 } 212 } 213 addr += NBPDP; 214 } else { 215 if (remaining < NBPDR) { 216 panic("vmm_mem_populate: remaining (%ld) must " 217 "be greater than NBPDR (%d)\n", 218 remaining, NBPDR); 219 } 220 if (pdp[pdpi] == 0) { 221 /* 222 * XXX we lose this memory forever because 223 * we do not keep track of the virtual address 224 * that would be required to free this page. 225 */ 226 pd = malloc(PAGE_SIZE, M_VMM_MEM, 227 M_WAITOK | M_ZERO); 228 if ((uintptr_t)pd & PAGE_MASK) { 229 panic("vmm_mem_populate: page directory" 230 "page not aligned on %d " 231 "boundary\n", PAGE_SIZE); 232 } 233 pdp[pdpi] = vtophys(pd); 234 pdp[pdpi] |= PG_RW | PG_V | PG_U; 235 if (0 && bootverbose) { 236 printf("Creating page directory " 237 "at pdp index %d for 0x%016lx\n", 238 pdpi, addr); 239 } 240 } 241 pdi = (addr % NBPDP) / NBPDR; 242 pd = (pd_entry_t *)PHYS_TO_DMAP(pdp[pdpi] & ~PAGE_MASK); 243 244 /* 245 * Create a new mapping if one doesn't already exist 246 * or validate it if it does. 247 */ 248 if (pd[pdi] == 0) { 249 pd[pdi] = addr | page_attr_bits; 250 if (0 && bootverbose) { 251 printf("vmm_mem_populate: mapping " 252 "0x%lx with 2MB page at " 253 "pdpi %d, pdi %d\n", 254 addr, pdpi, pdi); 255 } 256 } else { 257 pd_entry_t pde = pd[pdi]; 258 if ((pde & ~PAGE_MASK) != addr || 259 (pde & page_attr_bits) != page_attr_bits) { 260 panic("An invalid mapping 0x%016lx " 261 "already exists for 0x%016lx\n", 262 pde, addr); 263 } 264 } 265 addr += NBPDR; 266 } 267 } 268} 269 270static int 271vmm_mem_populate(void) 272{ 273 int seg, error; 274 vm_paddr_t start, end; 275 276 /* populate the vmm_mem_avail[] array */ 277 error = vmm_mem_steal_memory(); 278 if (error) 279 return (error); 280 281 /* 282 * Now map the memory that was hidden from FreeBSD in 283 * the direct map VA space. 284 */ 285 for (seg = 0; seg < vmm_mem_nsegs; seg++) { 286 start = vmm_mem_avail[seg].base; 287 end = start + vmm_mem_avail[seg].length; 288 if ((start & PDRMASK) != 0 || (end & PDRMASK) != 0) { 289 panic("start (0x%016lx) and end (0x%016lx) must be " 290 "aligned on a %dMB boundary\n", 291 start, end, NBPDR / MB); 292 } 293 vmm_mem_direct_map(start, end); 294 } 295 296 return (0); 297} 298 299int 300vmm_mem_init(void) 301{ 302 int error; 303 304 mtx_init(&vmm_mem_mtx, "vmm_mem_mtx", NULL, MTX_DEF); 305 306 error = vmm_mem_populate(); 307 if (error) 308 return (error); 309 310 return (0); 311} 312 313vm_paddr_t 314vmm_mem_alloc(size_t size) 315{ 316 int i; 317 vm_paddr_t addr; 318 319 if ((size & PDRMASK) != 0) { 320 panic("vmm_mem_alloc: size 0x%0lx must be " 321 "aligned on a 0x%0x boundary\n", size, NBPDR); 322 } 323 324 addr = 0; 325 326 mtx_lock(&vmm_mem_mtx); 327 for (i = 0; i < vmm_mem_nsegs; i++) { 328 if (vmm_mem_avail[i].length >= size) { 329 addr = vmm_mem_avail[i].base; 330 vmm_mem_avail[i].base += size; 331 vmm_mem_avail[i].length -= size; 332 /* remove a zero length segment */ 333 if (vmm_mem_avail[i].length == 0) { 334 memmove(&vmm_mem_avail[i], 335 &vmm_mem_avail[i + 1], 336 (vmm_mem_nsegs - (i + 1)) * 337 sizeof(vmm_mem_avail[0])); 338 vmm_mem_nsegs--; 339 } 340 break; 341 } 342 } 343 mtx_unlock(&vmm_mem_mtx); 344 345 return (addr); 346} 347 348void 349vmm_mem_free(vm_paddr_t base, size_t length) 350{ 351 int i; 352 353 if ((base & PDRMASK) != 0 || (length & PDRMASK) != 0) { 354 panic("vmm_mem_free: base 0x%0lx and length 0x%0lx must be " 355 "aligned on a 0x%0x boundary\n", base, length, NBPDR); 356 } 357 358 mtx_lock(&vmm_mem_mtx); 359 360 for (i = 0; i < vmm_mem_nsegs; i++) { 361 if (vmm_mem_avail[i].base > base) 362 break; 363 } 364 365 if (vmm_mem_nsegs >= VMM_MEM_MAXSEGS) 366 panic("vmm_mem_free: cannot free any more segments"); 367 368 /* Create a new segment at index 'i' */ 369 memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i], 370 (vmm_mem_nsegs - i) * sizeof(vmm_mem_avail[0])); 371 372 vmm_mem_avail[i].base = base; 373 vmm_mem_avail[i].length = length; 374 375 vmm_mem_nsegs++; 376 377coalesce_some_more: 378 for (i = 0; i < vmm_mem_nsegs - 1; i++) { 379 if (vmm_mem_avail[i].base + vmm_mem_avail[i].length == 380 vmm_mem_avail[i + 1].base) { 381 vmm_mem_avail[i].length += vmm_mem_avail[i + 1].length; 382 memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i + 2], 383 (vmm_mem_nsegs - (i + 2)) * sizeof(vmm_mem_avail[0])); 384 vmm_mem_nsegs--; 385 goto coalesce_some_more; 386 } 387 } 388 389 mtx_unlock(&vmm_mem_mtx); 390} 391 392vm_paddr_t 393vmm_mem_maxaddr(void) 394{ 395 396 return (maxaddr); 397} 398 399void 400vmm_mem_dump(void) 401{ 402 int i; 403 vm_paddr_t base; 404 vm_size_t length; 405 406 mtx_lock(&vmm_mem_mtx); 407 for (i = 0; i < vmm_mem_nsegs; i++) { 408 base = vmm_mem_avail[i].base; 409 length = vmm_mem_avail[i].length; 410 printf("%-4d0x%016lx 0x%016lx\n", i, base, base + length); 411 } 412 mtx_unlock(&vmm_mem_mtx); 413} 414