/* vmm_mem.c revision 239700 */
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/linker.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/md_var.h>
#include <machine/metadata.h>
#include <machine/pc/bios.h>
#include <machine/vmparam.h>
#include <machine/pmap.h>

#include "vmm_util.h"
#include "vmm_mem.h"

/* malloc(9) type used for the page-directory pages allocated below. */
static MALLOC_DEFINE(M_VMM_MEM, "vmm memory", "vmm memory");

#define MB (1024 * 1024)
#define GB (1024 * MB)

/* Hard cap on the number of discontiguous free segments tracked. */
#define VMM_MEM_MAXSEGS 64

/*
 * Free list of physical memory segments available for guest use.
 * Entries are kept sorted by 'base' and are split/merged by
 * vmm_mem_alloc()/vmm_mem_free().  Protected by vmm_mem_mtx.
 */
static struct {
	vm_paddr_t base;
	vm_size_t length;
} vmm_mem_avail[VMM_MEM_MAXSEGS];

/* Number of valid entries in vmm_mem_avail[]; protected by vmm_mem_mtx. */
static int vmm_mem_nsegs;

/* Total bytes stolen at boot; set once in vmm_mem_steal_memory(). */
size_t vmm_mem_total_bytes;

/* Highest physical address (exclusive) of any stolen segment. */
static vm_paddr_t maxaddr;

static struct mtx vmm_mem_mtx;

/*
 * Steal any memory that was deliberately hidden from FreeBSD either by
 * the use of MAXMEM kernel config option or the hw.physmem loader tunable.
 */
static int
vmm_mem_steal_memory(void)
{
	int nsegs;
	caddr_t kmdp;
	uint32_t smapsize;
	uint64_t base, length;
	struct bios_smap *smapbase, *smap, *smapend;

	/*
	 * Borrowed from hammer_time() and getmemsize() in machdep.c
	 *
	 * Walk the BIOS SMAP supplied by the loader and record every
	 * usable range that lies above what FreeBSD itself manages
	 * (ptoa(Maxmem)) into vmm_mem_avail[], rounded to 2MB (NBPDR)
	 * superpage boundaries.
	 */
	kmdp = preload_search_by_type("elf kernel");
	if (kmdp == NULL)
		kmdp = preload_search_by_type("elf64 kernel");

	smapbase = (struct bios_smap *)preload_search_info(kmdp,
	    MODINFO_METADATA | MODINFOMD_SMAP);
	if (smapbase == NULL)
		panic("No BIOS smap info from loader!");

	/*
	 * Per the preload metadata layout the 32-bit size of the smap
	 * array is stored immediately before the array itself.
	 */
	smapsize = *((uint32_t *)smapbase - 1);
	smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize);

	vmm_mem_total_bytes = 0;
	nsegs = 0;
	for (smap = smapbase; smap < smapend; smap++) {
		/*
		 * XXX
		 * Assuming non-overlapping, monotonically increasing
		 * memory segments.
		 */
		if (smap->type != SMAP_TYPE_MEMORY)
			continue;
		if (smap->length == 0)
			break;

		/* Shrink the range to whole 2MB superpages. */
		base = roundup(smap->base, NBPDR);
		length = rounddown(smap->length, NBPDR);

		/* Skip this segment if FreeBSD is using all of it. */
		if (base + length <= ptoa(Maxmem))
			continue;

		/*
		 * If FreeBSD is using part of this segment then adjust
		 * 'base' and 'length' accordingly.
		 */
		if (base < ptoa(Maxmem)) {
			uint64_t used;
			used = roundup(ptoa(Maxmem), NBPDR) - base;
			base += used;
			length -= used;
		}

		if (length == 0)
			continue;

		vmm_mem_avail[nsegs].base = base;
		vmm_mem_avail[nsegs].length = length;
		vmm_mem_total_bytes += length;

		/* Track the highest (exclusive) stolen address seen. */
		if (base + length > maxaddr)
			maxaddr = base + length;

		/*
		 * Debug print deliberately disabled with "0 &&"; the
		 * message prefix says "vmm_mem_populate" although this
		 * function is vmm_mem_steal_memory — kept as-is.
		 */
		if (0 && bootverbose) {
			printf("vmm_mem_populate: index %d, base 0x%0lx, "
			       "length %ld\n",
			       nsegs, vmm_mem_avail[nsegs].base,
			       vmm_mem_avail[nsegs].length);
		}

		nsegs++;
		if (nsegs >= VMM_MEM_MAXSEGS) {
			printf("vmm_mem_populate: maximum number of vmm memory "
			       "segments reached!\n");
			return (ENOSPC);
		}
	}

	vmm_mem_nsegs = nsegs;

	return (0);
}

/*
 * Map the physical range [start, end) into the kernel direct map using
 * 1GB (NBPDP) superpages when the CPU supports them and the range is
 * suitably sized/aligned, falling back to 2MB (NBPDR) pages otherwise.
 * If a mapping already exists it must match exactly or we panic.
 */
static void
vmm_mem_direct_map(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t addr, remaining;
	int pdpi, pdi, superpage_size;
	pml4_entry_t *pml4p;
	pdp_entry_t *pdp;
	pd_entry_t *pd;
	uint64_t page_attr_bits;

	/* Only the first PML4 slot's worth (512GB) of DMAP is handled. */
	if (end >= NBPML4)
		panic("Cannot map memory beyond %ldGB", NBPML4 / GB);

	if (vmm_supports_1G_pages())
		superpage_size = NBPDP;
	else
		superpage_size = NBPDR;

	/*
	 * Get the page directory pointer page that contains the direct
	 * map address mappings.
	 */
	pml4p = kernel_pmap->pm_pml4;
	pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4p[DMPML4I] & ~PAGE_MASK);

	/* Writable, valid, superpage, global — identity attributes. */
	page_attr_bits = PG_RW | PG_V | PG_PS | PG_G;
	addr = start;
	while (addr < end) {
		remaining = end - addr;
		pdpi = addr / NBPDP;
		if (superpage_size == NBPDP &&
		    remaining >= NBPDP &&
		    addr % NBPDP == 0) {
			/*
			 * If there isn't a mapping for this address then
			 * create one but if there is one already make sure
			 * it matches what we expect it to be.
			 */
			if (pdp[pdpi] == 0) {
				pdp[pdpi] = addr | page_attr_bits;
				if (0 && bootverbose) {
					printf("vmm_mem_populate: mapping "
					       "0x%lx with 1GB page at "
					       "pdpi %d\n", addr, pdpi);
				}
			} else {
				pdp_entry_t pdpe = pdp[pdpi];
				if ((pdpe & ~PAGE_MASK) != addr ||
				    (pdpe & page_attr_bits) != page_attr_bits) {
					panic("An invalid mapping 0x%016lx "
					      "already exists for 0x%016lx\n",
					      pdpe, addr);
				}
			}
			addr += NBPDP;
		} else {
			/*
			 * 2MB path: caller guarantees 2MB alignment, so a
			 * sub-2MB remainder indicates a bug.
			 */
			if (remaining < NBPDR) {
				panic("vmm_mem_populate: remaining (%ld) must "
				      "be greater than NBPDR (%d)\n",
				      remaining, NBPDR);
			}
			if (pdp[pdpi] == 0) {
				/*
				 * XXX we lose this memory forever because
				 * we do not keep track of the virtual address
				 * that would be required to free this page.
				 */
				pd = malloc(PAGE_SIZE, M_VMM_MEM,
					    M_WAITOK | M_ZERO);
				/* malloc(9) must hand back a page-aligned page. */
				if ((uintptr_t)pd & PAGE_MASK) {
					panic("vmm_mem_populate: page directory"
					      "page not aligned on %d "
					      "boundary\n", PAGE_SIZE);
				}
				pdp[pdpi] = vtophys(pd);
				pdp[pdpi] |= PG_RW | PG_V | PG_U;
				if (0 && bootverbose) {
					printf("Creating page directory "
					       "at pdp index %d for 0x%016lx\n",
					       pdpi, addr);
				}
			}
			pdi = (addr % NBPDP) / NBPDR;
			pd = (pd_entry_t *)PHYS_TO_DMAP(pdp[pdpi] & ~PAGE_MASK);

			/*
			 * Create a new mapping if one doesn't already exist
			 * or validate it if it does.
			 */
			if (pd[pdi] == 0) {
				pd[pdi] = addr | page_attr_bits;
				if (0 && bootverbose) {
					printf("vmm_mem_populate: mapping "
					       "0x%lx with 2MB page at "
					       "pdpi %d, pdi %d\n",
					       addr, pdpi, pdi);
				}
			} else {
				pd_entry_t pde = pd[pdi];
				if ((pde & ~PAGE_MASK) != addr ||
				    (pde & page_attr_bits) != page_attr_bits) {
					panic("An invalid mapping 0x%016lx "
					      "already exists for 0x%016lx\n",
					      pde, addr);
				}
			}
			addr += NBPDR;
		}
	}
}

/*
 * Discover the hidden memory segments and enter each one into the
 * kernel direct map.  Returns 0 on success or an errno from
 * vmm_mem_steal_memory().
 */
static int
vmm_mem_populate(void)
{
	int seg, error;
	vm_paddr_t start, end;

	/* populate the vmm_mem_avail[] array */
	error = vmm_mem_steal_memory();
	if (error)
		return (error);

	/*
	 * Now map the memory that was hidden from FreeBSD in
	 * the direct map VA space.
	 */
	for (seg = 0; seg < vmm_mem_nsegs; seg++) {
		start = vmm_mem_avail[seg].base;
		end = start + vmm_mem_avail[seg].length;
		/* Segments were rounded to NBPDR above; verify that. */
		if ((start & PDRMASK) != 0 || (end & PDRMASK) != 0) {
			panic("start (0x%016lx) and end (0x%016lx) must be "
			      "aligned on a %dMB boundary\n",
			      start, end, NBPDR / MB);
		}
		vmm_mem_direct_map(start, end);
	}

	return (0);
}

/*
 * One-time module initialization: set up the free-list mutex and
 * populate the free list.  Returns 0 or an errno.
 */
int
vmm_mem_init(void)
{
	int error;

	mtx_init(&vmm_mem_mtx, "vmm_mem_mtx", NULL, MTX_DEF);

	error = vmm_mem_populate();
	if (error)
		return (error);

	return (0);
}

/*
 * First-fit allocator over vmm_mem_avail[].  'size' must be a multiple
 * of 2MB (NBPDR).  Returns the physical base address of the carved-out
 * range, or 0 if no segment is large enough.  (0 cannot be a valid
 * result because all segments lie above ptoa(Maxmem).)
 */
vm_paddr_t
vmm_mem_alloc(size_t size)
{
	int i;
	vm_paddr_t addr;

	if ((size & PDRMASK) != 0) {
		panic("vmm_mem_alloc: size 0x%0lx must be "
		      "aligned on a 0x%0x boundary\n", size, NBPDR);
	}

	addr = 0;

	mtx_lock(&vmm_mem_mtx);
	for (i = 0; i < vmm_mem_nsegs; i++) {
		if (vmm_mem_avail[i].length >= size) {
			/* Carve the allocation off the segment's front. */
			addr = vmm_mem_avail[i].base;
			vmm_mem_avail[i].base += size;
			vmm_mem_avail[i].length -= size;
			/* remove a zero length segment */
			if (vmm_mem_avail[i].length == 0) {
				memmove(&vmm_mem_avail[i],
					&vmm_mem_avail[i + 1],
					(vmm_mem_nsegs - (i + 1)) *
					sizeof(vmm_mem_avail[0]));
				vmm_mem_nsegs--;
			}
			break;
		}
	}
	mtx_unlock(&vmm_mem_mtx);

	return (addr);
}

/*
 * Total bytes stolen at boot (fixed after vmm_mem_steal_memory()).
 */
size_t
vmm_mem_get_mem_total(void)
{
	return vmm_mem_total_bytes;
}

/*
 * Sum of the lengths of all currently-free segments.
 */
size_t
vmm_mem_get_mem_free(void)
{
	size_t length = 0;
	int i;

	mtx_lock(&vmm_mem_mtx);
	for (i = 0; i < vmm_mem_nsegs; i++) {
		length += vmm_mem_avail[i].length;
	}
	mtx_unlock(&vmm_mem_mtx);

	return(length);
}

/*
 * Return [base, base + length) to the free list, keeping the list
 * sorted by base address and coalescing adjacent segments.  Both
 * arguments must be 2MB-aligned.  NOTE(review): no overlap/double-free
 * check is performed — callers are trusted.
 */
void
vmm_mem_free(vm_paddr_t base, size_t length)
{
	int i;

	if ((base & PDRMASK) != 0 || (length & PDRMASK) != 0) {
		panic("vmm_mem_free: base 0x%0lx and length 0x%0lx must be "
		      "aligned on a 0x%0x boundary\n", base, length, NBPDR);
	}

	mtx_lock(&vmm_mem_mtx);

	/* Find the sorted insertion point for the new segment. */
	for (i = 0; i < vmm_mem_nsegs; i++) {
		if (vmm_mem_avail[i].base > base)
			break;
	}

	if (vmm_mem_nsegs >= VMM_MEM_MAXSEGS)
		panic("vmm_mem_free: cannot free any more segments");

	/* Create a new segment at index 'i' */
	memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i],
		(vmm_mem_nsegs - i) * sizeof(vmm_mem_avail[0]));

	vmm_mem_avail[i].base = base;
	vmm_mem_avail[i].length = length;

	vmm_mem_nsegs++;

	/*
	 * Merge any segment that ends exactly where its successor
	 * begins; restart the scan after each merge.
	 */
coalesce_some_more:
	for (i = 0; i < vmm_mem_nsegs - 1; i++) {
		if (vmm_mem_avail[i].base + vmm_mem_avail[i].length ==
		    vmm_mem_avail[i + 1].base) {
			vmm_mem_avail[i].length += vmm_mem_avail[i + 1].length;
			memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i + 2],
			  (vmm_mem_nsegs - (i + 2)) * sizeof(vmm_mem_avail[0]));
			vmm_mem_nsegs--;
			goto coalesce_some_more;
		}
	}

	mtx_unlock(&vmm_mem_mtx);
}

/*
 * Highest physical address (exclusive) covered by any stolen segment.
 */
vm_paddr_t
vmm_mem_maxaddr(void)
{

	return (maxaddr);
}

/*
 * Print every free segment as "index start end" (end exclusive).
 */
void
vmm_mem_dump(void)
{
	int i;
	vm_paddr_t base;
	vm_size_t length;

	mtx_lock(&vmm_mem_mtx);
	for (i = 0; i < vmm_mem_nsegs; i++) {
		base = vmm_mem_avail[i].base;
		length = vmm_mem_avail[i].length;
		printf("%-4d0x%016lx 0x%016lx\n", i, base, base + length);
	}
	mtx_unlock(&vmm_mem_mtx);
}