1/* 2 * Copyright (c) 2003-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989, 1988 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56 57 58#include <mach/i386/vm_param.h> 59 60#include <string.h> 61#include <mach/vm_param.h> 62#include <mach/vm_prot.h> 63#include <mach/machine.h> 64#include <mach/time_value.h> 65#include <kern/spl.h> 66#include <kern/assert.h> 67#include <kern/debug.h> 68#include <kern/misc_protos.h> 69#include <kern/cpu_data.h> 70#include <kern/processor.h> 71#include <vm/vm_page.h> 72#include <vm/pmap.h> 73#include <vm/vm_kern.h> 74#include <i386/pmap.h> 75#include <i386/misc_protos.h> 76#include <i386/cpuid.h> 77#include <mach/thread_status.h> 78#include <pexpert/i386/efi.h> 79#include <i386/i386_lowmem.h> 80#include <x86_64/lowglobals.h> 81#include <i386/pal_routines.h> 82 83#include <mach-o/loader.h> 84#include <libkern/kernel_mach_header.h> 85 86 87vm_size_t mem_size = 0; 88pmap_paddr_t first_avail = 0;/* first after page tables */ 89 90uint64_t max_mem; /* Size of physical memory (bytes), adjusted by maxmem */ 91uint64_t mem_actual; 92uint64_t sane_size = 0; /* Memory size for defaults calculations */ 93 94/* 95 * KASLR parameters 96 */ 97ppnum_t vm_kernel_base_page; 98vm_offset_t vm_kernel_base; 99vm_offset_t vm_kernel_top; 100vm_offset_t vm_kernel_stext; 101vm_offset_t vm_kernel_etext; 102vm_offset_t vm_kernel_slide; 103vm_offset_t vm_hib_base; 104vm_offset_t vm_kext_base = VM_MIN_KERNEL_AND_KEXT_ADDRESS; 105vm_offset_t vm_kext_top = VM_MIN_KERNEL_ADDRESS; 106 107#define MAXLORESERVE (32 * 1024 * 1024) 108 109ppnum_t max_ppnum = 0; 110ppnum_t lowest_lo = 0; 111ppnum_t lowest_hi = 0; 112ppnum_t highest_hi = 0; 113 114enum {PMAP_MAX_RESERVED_RANGES = 32}; 115uint32_t pmap_reserved_pages_allocated = 0; 116uint32_t pmap_reserved_range_indices[PMAP_MAX_RESERVED_RANGES]; 117uint32_t pmap_last_reserved_range_index = 0; 118uint32_t pmap_reserved_ranges = 0; 119 120extern unsigned int bsd_mbuf_cluster_reserve(boolean_t *); 121 122pmap_paddr_t avail_start, avail_end; 123vm_offset_t virtual_avail, virtual_end; 124static pmap_paddr_t avail_remaining; 125vm_offset_t static_memory_end = 0; 126 127vm_offset_t sHIB, eHIB, stext, etext, sdata, edata, sconstdata, econstdata, end; 128 129/* 130 * _mh_execute_header is the mach_header for the currently executing kernel 131 */ 132vm_offset_t segTEXTB; unsigned long segSizeTEXT; 133vm_offset_t segDATAB; unsigned long segSizeDATA; 134vm_offset_t segLINKB; unsigned long segSizeLINK; 135vm_offset_t segPRELINKB; unsigned long segSizePRELINK; 136vm_offset_t segHIBB; unsigned long segSizeHIB; 137vm_offset_t sectCONSTB; unsigned long sectSizeConst; 138 139boolean_t doconstro_override = FALSE; 140 141static kernel_segment_command_t *segTEXT, *segDATA; 142static kernel_section_t *cursectTEXT, *lastsectTEXT; 143static kernel_section_t *sectDCONST; 144 145extern uint64_t firmware_Conventional_bytes; 146extern uint64_t firmware_RuntimeServices_bytes; 147extern uint64_t firmware_ACPIReclaim_bytes; 148extern uint64_t firmware_ACPINVS_bytes; 149extern uint64_t firmware_PalCode_bytes; 150extern uint64_t firmware_Reserved_bytes; 151extern uint64_t firmware_Unusable_bytes; 152extern uint64_t firmware_other_bytes; 153uint64_t firmware_MMIO_bytes; 154 155/* 156 * Linker magic to establish the highest address in the kernel. 157 */ 158extern void *last_kernel_symbol; 159 160#if DEBUG 161#define PRINT_PMAP_MEMORY_TABLE 162#define DBG(x...) kprintf(x) 163#else 164#define DBG(x...) 165#endif /* DEBUG */ 166/* 167 * Basic VM initialization. 168 */ 169void 170i386_vm_init(uint64_t maxmem, 171 boolean_t IA32e, 172 boot_args *args) 173{ 174 pmap_memory_region_t *pmptr; 175 pmap_memory_region_t *prev_pmptr; 176 EfiMemoryRange *mptr; 177 unsigned int mcount; 178 unsigned int msize; 179 ppnum_t fap; 180 unsigned int i; 181 ppnum_t maxpg = 0; 182 uint32_t pmap_type; 183 uint32_t maxloreserve; 184 uint32_t maxdmaaddr; 185 uint32_t mbuf_reserve = 0; 186 boolean_t mbuf_override = FALSE; 187 boolean_t coalescing_permitted; 188 vm_kernel_base_page = i386_btop(args->kaddr); 189 vm_offset_t base_address; 190 vm_offset_t static_base_address; 191 192 /* 193 * Establish the KASLR parameters. 194 */ 195 static_base_address = ml_static_ptovirt(KERNEL_BASE_OFFSET); 196 base_address = ml_static_ptovirt(args->kaddr); 197 vm_kernel_slide = base_address - static_base_address; 198 if (args->kslide) { 199 kprintf("KASLR slide: 0x%016lx dynamic\n", vm_kernel_slide); 200 if (vm_kernel_slide != ((vm_offset_t)args->kslide)) 201 panic("Kernel base inconsistent with slide - rebased?"); 202 } else { 203 /* No slide relative to on-disk symbols */ 204 kprintf("KASLR slide: 0x%016lx static and ignored\n", 205 vm_kernel_slide); 206 vm_kernel_slide = 0; 207 } 208 209 /* 210 * Zero out local relocations to avoid confusing kxld. 211 * TODO: might be better to move this code to OSKext::initialize 212 */ 213 if (_mh_execute_header.flags & MH_PIE) { 214 struct load_command *loadcmd; 215 uint32_t cmd; 216 217 loadcmd = (struct load_command *)((uintptr_t)&_mh_execute_header + 218 sizeof (_mh_execute_header)); 219 220 for (cmd = 0; cmd < _mh_execute_header.ncmds; cmd++) { 221 if (loadcmd->cmd == LC_DYSYMTAB) { 222 struct dysymtab_command *dysymtab; 223 224 dysymtab = (struct dysymtab_command *)loadcmd; 225 dysymtab->nlocrel = 0; 226 dysymtab->locreloff = 0; 227 kprintf("Hiding local relocations\n"); 228 break; 229 } 230 loadcmd = (struct load_command *)((uintptr_t)loadcmd + loadcmd->cmdsize); 231 } 232 } 233 234 /* 235 * Now retrieve addresses for end, edata, and etext 236 * from MACH-O headers. 237 */ 238 segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, 239 "__TEXT", &segSizeTEXT); 240 segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, 241 "__DATA", &segSizeDATA); 242 segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, 243 "__LINKEDIT", &segSizeLINK); 244 segHIBB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, 245 "__HIB", &segSizeHIB); 246 segPRELINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, 247 "__PRELINK_TEXT", &segSizePRELINK); 248 segTEXT = getsegbynamefromheader(&_mh_execute_header, 249 "__TEXT"); 250 segDATA = getsegbynamefromheader(&_mh_execute_header, 251 "__DATA"); 252 sectDCONST = getsectbynamefromheader(&_mh_execute_header, 253 "__DATA", "__const"); 254 cursectTEXT = lastsectTEXT = firstsect(segTEXT); 255 /* Discover the last TEXT section within the TEXT segment */ 256 while ((cursectTEXT = nextsect(segTEXT, cursectTEXT)) != NULL) { 257 lastsectTEXT = cursectTEXT; 258 } 259 260 sHIB = segHIBB; 261 eHIB = segHIBB + segSizeHIB; 262 vm_hib_base = sHIB; 263 /* Zero-padded from ehib to stext if text is 2M-aligned */ 264 stext = segTEXTB; 265 lowGlo.lgStext = stext; 266 etext = (vm_offset_t) round_page_64(lastsectTEXT->addr + lastsectTEXT->size); 267 /* Zero-padded from etext to sdata if text is 2M-aligned */ 268 sdata = segDATAB; 269 edata = segDATAB + segSizeDATA; 270 271 sectCONSTB = (vm_offset_t) sectDCONST->addr; 272 sectSizeConst = sectDCONST->size; 273 sconstdata = sectCONSTB; 274 econstdata = sectCONSTB + sectSizeConst; 275 276 if (sectSizeConst & PAGE_MASK) { 277 kernel_section_t *ns = nextsect(segDATA, sectDCONST); 278 if (ns && !(ns->addr & PAGE_MASK)) 279 doconstro_override = TRUE; 280 } else 281 doconstro_override = TRUE; 282 283 DBG("segTEXTB = %p\n", (void *) segTEXTB); 284 DBG("segDATAB = %p\n", (void *) segDATAB); 285 DBG("segLINKB = %p\n", (void *) segLINKB); 286 DBG("segHIBB = %p\n", (void *) segHIBB); 287 DBG("segPRELINKB = %p\n", (void *) segPRELINKB); 288 DBG("sHIB = %p\n", (void *) sHIB); 289 DBG("eHIB = %p\n", (void *) eHIB); 290 DBG("stext = %p\n", (void *) stext); 291 DBG("etext = %p\n", (void *) etext); 292 DBG("sdata = %p\n", (void *) sdata); 293 DBG("edata = %p\n", (void *) edata); 294 DBG("sconstdata = %p\n", (void *) sconstdata); 295 DBG("econstdata = %p\n", (void *) econstdata); 296 DBG("kernel_top = %p\n", (void *) &last_kernel_symbol); 297 298 vm_kernel_base = sHIB; 299 vm_kernel_top = (vm_offset_t) &last_kernel_symbol; 300 vm_kernel_stext = stext; 301 vm_kernel_etext = etext; 302 303 vm_set_page_size(); 304 305 /* 306 * Compute the memory size. 307 */ 308 309 avail_remaining = 0; 310 avail_end = 0; 311 pmptr = pmap_memory_regions; 312 prev_pmptr = 0; 313 pmap_memory_region_count = pmap_memory_region_current = 0; 314 fap = (ppnum_t) i386_btop(first_avail); 315 316 mptr = (EfiMemoryRange *)ml_static_ptovirt((vm_offset_t)args->MemoryMap); 317 if (args->MemoryMapDescriptorSize == 0) 318 panic("Invalid memory map descriptor size"); 319 msize = args->MemoryMapDescriptorSize; 320 mcount = args->MemoryMapSize / msize; 321 322#define FOURGIG 0x0000000100000000ULL 323#define ONEGIG 0x0000000040000000ULL 324 325 for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { 326 ppnum_t base, top; 327 uint64_t region_bytes = 0; 328 329 if (pmap_memory_region_count >= PMAP_MEMORY_REGIONS_SIZE) { 330 kprintf("WARNING: truncating memory region count at %d\n", pmap_memory_region_count); 331 break; 332 } 333 base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT); 334 top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1); 335 336 if (base == 0) { 337 /* 338 * Avoid having to deal with the edge case of the 339 * very first possible physical page and the roll-over 340 * to -1; just ignore that page. 341 */ 342 kprintf("WARNING: ignoring first page in [0x%llx:0x%llx]\n", (uint64_t) base, (uint64_t) top); 343 base++; 344 } 345 if (top + 1 == 0) { 346 /* 347 * Avoid having to deal with the edge case of the 348 * very last possible physical page and the roll-over 349 * to 0; just ignore that page. 350 */ 351 kprintf("WARNING: ignoring last page in [0x%llx:0x%llx]\n", (uint64_t) base, (uint64_t) top); 352 top--; 353 } 354 if (top < base) { 355 /* 356 * That was the only page in that region, so 357 * ignore the whole region. 358 */ 359 continue; 360 } 361 362#if MR_RSV_TEST 363 static uint32_t nmr = 0; 364 if ((base > 0x20000) && (nmr++ < 4)) 365 mptr->Attribute |= EFI_MEMORY_KERN_RESERVED; 366#endif 367 region_bytes = (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT); 368 pmap_type = mptr->Type; 369 370 switch (mptr->Type) { 371 case kEfiLoaderCode: 372 case kEfiLoaderData: 373 case kEfiBootServicesCode: 374 case kEfiBootServicesData: 375 case kEfiConventionalMemory: 376 /* 377 * Consolidate usable memory types into one. 378 */ 379 pmap_type = kEfiConventionalMemory; 380 sane_size += region_bytes; 381 firmware_Conventional_bytes += region_bytes; 382 break; 383 /* 384 * sane_size should reflect the total amount of physical 385 * RAM in the system, not just the amount that is 386 * available for the OS to use. 387 * FIXME:Consider deriving this value from SMBIOS tables 388 * rather than reverse engineering the memory map. 389 * Alternatively, see 390 * <rdar://problem/4642773> Memory map should 391 * describe all memory 392 * Firmware on some systems guarantees that the memory 393 * map is complete via the "RomReservedMemoryTracked" 394 * feature field--consult that where possible to 395 * avoid the "round up to 128M" workaround below. 396 */ 397 398 case kEfiRuntimeServicesCode: 399 case kEfiRuntimeServicesData: 400 firmware_RuntimeServices_bytes += region_bytes; 401 sane_size += region_bytes; 402 break; 403 case kEfiACPIReclaimMemory: 404 firmware_ACPIReclaim_bytes += region_bytes; 405 sane_size += region_bytes; 406 break; 407 case kEfiACPIMemoryNVS: 408 firmware_ACPINVS_bytes += region_bytes; 409 sane_size += region_bytes; 410 break; 411 case kEfiPalCode: 412 firmware_PalCode_bytes += region_bytes; 413 sane_size += region_bytes; 414 break; 415 416 case kEfiReservedMemoryType: 417 firmware_Reserved_bytes += region_bytes; 418 break; 419 case kEfiUnusableMemory: 420 firmware_Unusable_bytes += region_bytes; 421 break; 422 case kEfiMemoryMappedIO: 423 case kEfiMemoryMappedIOPortSpace: 424 firmware_MMIO_bytes += region_bytes; 425 break; 426 default: 427 firmware_other_bytes += region_bytes; 428 break; 429 } 430 431 DBG("EFI region %d: type %u/%d, base 0x%x, top 0x%x %s\n", 432 i, mptr->Type, pmap_type, base, top, 433 (mptr->Attribute&EFI_MEMORY_KERN_RESERVED)? "RESERVED" : 434 (mptr->Attribute&EFI_MEMORY_RUNTIME)? "RUNTIME" : ""); 435 436 if (maxpg) { 437 if (base >= maxpg) 438 break; 439 top = (top > maxpg) ? maxpg : top; 440 } 441 442 /* 443 * handle each region 444 */ 445 if ((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME || 446 pmap_type != kEfiConventionalMemory) { 447 prev_pmptr = 0; 448 continue; 449 } else { 450 /* 451 * Usable memory region 452 */ 453 if (top < I386_LOWMEM_RESERVED || 454 !pal_is_usable_memory(base, top)) { 455 prev_pmptr = 0; 456 continue; 457 } 458 /* 459 * A range may be marked with with the 460 * EFI_MEMORY_KERN_RESERVED attribute 461 * on some systems, to indicate that the range 462 * must not be made available to devices. 463 */ 464 465 if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) { 466 if (++pmap_reserved_ranges > PMAP_MAX_RESERVED_RANGES) { 467 panic("Too many reserved ranges %u\n", pmap_reserved_ranges); 468 } 469 } 470 471 if (top < fap) { 472 /* 473 * entire range below first_avail 474 * salvage some low memory pages 475 * we use some very low memory at startup 476 * mark as already allocated here 477 */ 478 if (base >= I386_LOWMEM_RESERVED) 479 pmptr->base = base; 480 else 481 pmptr->base = I386_LOWMEM_RESERVED; 482 483 pmptr->end = top; 484 485 486 if ((mptr->Attribute & EFI_MEMORY_KERN_RESERVED) && 487 (top < vm_kernel_base_page)) { 488 pmptr->alloc_up = pmptr->base; 489 pmptr->alloc_down = pmptr->end; 490 pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count; 491 } 492 else { 493 /* 494 * mark as already mapped 495 */ 496 pmptr->alloc_up = top + 1; 497 pmptr->alloc_down = top; 498 } 499 pmptr->type = pmap_type; 500 pmptr->attribute = mptr->Attribute; 501 } 502 else if ( (base < fap) && (top > fap) ) { 503 /* 504 * spans first_avail 505 * put mem below first avail in table but 506 * mark already allocated 507 */ 508 pmptr->base = base; 509 pmptr->end = (fap - 1); 510 pmptr->alloc_up = pmptr->end + 1; 511 pmptr->alloc_down = pmptr->end; 512 pmptr->type = pmap_type; 513 pmptr->attribute = mptr->Attribute; 514 /* 515 * we bump these here inline so the accounting 516 * below works correctly 517 */ 518 pmptr++; 519 pmap_memory_region_count++; 520 521 pmptr->alloc_up = pmptr->base = fap; 522 pmptr->type = pmap_type; 523 pmptr->attribute = mptr->Attribute; 524 pmptr->alloc_down = pmptr->end = top; 525 526 if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) 527 pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count; 528 } else { 529 /* 530 * entire range useable 531 */ 532 pmptr->alloc_up = pmptr->base = base; 533 pmptr->type = pmap_type; 534 pmptr->attribute = mptr->Attribute; 535 pmptr->alloc_down = pmptr->end = top; 536 if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) 537 pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count; 538 } 539 540 if (i386_ptob(pmptr->end) > avail_end ) 541 avail_end = i386_ptob(pmptr->end); 542 543 avail_remaining += (pmptr->end - pmptr->base); 544 coalescing_permitted = (prev_pmptr && (pmptr->attribute == prev_pmptr->attribute) && ((pmptr->attribute & EFI_MEMORY_KERN_RESERVED) == 0)); 545 /* 546 * Consolidate contiguous memory regions, if possible 547 */ 548 if (prev_pmptr && 549 (pmptr->type == prev_pmptr->type) && 550 (coalescing_permitted) && 551 (pmptr->base == pmptr->alloc_up) && 552 (prev_pmptr->end == prev_pmptr->alloc_down) && 553 (pmptr->base == (prev_pmptr->end + 1))) 554 { 555 prev_pmptr->end = pmptr->end; 556 prev_pmptr->alloc_down = pmptr->alloc_down; 557 } else { 558 pmap_memory_region_count++; 559 prev_pmptr = pmptr; 560 pmptr++; 561 } 562 } 563 } 564 565#ifdef PRINT_PMAP_MEMORY_TABLE 566 { 567 unsigned int j; 568 pmap_memory_region_t *p = pmap_memory_regions; 569 addr64_t region_start, region_end; 570 addr64_t efi_start, efi_end; 571 for (j=0;j<pmap_memory_region_count;j++, p++) { 572 kprintf("pmap region %d type %d base 0x%llx alloc_up 0x%llx alloc_down 0x%llx top 0x%llx\n", 573 j, p->type, 574 (addr64_t) p->base << I386_PGSHIFT, 575 (addr64_t) p->alloc_up << I386_PGSHIFT, 576 (addr64_t) p->alloc_down << I386_PGSHIFT, 577 (addr64_t) p->end << I386_PGSHIFT); 578 region_start = (addr64_t) p->base << I386_PGSHIFT; 579 region_end = ((addr64_t) p->end << I386_PGSHIFT) - 1; 580 mptr = (EfiMemoryRange *) ml_static_ptovirt((vm_offset_t)args->MemoryMap); 581 for (i=0; i<mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { 582 if (mptr->Type != kEfiLoaderCode && 583 mptr->Type != kEfiLoaderData && 584 mptr->Type != kEfiBootServicesCode && 585 mptr->Type != kEfiBootServicesData && 586 mptr->Type != kEfiConventionalMemory) { 587 efi_start = (addr64_t)mptr->PhysicalStart; 588 efi_end = efi_start + ((vm_offset_t)mptr->NumberOfPages << I386_PGSHIFT) - 1; 589 if ((efi_start >= region_start && efi_start <= region_end) || 590 (efi_end >= region_start && efi_end <= region_end)) { 591 kprintf(" *** Overlapping region with EFI runtime region %d\n", i); 592 } 593 } 594 } 595 } 596 } 597#endif 598 599 avail_start = first_avail; 600 mem_actual = sane_size; 601 602 /* 603 * For user visible memory size, round up to 128 Mb - accounting for the various stolen memory 604 * not reported by EFI. 605 */ 606 607 sane_size = (sane_size + 128 * MB - 1) & ~((uint64_t)(128 * MB - 1)); 608 609 /* 610 * We cap at KERNEL_MAXMEM bytes (currently 32GB for K32, 96GB for K64). 611 * Unless overriden by the maxmem= boot-arg 612 * -- which is a non-zero maxmem argument to this function. 613 */ 614 if (maxmem == 0 && sane_size > KERNEL_MAXMEM) { 615 maxmem = KERNEL_MAXMEM; 616 printf("Physical memory %lld bytes capped at %dGB\n", 617 sane_size, (uint32_t) (KERNEL_MAXMEM/GB)); 618 } 619 620 /* 621 * if user set maxmem, reduce memory sizes 622 */ 623 if ( (maxmem > (uint64_t)first_avail) && (maxmem < sane_size)) { 624 ppnum_t discarded_pages = (ppnum_t)((sane_size - maxmem) >> I386_PGSHIFT); 625 ppnum_t highest_pn = 0; 626 ppnum_t cur_end = 0; 627 uint64_t pages_to_use; 628 unsigned cur_region = 0; 629 630 sane_size = maxmem; 631 632 if (avail_remaining > discarded_pages) 633 avail_remaining -= discarded_pages; 634 else 635 avail_remaining = 0; 636 637 pages_to_use = avail_remaining; 638 639 while (cur_region < pmap_memory_region_count && pages_to_use) { 640 for (cur_end = pmap_memory_regions[cur_region].base; 641 cur_end < pmap_memory_regions[cur_region].end && pages_to_use; 642 cur_end++) { 643 if (cur_end > highest_pn) 644 highest_pn = cur_end; 645 pages_to_use--; 646 } 647 if (pages_to_use == 0) { 648 pmap_memory_regions[cur_region].end = cur_end; 649 pmap_memory_regions[cur_region].alloc_down = cur_end; 650 } 651 652 cur_region++; 653 } 654 pmap_memory_region_count = cur_region; 655 656 avail_end = i386_ptob(highest_pn + 1); 657 } 658 659 /* 660 * mem_size is only a 32 bit container... follow the PPC route 661 * and pin it to a 2 Gbyte maximum 662 */ 663 if (sane_size > (FOURGIG >> 1)) 664 mem_size = (vm_size_t)(FOURGIG >> 1); 665 else 666 mem_size = (vm_size_t)sane_size; 667 max_mem = sane_size; 668 669 kprintf("Physical memory %llu MB\n", sane_size/MB); 670 671 max_valid_low_ppnum = (2 * GB) / PAGE_SIZE; 672 673 if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof (maxdmaaddr))) { 674 max_valid_dma_address = (uint64_t)4 * (uint64_t)GB; 675 } else { 676 max_valid_dma_address = ((uint64_t) maxdmaaddr) * MB; 677 678 if ((max_valid_dma_address / PAGE_SIZE) < max_valid_low_ppnum) 679 max_valid_low_ppnum = (ppnum_t)(max_valid_dma_address / PAGE_SIZE); 680 } 681 if (avail_end >= max_valid_dma_address) { 682 683 if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof (maxloreserve))) { 684 685 if (sane_size >= (ONEGIG * 15)) 686 maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 4; 687 else if (sane_size >= (ONEGIG * 7)) 688 maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 2; 689 else 690 maxloreserve = MAXLORESERVE / PAGE_SIZE; 691 692#if SOCKETS 693 mbuf_reserve = bsd_mbuf_cluster_reserve(&mbuf_override) / PAGE_SIZE; 694#endif 695 } else 696 maxloreserve = (maxloreserve * (1024 * 1024)) / PAGE_SIZE; 697 698 if (maxloreserve) { 699 vm_lopage_free_limit = maxloreserve; 700 701 if (mbuf_override == TRUE) { 702 vm_lopage_free_limit += mbuf_reserve; 703 vm_lopage_lowater = 0; 704 } else 705 vm_lopage_lowater = vm_lopage_free_limit / 16; 706 707 vm_lopage_refill = TRUE; 708 vm_lopage_needed = TRUE; 709 } 710 } 711 712 /* 713 * Initialize kernel physical map. 714 * Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS. 715 */ 716 kprintf("avail_remaining = 0x%lx\n", (unsigned long)avail_remaining); 717 pmap_bootstrap(0, IA32e); 718} 719 720 721unsigned int 722pmap_free_pages(void) 723{ 724 return (unsigned int)avail_remaining; 725} 726 727 728boolean_t pmap_next_page_reserved(ppnum_t *); 729 730/* 731 * Pick a page from a "kernel private" reserved range; works around 732 * errata on some hardware. 733 */ 734boolean_t 735pmap_next_page_reserved(ppnum_t *pn) { 736 if (pmap_reserved_ranges) { 737 uint32_t n; 738 pmap_memory_region_t *region; 739 for (n = 0; n < pmap_last_reserved_range_index; n++) { 740 uint32_t reserved_index = pmap_reserved_range_indices[n]; 741 region = &pmap_memory_regions[reserved_index]; 742 if (region->alloc_up <= region->alloc_down) { 743 *pn = region->alloc_up++; 744 avail_remaining--; 745 746 if (*pn > max_ppnum) 747 max_ppnum = *pn; 748 749 if (lowest_lo == 0 || *pn < lowest_lo) 750 lowest_lo = *pn; 751 752 pmap_reserved_pages_allocated++; 753#if DEBUG 754 if (region->alloc_up > region->alloc_down) { 755 kprintf("Exhausted reserved range index: %u, base: 0x%x end: 0x%x, type: 0x%x, attribute: 0x%llx\n", reserved_index, region->base, region->end, region->type, region->attribute); 756 } 757#endif 758 return TRUE; 759 } 760 } 761 } 762 return FALSE; 763} 764 765 766boolean_t 767pmap_next_page_hi( 768 ppnum_t *pn) 769{ 770 pmap_memory_region_t *region; 771 int n; 772 773 if (pmap_next_page_reserved(pn)) 774 return TRUE; 775 776 if (avail_remaining) { 777 for (n = pmap_memory_region_count - 1; n >= 0; n--) { 778 region = &pmap_memory_regions[n]; 779 780 if (region->alloc_down >= region->alloc_up) { 781 *pn = region->alloc_down--; 782 avail_remaining--; 783 784 if (*pn > max_ppnum) 785 max_ppnum = *pn; 786 787 if (lowest_lo == 0 || *pn < lowest_lo) 788 lowest_lo = *pn; 789 790 if (lowest_hi == 0 || *pn < lowest_hi) 791 lowest_hi = *pn; 792 793 if (*pn > highest_hi) 794 highest_hi = *pn; 795 796 return TRUE; 797 } 798 } 799 } 800 return FALSE; 801} 802 803 804boolean_t 805pmap_next_page( 806 ppnum_t *pn) 807{ 808 if (avail_remaining) while (pmap_memory_region_current < pmap_memory_region_count) { 809 if (pmap_memory_regions[pmap_memory_region_current].alloc_up > 810 pmap_memory_regions[pmap_memory_region_current].alloc_down) { 811 pmap_memory_region_current++; 812 continue; 813 } 814 *pn = pmap_memory_regions[pmap_memory_region_current].alloc_up++; 815 avail_remaining--; 816 817 if (*pn > max_ppnum) 818 max_ppnum = *pn; 819 820 if (lowest_lo == 0 || *pn < lowest_lo) 821 lowest_lo = *pn; 822 823 return TRUE; 824 } 825 return FALSE; 826} 827 828 829boolean_t 830pmap_valid_page( 831 ppnum_t pn) 832{ 833 unsigned int i; 834 pmap_memory_region_t *pmptr = pmap_memory_regions; 835 836 for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { 837 if ( (pn >= pmptr->base) && (pn <= pmptr->end) ) 838 return TRUE; 839 } 840 return FALSE; 841} 842 843