/* vm_kern.c revision 1887 */
1107120Sjulian/* 2107120Sjulian * Copyright (c) 1991, 1993 3107120Sjulian * The Regents of the University of California. All rights reserved. 4107120Sjulian * 5107120Sjulian * This code is derived from software contributed to Berkeley by 6107120Sjulian * The Mach Operating System project at Carnegie-Mellon University. 7107120Sjulian * 8107120Sjulian * Redistribution and use in source and binary forms, with or without 9107120Sjulian * modification, are permitted provided that the following conditions 10107120Sjulian * are met: 11107120Sjulian * 1. Redistributions of source code must retain the above copyright 12107120Sjulian * notice, this list of conditions and the following disclaimer. 13107120Sjulian * 2. Redistributions in binary form must reproduce the above copyright 14107120Sjulian * notice, this list of conditions and the following disclaimer in the 15107120Sjulian * documentation and/or other materials provided with the distribution. 16107120Sjulian * 3. All advertising materials mentioning features or use of this software 17107120Sjulian * must display the following acknowledgement: 18107120Sjulian * This product includes software developed by the University of 19107120Sjulian * California, Berkeley and its contributors. 20107120Sjulian * 4. Neither the name of the University nor the names of its contributors 21107120Sjulian * may be used to endorse or promote products derived from this software 22107120Sjulian * without specific prior written permission. 23107120Sjulian * 24107120Sjulian * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25107120Sjulian * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26107120Sjulian * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27107120Sjulian * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28121054Semax * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29107120Sjulian * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30107120Sjulian * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31107120Sjulian * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32107120Sjulian * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33281210Stakawata * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34121054Semax * SUCH DAMAGE. 35107120Sjulian * 36158834Smarkus * from: @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 37107120Sjulian * 38107120Sjulian * 39107120Sjulian * Copyright (c) 1987, 1990 Carnegie-Mellon University. 40158834Smarkus * All rights reserved. 41107120Sjulian * 42107120Sjulian * Authors: Avadis Tevanian, Jr., Michael Wayne Young 43107120Sjulian * 44107120Sjulian * Permission to use, copy, modify and distribute this software and 45107120Sjulian * its documentation is hereby granted, provided that both the copyright 46107120Sjulian * notice and this permission notice appear in all copies of the 47107120Sjulian * software, derivative works or modified versions, and any portions 48107120Sjulian * thereof, and that both notices appear in supporting documentation. 49107120Sjulian * 50107120Sjulian * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51107120Sjulian * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52107120Sjulian * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
53114879Sjulian * 54107120Sjulian * Carnegie Mellon requests users of this software to return to 55107120Sjulian * 56107120Sjulian * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57107120Sjulian * School of Computer Science 58107120Sjulian * Carnegie Mellon University 59107120Sjulian * Pittsburgh PA 15213-3890 60107120Sjulian * 61107120Sjulian * any improvements or extensions that they make and grant Carnegie the 62114879Sjulian * rights to redistribute these changes. 63107120Sjulian * 64107120Sjulian * $Id: vm_kern.c,v 1.3 1994/08/02 07:55:22 davidg Exp $ 65107120Sjulian */ 66107120Sjulian 67107120Sjulian/* 68107120Sjulian * Kernel memory management. 69107120Sjulian */ 70107120Sjulian 71107120Sjulian#include <sys/param.h> 72107120Sjulian#include <sys/systm.h> 73107120Sjulian 74107120Sjulian#include <vm/vm.h> 75107120Sjulian#include <vm/vm_page.h> 76107120Sjulian#include <vm/vm_pageout.h> 77107120Sjulian#include <vm/vm_kern.h> 78114879Sjulian 79107120Sjulian/* 80107120Sjulian * kmem_alloc_pageable: 81107120Sjulian * 82107120Sjulian * Allocate pageable memory to the kernel's address map. 83107120Sjulian * map must be "kernel_map" below. 
/*
 * kmem_alloc_pageable:
 *
 *	Allocate pageable memory to the kernel's address map.
 *	map must be "kernel_map" below.
 *
 *	Returns the kernel virtual address of the new region, or 0 if
 *	no space could be found in the map.  The memory is pageable
 *	(not wired) and is NOT zero-filled.
 */

vm_offset_t kmem_alloc_pageable(map, size)
	vm_map_t		map;
	register vm_size_t	size;
{
	vm_offset_t		addr;
	register int		result;

#if	0
	/* NOTE(review): sanity check compiled out -- reason not recorded. */
	if (map != kernel_map)
		panic("kmem_alloc_pageable: not called with kernel_map");
#endif

	/* Round the request up to a whole number of pages. */
	size = round_page(size);

	/*
	 * Search for free space starting at the bottom of the map;
	 * vm_map_find() both finds the space and enters the mapping.
	 */
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, (vm_offset_t) 0,
				&addr, size, TRUE);
	if (result != KERN_SUCCESS) {
		return(0);
	}

	return(addr);
}

/*
 * kmem_alloc:
 *
 *	Allocate wired-down, zero-filled memory in the kernel's address map
 *	or a submap.  Returns the kernel virtual address of the region, or
 *	0 if no space could be found.
 */
vm_offset_t kmem_alloc(map, size)
	register vm_map_t	map;
	register vm_size_t	size;
{
	vm_offset_t		addr;
	register vm_offset_t	offset;
	extern vm_object_t	kernel_object;
	vm_offset_t		i;

	size = round_page(size);

	/*
	 * Use the kernel object for wired-down kernel pages.
	 * Assume that no region of the kernel object is
	 * referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map.  This will give us the
	 * final virtual address for the new memory, and thus will tell
	 * us the offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size);
	vm_map_unlock(map);

	/*
	 * Guarantee that there are pages already in this object
	 * before calling vm_map_pageable.  This is to prevent the
	 * following scenario:
	 *
	 *	1) Threads have swapped out, so that there is a
	 *	   pager for the kernel_object.
	 *	2) The kmsg zone is empty, and so we are kmem_allocing
	 *	   a new page for it.
	 *	3) vm_map_pageable calls vm_fault; there is no page,
	 *	   but there is a pager, so we call
	 *	   pager_data_request.  But the kmsg zone is empty,
	 *	   so we must kmem_alloc.
	 *	4) goto 1
	 *	5) Even if the kmsg zone is not empty: when we get
	 *	   the data back from the pager, it will be (very
	 *	   stale) non-zero data.  kmem_alloc is defined to
	 *	   return zero-filled memory.
	 *
	 * We're intentionally not activating the pages we allocate
	 * to prevent a race with page-out.  vm_map_pageable will wire
	 * the pages.
	 */

	vm_object_lock(kernel_object);
	for (i = 0 ; i < size; i+= PAGE_SIZE) {
		vm_page_t	mem;

		/*
		 * If no page is free, drop the object lock and sleep in
		 * VM_WAIT until the pageout daemon frees some memory,
		 * then retry this offset.
		 */
		while ((mem = vm_page_alloc(kernel_object, offset+i)) == NULL) {
			vm_object_unlock(kernel_object);
			VM_WAIT;
			vm_object_lock(kernel_object);
		}
		vm_page_zero_fill(mem);
		mem->flags &= ~PG_BUSY;
	}
	vm_object_unlock(kernel_object);

	/*
	 * And finally, mark the data as non-pageable.
	 */

	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);

	/*
	 * Try to coalesce the map
	 */

	vm_map_simplify(map, addr);

	return(addr);
}

/*
 * kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 */
void kmem_free(map, addr, size)
	vm_map_t		map;
	register vm_offset_t	addr;
	vm_size_t		size;
{
	/* Page-align the range; vm_map_remove drops the pages with it. */
	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}
219107120Sjulian * 220107120Sjulian * Arguments are as follows: 221107120Sjulian * 222107120Sjulian * parent Map to take range from 223107120Sjulian * size Size of range to find 224107120Sjulian * min, max Returned endpoints of map 225107120Sjulian * pageable Can the region be paged 226107120Sjulian */ 227107120Sjulianvm_map_t kmem_suballoc(parent, min, max, size, pageable) 228107120Sjulian register vm_map_t parent; 229107120Sjulian vm_offset_t *min, *max; 230107120Sjulian register vm_size_t size; 231107120Sjulian boolean_t pageable; 232107120Sjulian{ 233107120Sjulian register int ret; 234107120Sjulian vm_map_t result; 235107120Sjulian 236107120Sjulian size = round_page(size); 237107120Sjulian 238107120Sjulian *min = (vm_offset_t) vm_map_min(parent); 239107120Sjulian ret = vm_map_find(parent, NULL, (vm_offset_t) 0, 240107120Sjulian min, size, TRUE); 241107120Sjulian if (ret != KERN_SUCCESS) { 242121054Semax printf("kmem_suballoc: bad status return of %d.\n", ret); 243107120Sjulian panic("kmem_suballoc"); 244107120Sjulian } 245107120Sjulian *max = *min + size; 246107120Sjulian pmap_reference(vm_map_pmap(parent)); 247121054Semax result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable); 248107120Sjulian if (result == NULL) 249107120Sjulian panic("kmem_suballoc: cannot create submap"); 250107120Sjulian if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS) 251107120Sjulian panic("kmem_suballoc: unable to change range to submap"); 252107120Sjulian return(result); 253107120Sjulian} 254107120Sjulian 255107120Sjulian/* 256107120Sjulian * Allocate wired-down memory in the kernel's address map for the higher 257107120Sjulian * level kernel memory allocator (kern/kern_malloc.c). We cannot use 258107120Sjulian * kmem_alloc() because we may need to allocate memory at interrupt 259107120Sjulian * level where we cannot block (canwait == FALSE). 
/*
 * Allocate wired-down memory in the kernel's address map for the higher
 * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 * kmem_alloc() because we may need to allocate memory at interrupt
 * level where we cannot block (canwait == FALSE).
 *
 * This routine has its own private kernel submap (kmem_map) and object
 * (kmem_object).  This, combined with the fact that only malloc uses
 * this routine, ensures that we will never block in map or object waits.
 *
 * Note that this still only works in a uni-processor environment and
 * when called at splhigh().
 *
 * We don't worry about expanding the map (adding entries) since entries
 * for wired maps are statically allocated.
 *
 * Returns the kernel virtual address of the region, or 0 on failure.
 */
vm_offset_t
kmem_malloc(map, size, canwait)
	register vm_map_t	map;
	register vm_size_t	size;
	boolean_t		canwait;
{
	register vm_offset_t	offset, i;
	vm_map_entry_t		entry;
	vm_offset_t		addr;
	vm_page_t		m;
	extern vm_object_t	kmem_object;

	/* Only the two private submaps are legal here. */
	if (map != kmem_map && map != mb_map)
		panic("kern_malloc_alloc: map != {kmem,mb}_map");

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map.  This will give us the
	 * final virtual address for the new memory, and thus will tell
	 * us the offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
#if 0
		if (canwait)		/* XXX  should wait */
			panic("kmem_malloc: %s too small",
			    map == kmem_map ? "kmem_map" : "mb_map");
#endif
		/*
		 * NOTE(review): a sleeping caller panics while a
		 * non-sleeping caller just gets 0; the XXX above says
		 * the canwait case should really wait, not panic.
		 */
		if (canwait)
			panic("kmem_malloc: map too small");
		return (0);
	}
	/* Offset of this range within kmem_object, keyed off kmem_map. */
	offset = addr - vm_map_min(kmem_map);
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size);

	/*
	 * If we can wait, just mark the range as wired
	 * (will fault pages as necessary).
	 */
	if (canwait) {
		vm_map_unlock(map);
		(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size,
				       FALSE);
		vm_map_simplify(map, addr);
		return(addr);
	}

	/*
	 * If we cannot wait then we must allocate all memory up front,
	 * pulling it off the active queue to prevent pageout.
	 * Note: the map lock is still held throughout this path.
	 */
	vm_object_lock(kmem_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_alloc(kmem_object, offset + i);

		/*
		 * Ran out of space, free everything up and return.
		 * Don't need to lock page queues here as we know
		 * that the pages we got aren't on any queues.
		 */
		if (m == NULL) {
			/* Unwind: free every page allocated so far. */
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object, offset + i);
				vm_page_free(m);
			}
			vm_object_unlock(kmem_object);
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			return(0);
		}
#if 0
		/* NOTE(review): zero-fill compiled out on this path. */
		vm_page_zero_fill(m);
#endif
		m->flags &= ~PG_BUSY;
	}
	vm_object_unlock(kmem_object);

	/*
	 * Mark map entry as non-pageable.
	 * Assert: vm_map_insert() will never be able to extend the previous
	 * entry so there will be a new entry exactly corresponding to this
	 * address range and it will have wired_count == 0.
	 */
	if (!vm_map_lookup_entry(map, addr, &entry) ||
	    entry->start != addr || entry->end != addr + size ||
	    entry->wired_count)
		panic("kmem_malloc: entry not found or misaligned");
	entry->wired_count++;

	/*
	 * Loop thru pages, entering them in the pmap.
	 * (We cannot add them to the wired count without
	 * wrapping the vm_page_queue_lock in splimp...)
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_object_lock(kmem_object);
		m = vm_page_lookup(kmem_object, offset + i);
		vm_object_unlock(kmem_object);
/*
		pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m),
			   VM_PROT_DEFAULT, TRUE);
*/
		pmap_kenter( addr + i, VM_PAGE_TO_PHYS(m));
	}
	pmap_update();
	vm_map_unlock(map);

	vm_map_simplify(map, addr);
	return(addr);
}

/*
 * kmem_alloc_wait
 *
 *	Allocates pageable memory from a sub-map of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap
 *	(woken by kmem_free_wakeup).  Returns 0 only if the request can never
 *	fit in the map at all.
 */
vm_offset_t kmem_alloc_wait(map, size)
	vm_map_t	map;
	vm_size_t	size;
{
	vm_offset_t	addr;

	size = round_page(size);

	for (;;) {
		/*
		 * To make this work for more than one map,
		 * use the map's lock to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		/* Found space: break out with the map still locked. */
		if (vm_map_findspace(map, 0, size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			return (0);
		}
		/* Sleep on the map address until someone frees into it. */
		assert_wait((int)map, TRUE);
		vm_map_unlock(map);
		thread_block("kmaw");
	}
	vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size);
	vm_map_unlock(map);
	return (addr);
}

/*
 * kmem_free_wakeup
 *
 *	Returns memory to a submap of the kernel, and wakes up any threads
 *	waiting for memory in that map (see kmem_alloc_wait).
 */
void kmem_free_wakeup(map, addr, size)
	vm_map_t	map;
	vm_offset_t	addr;
	vm_size_t	size;
{
	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	/* Wake sleepers keyed on the map address in kmem_alloc_wait(). */
	thread_wakeup((int)map);
	vm_map_unlock(map);
}
446114879Sjulian */ 447158834Smarkusvoid kmem_init(start, end) 448158834Smarkus vm_offset_t start, end; 449158834Smarkus{ 450158834Smarkus register vm_map_t m; 451158834Smarkus 452158834Smarkus m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE); 453158834Smarkus vm_map_lock(m); 454158834Smarkus /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ 455158834Smarkus kernel_map = m; 456158834Smarkus (void) vm_map_insert(m, NULL, (vm_offset_t)0, 457158834Smarkus VM_MIN_KERNEL_ADDRESS, start); 458158834Smarkus /* ... and ending with the completion of the above `insert' */ 459158834Smarkus vm_map_unlock(m); 460158834Smarkus} 461158834Smarkus