vm_kern.c revision 1887
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1991, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * This code is derived from software contributed to Berkeley by 61541Srgrimes * The Mach Operating System project at Carnegie-Mellon University. 71541Srgrimes * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 3. All advertising materials mentioning features or use of this software 171541Srgrimes * must display the following acknowledgement: 181541Srgrimes * This product includes software developed by the University of 191541Srgrimes * California, Berkeley and its contributors. 201541Srgrimes * 4. Neither the name of the University nor the names of its contributors 211541Srgrimes * may be used to endorse or promote products derived from this software 221541Srgrimes * without specific prior written permission. 231541Srgrimes * 241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 341541Srgrimes * SUCH DAMAGE. 351541Srgrimes * 361817Sdg * from: @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 371541Srgrimes * 381541Srgrimes * 391541Srgrimes * Copyright (c) 1987, 1990 Carnegie-Mellon University. 401541Srgrimes * All rights reserved. 411541Srgrimes * 421541Srgrimes * Authors: Avadis Tevanian, Jr., Michael Wayne Young 431541Srgrimes * 441541Srgrimes * Permission to use, copy, modify and distribute this software and 451541Srgrimes * its documentation is hereby granted, provided that both the copyright 461541Srgrimes * notice and this permission notice appear in all copies of the 471541Srgrimes * software, derivative works or modified versions, and any portions 481541Srgrimes * thereof, and that both notices appear in supporting documentation. 491541Srgrimes * 501541Srgrimes * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 511541Srgrimes * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 521541Srgrimes * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 531541Srgrimes * 541541Srgrimes * Carnegie Mellon requests users of this software to return to 551541Srgrimes * 561541Srgrimes * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 571541Srgrimes * School of Computer Science 581541Srgrimes * Carnegie Mellon University 591541Srgrimes * Pittsburgh PA 15213-3890 601541Srgrimes * 611541Srgrimes * any improvements or extensions that they make and grant Carnegie the 621541Srgrimes * rights to redistribute these changes. 631817Sdg * 641887Sdg * $Id: vm_kern.c,v 1.3 1994/08/02 07:55:22 davidg Exp $ 651541Srgrimes */ 661541Srgrimes 671541Srgrimes/* 681541Srgrimes * Kernel memory management. 691541Srgrimes */ 701541Srgrimes 711541Srgrimes#include <sys/param.h> 721541Srgrimes#include <sys/systm.h> 731541Srgrimes 741541Srgrimes#include <vm/vm.h> 751541Srgrimes#include <vm/vm_page.h> 761541Srgrimes#include <vm/vm_pageout.h> 771541Srgrimes#include <vm/vm_kern.h> 781541Srgrimes 791541Srgrimes/* 801541Srgrimes * kmem_alloc_pageable: 811541Srgrimes * 821541Srgrimes * Allocate pageable memory to the kernel's address map. 831541Srgrimes * map must be "kernel_map" below. 841541Srgrimes */ 851541Srgrimes 861541Srgrimesvm_offset_t kmem_alloc_pageable(map, size) 871541Srgrimes vm_map_t map; 881541Srgrimes register vm_size_t size; 891541Srgrimes{ 901541Srgrimes vm_offset_t addr; 911541Srgrimes register int result; 921541Srgrimes 931541Srgrimes#if 0 941541Srgrimes if (map != kernel_map) 951541Srgrimes panic("kmem_alloc_pageable: not called with kernel_map"); 961541Srgrimes#endif 971541Srgrimes 981541Srgrimes size = round_page(size); 991541Srgrimes 1001541Srgrimes addr = vm_map_min(map); 1011541Srgrimes result = vm_map_find(map, NULL, (vm_offset_t) 0, 1021541Srgrimes &addr, size, TRUE); 1031541Srgrimes if (result != KERN_SUCCESS) { 1041541Srgrimes return(0); 1051541Srgrimes } 1061541Srgrimes 1071541Srgrimes return(addr); 1081541Srgrimes} 1091541Srgrimes 1101541Srgrimes/* 1111541Srgrimes * Allocate wired-down memory in the kernel's address map 1121541Srgrimes * or a submap. 1131541Srgrimes */ 1141541Srgrimesvm_offset_t kmem_alloc(map, size) 1151541Srgrimes register vm_map_t map; 1161541Srgrimes register vm_size_t size; 1171541Srgrimes{ 1181541Srgrimes vm_offset_t addr; 1191541Srgrimes register vm_offset_t offset; 1201541Srgrimes extern vm_object_t kernel_object; 1211541Srgrimes vm_offset_t i; 1221541Srgrimes 1231541Srgrimes size = round_page(size); 1241541Srgrimes 1251541Srgrimes /* 1261541Srgrimes * Use the kernel object for wired-down kernel pages. 1271541Srgrimes * Assume that no region of the kernel object is 1281541Srgrimes * referenced more than once. 1291541Srgrimes */ 1301541Srgrimes 1311541Srgrimes /* 1321541Srgrimes * Locate sufficient space in the map. This will give us the 1331541Srgrimes * final virtual address for the new memory, and thus will tell 1341541Srgrimes * us the offset within the kernel map. 1351541Srgrimes */ 1361541Srgrimes vm_map_lock(map); 1371541Srgrimes if (vm_map_findspace(map, 0, size, &addr)) { 1381541Srgrimes vm_map_unlock(map); 1391541Srgrimes return (0); 1401541Srgrimes } 1411541Srgrimes offset = addr - VM_MIN_KERNEL_ADDRESS; 1421541Srgrimes vm_object_reference(kernel_object); 1431541Srgrimes vm_map_insert(map, kernel_object, offset, addr, addr + size); 1441541Srgrimes vm_map_unlock(map); 1451541Srgrimes 1461541Srgrimes /* 1471541Srgrimes * Guarantee that there are pages already in this object 1481541Srgrimes * before calling vm_map_pageable. This is to prevent the 1491541Srgrimes * following scenario: 1501541Srgrimes * 1511541Srgrimes * 1) Threads have swapped out, so that there is a 1521541Srgrimes * pager for the kernel_object. 1531541Srgrimes * 2) The kmsg zone is empty, and so we are kmem_allocing 1541541Srgrimes * a new page for it. 1551541Srgrimes * 3) vm_map_pageable calls vm_fault; there is no page, 1561541Srgrimes * but there is a pager, so we call 1571541Srgrimes * pager_data_request. But the kmsg zone is empty, 1581541Srgrimes * so we must kmem_alloc. 1591541Srgrimes * 4) goto 1 1601541Srgrimes * 5) Even if the kmsg zone is not empty: when we get 1611541Srgrimes * the data back from the pager, it will be (very 1621541Srgrimes * stale) non-zero data. kmem_alloc is defined to 1631541Srgrimes * return zero-filled memory. 1641541Srgrimes * 1651541Srgrimes * We're intentionally not activating the pages we allocate 1661541Srgrimes * to prevent a race with page-out. vm_map_pageable will wire 1671541Srgrimes * the pages. 1681541Srgrimes */ 1691541Srgrimes 1701541Srgrimes vm_object_lock(kernel_object); 1711541Srgrimes for (i = 0 ; i < size; i+= PAGE_SIZE) { 1721541Srgrimes vm_page_t mem; 1731541Srgrimes 1741541Srgrimes while ((mem = vm_page_alloc(kernel_object, offset+i)) == NULL) { 1751541Srgrimes vm_object_unlock(kernel_object); 1761541Srgrimes VM_WAIT; 1771541Srgrimes vm_object_lock(kernel_object); 1781541Srgrimes } 1791541Srgrimes vm_page_zero_fill(mem); 1801541Srgrimes mem->flags &= ~PG_BUSY; 1811541Srgrimes } 1821541Srgrimes vm_object_unlock(kernel_object); 1831541Srgrimes 1841541Srgrimes /* 1851541Srgrimes * And finally, mark the data as non-pageable. 1861541Srgrimes */ 1871541Srgrimes 1881541Srgrimes (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE); 1891541Srgrimes 1901541Srgrimes /* 1911541Srgrimes * Try to coalesce the map 1921541Srgrimes */ 1931541Srgrimes 1941541Srgrimes vm_map_simplify(map, addr); 1951541Srgrimes 1961541Srgrimes return(addr); 1971541Srgrimes} 1981541Srgrimes 1991541Srgrimes/* 2001541Srgrimes * kmem_free: 2011541Srgrimes * 2021541Srgrimes * Release a region of kernel virtual memory allocated 2031541Srgrimes * with kmem_alloc, and return the physical pages 2041541Srgrimes * associated with that region. 2051541Srgrimes */ 2061541Srgrimesvoid kmem_free(map, addr, size) 2071541Srgrimes vm_map_t map; 2081541Srgrimes register vm_offset_t addr; 2091541Srgrimes vm_size_t size; 2101541Srgrimes{ 2111541Srgrimes (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size)); 2121541Srgrimes} 2131541Srgrimes 2141541Srgrimes/* 2151541Srgrimes * kmem_suballoc: 2161541Srgrimes * 2171541Srgrimes * Allocates a map to manage a subrange 2181541Srgrimes * of the kernel virtual address space. 2191541Srgrimes * 2201541Srgrimes * Arguments are as follows: 2211541Srgrimes * 2221541Srgrimes * parent Map to take range from 2231541Srgrimes * size Size of range to find 2241541Srgrimes * min, max Returned endpoints of map 2251541Srgrimes * pageable Can the region be paged 2261541Srgrimes */ 2271541Srgrimesvm_map_t kmem_suballoc(parent, min, max, size, pageable) 2281541Srgrimes register vm_map_t parent; 2291541Srgrimes vm_offset_t *min, *max; 2301541Srgrimes register vm_size_t size; 2311541Srgrimes boolean_t pageable; 2321541Srgrimes{ 2331541Srgrimes register int ret; 2341541Srgrimes vm_map_t result; 2351541Srgrimes 2361541Srgrimes size = round_page(size); 2371541Srgrimes 2381541Srgrimes *min = (vm_offset_t) vm_map_min(parent); 2391541Srgrimes ret = vm_map_find(parent, NULL, (vm_offset_t) 0, 2401541Srgrimes min, size, TRUE); 2411541Srgrimes if (ret != KERN_SUCCESS) { 2421541Srgrimes printf("kmem_suballoc: bad status return of %d.\n", ret); 2431541Srgrimes panic("kmem_suballoc"); 2441541Srgrimes } 2451541Srgrimes *max = *min + size; 2461541Srgrimes pmap_reference(vm_map_pmap(parent)); 2471541Srgrimes result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable); 2481541Srgrimes if (result == NULL) 2491541Srgrimes panic("kmem_suballoc: cannot create submap"); 2501541Srgrimes if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS) 2511541Srgrimes panic("kmem_suballoc: unable to change range to submap"); 2521541Srgrimes return(result); 2531541Srgrimes} 2541541Srgrimes 2551541Srgrimes/* 2561541Srgrimes * Allocate wired-down memory in the kernel's address map for the higher 2571541Srgrimes * level kernel memory allocator (kern/kern_malloc.c). We cannot use 2581541Srgrimes * kmem_alloc() because we may need to allocate memory at interrupt 2591541Srgrimes * level where we cannot block (canwait == FALSE). 2601541Srgrimes * 2611541Srgrimes * This routine has its own private kernel submap (kmem_map) and object 2621541Srgrimes * (kmem_object). This, combined with the fact that only malloc uses 2631541Srgrimes * this routine, ensures that we will never block in map or object waits. 2641541Srgrimes * 2651541Srgrimes * Note that this still only works in a uni-processor environment and 2661541Srgrimes * when called at splhigh(). 2671541Srgrimes * 2681541Srgrimes * We don't worry about expanding the map (adding entries) since entries 2691541Srgrimes * for wired maps are statically allocated. 2701541Srgrimes */ 2711541Srgrimesvm_offset_t 2721541Srgrimeskmem_malloc(map, size, canwait) 2731541Srgrimes register vm_map_t map; 2741541Srgrimes register vm_size_t size; 2751541Srgrimes boolean_t canwait; 2761541Srgrimes{ 2771541Srgrimes register vm_offset_t offset, i; 2781541Srgrimes vm_map_entry_t entry; 2791541Srgrimes vm_offset_t addr; 2801541Srgrimes vm_page_t m; 2811541Srgrimes extern vm_object_t kmem_object; 2821541Srgrimes 2831541Srgrimes if (map != kmem_map && map != mb_map) 2841541Srgrimes panic("kern_malloc_alloc: map != {kmem,mb}_map"); 2851541Srgrimes 2861541Srgrimes size = round_page(size); 2871541Srgrimes addr = vm_map_min(map); 2881541Srgrimes 2891541Srgrimes /* 2901541Srgrimes * Locate sufficient space in the map. This will give us the 2911541Srgrimes * final virtual address for the new memory, and thus will tell 2921541Srgrimes * us the offset within the kernel map. 2931541Srgrimes */ 2941541Srgrimes vm_map_lock(map); 2951541Srgrimes if (vm_map_findspace(map, 0, size, &addr)) { 2961541Srgrimes vm_map_unlock(map); 2971549Srgrimes#if 0 2981541Srgrimes if (canwait) /* XXX should wait */ 2991541Srgrimes panic("kmem_malloc: %s too small", 3001541Srgrimes map == kmem_map ? "kmem_map" : "mb_map"); 3011549Srgrimes#endif 3021549Srgrimes if (canwait) 3031549Srgrimes panic("kmem_malloc: map too small"); 3041541Srgrimes return (0); 3051541Srgrimes } 3061541Srgrimes offset = addr - vm_map_min(kmem_map); 3071541Srgrimes vm_object_reference(kmem_object); 3081541Srgrimes vm_map_insert(map, kmem_object, offset, addr, addr + size); 3091541Srgrimes 3101541Srgrimes /* 3111541Srgrimes * If we can wait, just mark the range as wired 3121541Srgrimes * (will fault pages as necessary). 3131541Srgrimes */ 3141541Srgrimes if (canwait) { 3151541Srgrimes vm_map_unlock(map); 3161541Srgrimes (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, 3171541Srgrimes FALSE); 3181541Srgrimes vm_map_simplify(map, addr); 3191541Srgrimes return(addr); 3201541Srgrimes } 3211541Srgrimes 3221541Srgrimes /* 3231541Srgrimes * If we cannot wait then we must allocate all memory up front, 3241541Srgrimes * pulling it off the active queue to prevent pageout. 3251541Srgrimes */ 3261541Srgrimes vm_object_lock(kmem_object); 3271541Srgrimes for (i = 0; i < size; i += PAGE_SIZE) { 3281541Srgrimes m = vm_page_alloc(kmem_object, offset + i); 3291541Srgrimes 3301541Srgrimes /* 3311541Srgrimes * Ran out of space, free everything up and return. 3321541Srgrimes * Don't need to lock page queues here as we know 3331541Srgrimes * that the pages we got aren't on any queues. 3341541Srgrimes */ 3351541Srgrimes if (m == NULL) { 3361541Srgrimes while (i != 0) { 3371541Srgrimes i -= PAGE_SIZE; 3381541Srgrimes m = vm_page_lookup(kmem_object, offset + i); 3391541Srgrimes vm_page_free(m); 3401541Srgrimes } 3411541Srgrimes vm_object_unlock(kmem_object); 3421541Srgrimes vm_map_delete(map, addr, addr + size); 3431541Srgrimes vm_map_unlock(map); 3441541Srgrimes return(0); 3451541Srgrimes } 3461541Srgrimes#if 0 3471541Srgrimes vm_page_zero_fill(m); 3481541Srgrimes#endif 3491541Srgrimes m->flags &= ~PG_BUSY; 3501541Srgrimes } 3511541Srgrimes vm_object_unlock(kmem_object); 3521541Srgrimes 3531541Srgrimes /* 3541541Srgrimes * Mark map entry as non-pageable. 3551541Srgrimes * Assert: vm_map_insert() will never be able to extend the previous 3561541Srgrimes * entry so there will be a new entry exactly corresponding to this 3571541Srgrimes * address range and it will have wired_count == 0. 3581541Srgrimes */ 3591541Srgrimes if (!vm_map_lookup_entry(map, addr, &entry) || 3601541Srgrimes entry->start != addr || entry->end != addr + size || 3611541Srgrimes entry->wired_count) 3621541Srgrimes panic("kmem_malloc: entry not found or misaligned"); 3631541Srgrimes entry->wired_count++; 3641541Srgrimes 3651541Srgrimes /* 3661541Srgrimes * Loop thru pages, entering them in the pmap. 3671541Srgrimes * (We cannot add them to the wired count without 3681541Srgrimes * wrapping the vm_page_queue_lock in splimp...) 3691541Srgrimes */ 3701541Srgrimes for (i = 0; i < size; i += PAGE_SIZE) { 3711541Srgrimes vm_object_lock(kmem_object); 3721541Srgrimes m = vm_page_lookup(kmem_object, offset + i); 3731541Srgrimes vm_object_unlock(kmem_object); 3741887Sdg/* 3751541Srgrimes pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m), 3761541Srgrimes VM_PROT_DEFAULT, TRUE); 3771887Sdg*/ 3781887Sdg pmap_kenter( addr + i, VM_PAGE_TO_PHYS(m)); 3791541Srgrimes } 3801887Sdg pmap_update(); 3811541Srgrimes vm_map_unlock(map); 3821541Srgrimes 3831541Srgrimes vm_map_simplify(map, addr); 3841541Srgrimes return(addr); 3851541Srgrimes} 3861541Srgrimes 3871541Srgrimes/* 3881541Srgrimes * kmem_alloc_wait 3891541Srgrimes * 3901541Srgrimes * Allocates pageable memory from a sub-map of the kernel. If the submap 3911541Srgrimes * has no room, the caller sleeps waiting for more memory in the submap. 3921541Srgrimes * 3931541Srgrimes */ 3941541Srgrimesvm_offset_t kmem_alloc_wait(map, size) 3951541Srgrimes vm_map_t map; 3961541Srgrimes vm_size_t size; 3971541Srgrimes{ 3981541Srgrimes vm_offset_t addr; 3991541Srgrimes 4001541Srgrimes size = round_page(size); 4011541Srgrimes 4021541Srgrimes for (;;) { 4031541Srgrimes /* 4041541Srgrimes * To make this work for more than one map, 4051541Srgrimes * use the map's lock to lock out sleepers/wakers. 4061541Srgrimes */ 4071541Srgrimes vm_map_lock(map); 4081541Srgrimes if (vm_map_findspace(map, 0, size, &addr) == 0) 4091541Srgrimes break; 4101541Srgrimes /* no space now; see if we can ever get space */ 4111541Srgrimes if (vm_map_max(map) - vm_map_min(map) < size) { 4121541Srgrimes vm_map_unlock(map); 4131541Srgrimes return (0); 4141541Srgrimes } 4151541Srgrimes assert_wait((int)map, TRUE); 4161541Srgrimes vm_map_unlock(map); 4171549Srgrimes thread_block("kmaw"); 4181541Srgrimes } 4191541Srgrimes vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size); 4201541Srgrimes vm_map_unlock(map); 4211541Srgrimes return (addr); 4221541Srgrimes} 4231541Srgrimes 4241541Srgrimes/* 4251541Srgrimes * kmem_free_wakeup 4261541Srgrimes * 4271541Srgrimes * Returns memory to a submap of the kernel, and wakes up any threads 4281541Srgrimes * waiting for memory in that map. 4291541Srgrimes */ 4301541Srgrimesvoid kmem_free_wakeup(map, addr, size) 4311541Srgrimes vm_map_t map; 4321541Srgrimes vm_offset_t addr; 4331541Srgrimes vm_size_t size; 4341541Srgrimes{ 4351541Srgrimes vm_map_lock(map); 4361541Srgrimes (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size)); 4371541Srgrimes thread_wakeup((int)map); 4381541Srgrimes vm_map_unlock(map); 4391541Srgrimes} 4401541Srgrimes 4411541Srgrimes/* 4421541Srgrimes * Create the kernel map; insert a mapping covering kernel text, data, bss, 4431541Srgrimes * and all space allocated thus far (`boostrap' data). The new map will thus 4441541Srgrimes * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and 4451541Srgrimes * the range between `start' and `end' as free. 4461541Srgrimes */ 4471541Srgrimesvoid kmem_init(start, end) 4481541Srgrimes vm_offset_t start, end; 4491541Srgrimes{ 4501541Srgrimes register vm_map_t m; 4511541Srgrimes 4521541Srgrimes m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE); 4531541Srgrimes vm_map_lock(m); 4541541Srgrimes /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ 4551541Srgrimes kernel_map = m; 4561541Srgrimes (void) vm_map_insert(m, NULL, (vm_offset_t)0, 4571541Srgrimes VM_MIN_KERNEL_ADDRESS, start); 4581541Srgrimes /* ... and ending with the completion of the above `insert' */ 4591541Srgrimes vm_map_unlock(m); 4601541Srgrimes} 461