/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_kern.c,v 1.28 1996/11/17 02:38:29 dyson Exp $
 */

/*
 *	Kernel memory management.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

vm_map_t kernel_map=0;
vm_map_t kmem_map=0;
vm_map_t exec_map=0;
vm_map_t exech_map=0;
vm_map_t clean_map=0;
vm_map_t u_map=0;
vm_map_t buffer_map=0;
vm_map_t mb_map=0;
int mb_map_full=0;
vm_map_t mcl_map=0;
int mcl_map_full=0;
vm_map_t io_map=0;
vm_map_t phys_map=0;

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory to the kernel's address map.
 *	"map" must be kernel_map or a submap of kernel_map.
 */

vm_offset_t
kmem_alloc_pageable(map, size)
	vm_map_t map;
	register vm_size_t size;
{
	vm_offset_t addr;
	register int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, (vm_offset_t) 0,
	    &addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
	if (result != KERN_SUCCESS) {
		return (0);
	}
	return (addr);
}
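
/*
 * Usage sketch (hypothetical caller, not part of this file): a
 * subsystem that can tolerate having its buffer paged out might
 * allocate from kernel_map like so:
 *
 *	vm_offset_t va;
 *
 *	va = kmem_alloc_pageable(kernel_map, 4 * PAGE_SIZE);
 *	if (va == 0)
 *		return (ENOMEM);
 *
 * The pages behind va are not wired; the caller takes a fault on
 * first touch, so the region must not be used at interrupt time.
 */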

/*
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.
 */
vm_offset_t
kmem_alloc(map, size)
	register vm_map_t map;
	register vm_size_t size;
{
	vm_offset_t addr;
	register vm_offset_t offset;
	vm_offset_t i;

	size = round_page(size);

	/*
	 * Use the kernel object for wired-down kernel pages. Assume that no
	 * region of the kernel object is referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map. This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Guarantee that there are pages already in this object before
	 * calling vm_map_pageable. This is to prevent the following
	 * scenario:
	 *
	 * 1) Threads have swapped out, so that there is a pager for the
	 *    kernel_object.
	 * 2) The kmsg zone is empty, and so we are kmem_allocing a new page
	 *    for it.
	 * 3) vm_map_pageable calls vm_fault; there is no page, but there is
	 *    a pager, so we call pager_data_request.  But the kmsg zone is
	 *    empty, so we must kmem_alloc.
	 * 4) goto 1
	 * 5) Even if the kmsg zone is not empty: when we get the data back
	 *    from the pager, it will be (very stale) non-zero data.
	 *    kmem_alloc is defined to return zero-filled memory.
	 *
	 * We're intentionally not activating the pages we allocate to
	 * prevent a race with page-out.  vm_map_pageable will wire the pages.
	 */

	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_page_t mem;

		while ((mem = vm_page_alloc(kernel_object,
		    OFF_TO_IDX(offset + i), VM_ALLOC_ZERO)) == NULL) {
			VM_WAIT;
		}
		if ((mem->flags & PG_ZERO) == 0)
			vm_page_zero_fill(mem);
		mem->flags &= ~(PG_BUSY|PG_ZERO);
		mem->valid = VM_PAGE_BITS_ALL;
	}

	/*
	 * And finally, mark the data as non-pageable.
	 */

	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);

	return (addr);
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 */
void
kmem_free(map, addr, size)
	vm_map_t map;
	register vm_offset_t addr;
	vm_size_t size;
{
	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}
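
/*
 * Usage sketch (hypothetical, for illustration only): kmem_alloc()
 * and kmem_free() pair up for wired, zero-filled allocations:
 *
 *	vm_size_t sz = 2 * PAGE_SIZE;
 *	vm_offset_t va;
 *
 *	va = kmem_alloc(kernel_map, sz);
 *	if (va == 0)
 *		panic("out of kernel virtual memory");
 *	... use the zero-filled, wired memory at va, then ...
 *	kmem_free(kernel_map, va, sz);
 *
 * kmem_free() must be given the same map the memory came from and a
 * size no larger than the original allocation.
 */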

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	size		Size of range to find
 *	min, max	Returned endpoints of map
 *	pageable	Can the region be paged
 */
vm_map_t
kmem_suballoc(parent, min, max, size, pageable)
	register vm_map_t parent;
	vm_offset_t *min, *max;
	register vm_size_t size;
	boolean_t pageable;
{
	register int ret;
	vm_map_t result;

	size = round_page(size);

	*min = (vm_offset_t) vm_map_min(parent);
	ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
	    min, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
	if (ret != KERN_SUCCESS) {
		printf("kmem_suballoc: bad status return of %d.\n", ret);
		panic("kmem_suballoc");
	}
	*max = *min + size;
	pmap_reference(vm_map_pmap(parent));
	result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return (result);
}
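
/*
 * Usage sketch (approximate; the real calls live in machine-dependent
 * startup code such as cpu_startup()): the submaps declared at the top
 * of this file are carved out of kernel_map like so:
 *
 *	vm_offset_t minaddr, maxaddr;
 *
 *	io_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 *	    IO_MAP_SIZE, TRUE);
 *
 * IO_MAP_SIZE stands in for whatever size the platform chooses; it is
 * not a real symbol in this tree.
 */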

/*
 * Allocate wired-down memory in the kernel's address map for the higher
 * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 * kmem_alloc() because we may need to allocate memory at interrupt
 * level where we cannot block (canwait == FALSE).
 *
 * This routine has its own private kernel submap (kmem_map) and object
 * (kmem_object).  This, combined with the fact that only malloc uses
 * this routine, ensures that we will never block in map or object waits.
 *
 * Note that this still only works in a uni-processor environment and
 * when called at splhigh().
 *
 * We don't worry about expanding the map (adding entries) since entries
 * for wired maps are statically allocated.
 */
vm_offset_t
kmem_malloc(map, size, waitflag)
	register vm_map_t map;
	register vm_size_t size;
	boolean_t waitflag;
{
	register vm_offset_t offset, i;
	vm_map_entry_t entry;
	vm_offset_t addr;
	vm_page_t m;

	if (map != kmem_map && map != mb_map && map != mcl_map)
		panic("kmem_malloc: map != {kmem,mb,mcl}_map");

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map. This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		if (map == mb_map) {
			mb_map_full = TRUE;
			log(LOG_ERR, "Out of mbufs - increase maxusers!\n");
			return (0);
		}
		if (map == mcl_map) {
			mcl_map_full = TRUE;
			log(LOG_ERR,
			    "Out of mbuf clusters - increase maxusers!\n");
			return (0);
		}
		if (waitflag == M_WAITOK)
			panic("kmem_malloc: kmem_map too small");
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);

	for (i = 0; i < size; i += PAGE_SIZE) {
retry:
		m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i),
		    (waitflag == M_NOWAIT) ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM);

		/*
		 * Ran out of space, free everything up and return. Don't need
		 * to lock page queues here as we know that the pages we got
		 * aren't on any queues.
		 */
		if (m == NULL) {
			if (waitflag == M_WAITOK) {
				VM_WAIT;
				goto retry;
			}
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object,
				    OFF_TO_IDX(offset + i));
				PAGE_WAKEUP(m);
				vm_page_free(m);
			}
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			return (0);
		}
		m->flags &= ~PG_ZERO;
		m->valid = VM_PAGE_BITS_ALL;
	}

	/*
	 * Mark map entry as non-pageable. Assert: vm_map_insert() will never
	 * be able to extend the previous entry so there will be a new entry
	 * exactly corresponding to this address range and it will have
	 * wired_count == 0.
	 */
	if (!vm_map_lookup_entry(map, addr, &entry) ||
	    entry->start != addr || entry->end != addr + size ||
	    entry->wired_count)
		panic("kmem_malloc: entry not found or misaligned");
	entry->wired_count++;

	vm_map_simplify_entry(map, entry);

	/*
	 * Loop through the pages, entering them in the pmap. (We cannot add
	 * them to the wired count without wrapping the vm_page_queue_lock in
	 * splimp...)
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i));
		vm_page_wire(m);
		PAGE_WAKEUP(m);
		pmap_enter(kernel_pmap, addr + i, VM_PAGE_TO_PHYS(m),
		    VM_PROT_ALL, 1);
		m->flags |= PG_MAPPED|PG_WRITEABLE;
	}
	vm_map_unlock(map);

	return (addr);
}
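
/*
 * Usage sketch (simplified from the caller in kern/kern_malloc.c;
 * details differ): when malloc() runs out of free pages for a bucket,
 * it refills from kmem_map:
 *
 *	va = (caddr_t) kmem_malloc(kmem_map, (vm_size_t) ctob(npg),
 *	    (flags & M_NOWAIT) ? M_NOWAIT : M_WAITOK);
 *	if (va == NULL)
 *		return (NULL);
 *
 * The M_NOWAIT path can run at interrupt time, which is why this
 * routine must never sleep in that case.
 */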

/*
 *	kmem_alloc_wait
 *
 *	Allocates pageable memory from a submap of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 *
 */
vm_offset_t
kmem_alloc_wait(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;

	size = round_page(size);

	for (;;) {
		/*
		 * To make this work for more than one map, use the map's lock
		 * to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, 0, size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			return (0);
		}
		vm_map_unlock(map);
		tsleep(map, PVM, "kmaw", 0);
	}
	vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmem_free_wakeup
 *
 *	Returns memory to a submap of the kernel, and wakes up any processes
 *	waiting for memory in that map.
 */
void
kmem_free_wakeup(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{
	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	wakeup(map);
	vm_map_unlock(map);
}

/*
 * Create the kernel map; insert a mapping covering kernel text, data, bss,
 * and all space allocated thus far (`bootstrap' data).  The new map will
 * thus map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated,
 * and the range between `start' and `end' as free.
 */
void
kmem_init(start, end)
	vm_offset_t start, end;
{
	register vm_map_t m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE);
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	(void) vm_map_insert(m, NULL, (vm_offset_t) 0,
	    VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, 0);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);
}
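
/*
 * Putting it together (rough sketch of the boot-time ordering; the
 * actual calls are spread across vm/vm_init.c and machine-dependent
 * startup, and the details here are approximate):
 *
 *	kmem_init(virtual_avail, virtual_end);	(creates kernel_map)
 *	kmem_map = kmem_suballoc(kernel_map, &min, &max,
 *	    KMEM_MAP_SIZE, FALSE);		(submap for kmem_malloc())
 *
 * Only once the submaps exist can malloc(), mbuf allocation, and the
 * pageable-submap routines above be used.  KMEM_MAP_SIZE is a
 * placeholder, not a real symbol in this tree.
 */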