/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $Id: vm_kern.c,v 1.39 1997/08/05 00:01:52 dyson Exp $
 */

/*
 *	Kernel memory management.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/syslog.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

vm_map_t kernel_map=0;
vm_map_t kmem_map=0;
vm_map_t exec_map=0;
vm_map_t clean_map=0;
vm_map_t u_map=0;
vm_map_t buffer_map=0;
vm_map_t mb_map=0;
int mb_map_full=0;
vm_map_t io_map=0;
vm_map_t phys_map=0;

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory to the kernel's address map.
 *	"map" must be kernel_map or a submap of kernel_map.
 */

vm_offset_t
kmem_alloc_pageable(map, size)
	vm_map_t map;
	register vm_size_t size;
{
	vm_offset_t addr;
	register int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, (vm_offset_t) 0,
	    &addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
	if (result != KERN_SUCCESS) {
		return (0);
	}
	return (addr);
}

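/*
 * Illustrative sketch: a hypothetical caller that only needs pageable
 * kernel virtual memory allocates it against kernel_map and later hands
 * it back with kmem_free(), defined below.  The names "foo_buf" and
 * "FOO_BUFSIZE" are invented for this example.
 *
 *	vm_offset_t foo_buf;
 *
 *	foo_buf = kmem_alloc_pageable(kernel_map, FOO_BUFSIZE);
 *	if (foo_buf == 0)
 *		return (ENOMEM);
 *	... use the buffer; pages are faulted in on demand ...
 *	kmem_free(kernel_map, foo_buf, FOO_BUFSIZE);
 */
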
/*
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.
 */
vm_offset_t
kmem_alloc(map, size)
	register vm_map_t map;
	register vm_size_t size;
{
	vm_offset_t addr;
	register vm_offset_t offset;
	vm_offset_t i;

	size = round_page(size);

	/*
	 * Use the kernel object for wired-down kernel pages. Assume that no
	 * region of the kernel object is referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map. This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size,
		VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Guarantee that there are pages already in this object before
	 * calling vm_map_pageable. This is to prevent the following
	 * scenario:
	 *
	 * 1) Threads have swapped out, so that there is a pager for the
	 * kernel_object. 2) The kmsg zone is empty, and so we are
	 * kmem_allocing a new page for it. 3) vm_map_pageable calls vm_fault;
	 * there is no page, but there is a pager, so we call
	 * pager_data_request. But the kmsg zone is empty, so we must
	 * kmem_alloc. 4) goto 1 5) Even if the kmsg zone is not empty: when
	 * we get the data back from the pager, it will be (very stale)
	 * non-zero data. kmem_alloc is defined to return zero-filled memory.
	 *
	 * We're intentionally not activating the pages we allocate to prevent a
	 * race with page-out. vm_map_pageable will wire the pages.
	 */

	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_page_t mem;

		while ((mem = vm_page_alloc(kernel_object,
			OFF_TO_IDX(offset + i), VM_ALLOC_ZERO)) == NULL) {
			VM_WAIT;
		}
		if ((mem->flags & PG_ZERO) == 0)
			vm_page_zero_fill(mem);
		mem->flags &= ~(PG_BUSY|PG_ZERO);
		mem->valid = VM_PAGE_BITS_ALL;
	}

	/*
	 * And finally, mark the data as non-pageable.
	 */

	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);

	return (addr);
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 */
void
kmem_free(map, addr, size)
	vm_map_t map;
	register vm_offset_t addr;
	vm_size_t size;
{
	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}

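/*
 * Illustrative sketch: kmem_alloc() returns zero-filled, wired memory, so
 * it suits long-lived kernel tables sized at boot.  "table_va" and
 * "tablesize" are invented names for this example.
 *
 *	vm_offset_t table_va;
 *
 *	table_va = kmem_alloc(kernel_map, tablesize);
 *	if (table_va == 0)
 *		panic("startup: no space for table");
 *
 * The region stays resident until it is returned with
 * kmem_free(kernel_map, table_va, tablesize).
 */
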
/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	min, max	Returned endpoints of map
 *	size		Size of range to find
 */
vm_map_t
kmem_suballoc(parent, min, max, size)
	register vm_map_t parent;
	vm_offset_t *min, *max;
	register vm_size_t size;
{
	register int ret;
	vm_map_t result;

	size = round_page(size);

	*min = (vm_offset_t) vm_map_min(parent);
	ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
	    min, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
	if (ret != KERN_SUCCESS) {
		printf("kmem_suballoc: bad status return of %d.\n", ret);
		panic("kmem_suballoc");
	}
	*max = *min + size;
	pmap_reference(vm_map_pmap(parent));
	result = vm_map_create(vm_map_pmap(parent), *min, *max);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return (result);
}

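/*
 * Illustrative sketch: the usual boot-time pattern for carving a submap
 * such as kmem_map out of kernel_map; min/max receive the submap's bounds.
 * The size expression ("nkmempages") is invented for this example.
 *
 *	vm_offset_t minaddr, maxaddr;
 *
 *	kmem_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 *	    (vm_size_t) nkmempages * PAGE_SIZE);
 *
 * Later kmem_malloc(kmem_map, ...) calls are then confined to the
 * [minaddr, maxaddr) range reserved here.
 */
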
/*
 * Allocate wired-down memory in the kernel's address map for the higher
 * level kernel memory allocator (kern/kern_malloc.c). We cannot use
 * kmem_alloc() because we may need to allocate memory at interrupt
 * level where we cannot block (canwait == FALSE).
 *
 * This routine has its own private kernel submap (kmem_map) and object
 * (kmem_object). This, combined with the fact that only malloc uses
 * this routine, ensures that we will never block in map or object waits.
 *
 * Note that this still only works in a uni-processor environment and
 * when called at splhigh().
 *
 * We don't worry about expanding the map (adding entries) since entries
 * for wired maps are statically allocated.
 */
vm_offset_t
kmem_malloc(map, size, waitflag)
	register vm_map_t map;
	register vm_size_t size;
	boolean_t waitflag;
{
	register vm_offset_t offset, i;
	vm_map_entry_t entry;
	vm_offset_t addr;
	vm_page_t m;

	if (map != kmem_map && map != mb_map)
		panic("kmem_malloc: map != {kmem,mb}_map");

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map. This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		if (map == mb_map) {
			mb_map_full = TRUE;
			log(LOG_ERR, "Out of mbuf clusters - increase maxusers!\n");
			return (0);
		}
		if (waitflag == M_WAITOK)
			panic("kmem_malloc: kmem_map too small");
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size,
		VM_PROT_ALL, VM_PROT_ALL, 0);

	for (i = 0; i < size; i += PAGE_SIZE) {
retry:
		m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i),
			(waitflag == M_NOWAIT) ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM);

		/*
		 * Ran out of space, free everything up and return. Don't need
		 * to lock page queues here as we know that the pages we got
		 * aren't on any queues.
		 */
		if (m == NULL) {
			if (waitflag == M_WAITOK) {
				VM_WAIT;
				goto retry;
			}
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object,
				    OFF_TO_IDX(offset + i));
				PAGE_WAKEUP(m);
				vm_page_free(m);
			}
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			return (0);
		}
		m->flags &= ~PG_ZERO;
		m->valid = VM_PAGE_BITS_ALL;
	}

	/*
	 * Mark map entry as non-pageable. Assert: vm_map_insert() will never
	 * be able to extend the previous entry so there will be a new entry
	 * exactly corresponding to this address range and it will have
	 * wired_count == 0.
	 */
	if (!vm_map_lookup_entry(map, addr, &entry) ||
	    entry->start != addr || entry->end != addr + size ||
	    entry->wired_count)
		panic("kmem_malloc: entry not found or misaligned");
	entry->wired_count++;

	vm_map_simplify_entry(map, entry);

	/*
	 * Loop thru pages, entering them in the pmap. (We cannot add them to
	 * the wired count without wrapping the vm_page_queue_lock in
	 * splimp...)
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i));
		vm_page_wire(m);
		PAGE_WAKEUP(m);
		pmap_enter(kernel_pmap, addr + i, VM_PAGE_TO_PHYS(m),
			VM_PROT_ALL, 1);
		m->flags |= PG_MAPPED|PG_WRITEABLE;
	}
	vm_map_unlock(map);

	return (addr);
}

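/*
 * Illustrative sketch: the malloc(9) layer in kern/kern_malloc.c falls
 * through to this routine when it needs fresh pages.  "allocsize" and
 * "canblock" are invented names; this is only a simplified picture of
 * that call site.
 *
 *	vm_offset_t va;
 *
 *	va = kmem_malloc(kmem_map, (vm_size_t) allocsize,
 *	    canblock ? M_WAITOK : M_NOWAIT);
 *	if (va == 0)
 *		return (NULL);
 *
 * Because the pages are wired and entered into the pmap above, the caller
 * may use the memory immediately, even when called with M_NOWAIT at
 * interrupt level.
 */
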
/*
 *	kmem_alloc_wait:
 *
 *	Allocates pageable memory from a sub-map of the kernel. If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 */
vm_offset_t
kmem_alloc_wait(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;

	size = round_page(size);

	for (;;) {
		/*
		 * To make this work for more than one map, use the map's lock
		 * to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, 0, size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			return (0);
		}
		vm_map_unlock(map);
		tsleep(map, PVM, "kmaw", 0);
	}
	vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmem_free_wakeup:
 *
 *	Returns memory to a submap of the kernel, and wakes up any processes
 *	waiting for memory in that map.
 */
void
kmem_free_wakeup(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{
	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	wakeup(map);
	vm_map_unlock(map);
}

/*
 * Create the kernel map; insert a mapping covering kernel text, data, bss,
 * and all space allocated thus far (`bootstrap' data). The new map will thus
 * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
 * the range between `start' and `end' as free.
 */
void
kmem_init(start, end)
	vm_offset_t start, end;
{
	register vm_map_t m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	kernel_map->system_map = 1;
	(void) vm_map_insert(m, NULL, (vm_offset_t) 0,
	    VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, 0);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);
}
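
/*
 * Illustrative sketch: kmem_alloc_wait() and kmem_free_wakeup() are meant
 * to be used as a pair on a submap such as exec_map, so that a caller
 * blocks until an earlier user of the submap returns its space.  The name
 * "argaddr" and the ARG_MAX-sized request are invented for this example.
 *
 *	vm_offset_t argaddr;
 *
 *	argaddr = kmem_alloc_wait(exec_map, ARG_MAX);
 *	... copy in the argument strings ...
 *	kmem_free_wakeup(exec_map, argaddr, ARG_MAX);
 *
 * kmem_alloc_wait() returns 0 only when the request can never fit in the
 * submap; otherwise it sleeps on the map ("kmaw") until space is freed.
 */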