vm_kern.c revision 91946
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD: head/sys/vm/vm_kern.c 91946 2002-03-09 16:24:27Z tegge $
 */

/*
 *	Kernel memory management.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>		/* for ticks and hz */
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

vm_map_t kernel_map=0;
vm_map_t kmem_map=0;
vm_map_t exec_map=0;
vm_map_t clean_map=0;
vm_map_t buffer_map=0;

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory to the kernel's address map.
 *	"map" must be kernel_map or a submap of kernel_map.
 */
vm_offset_t
kmem_alloc_pageable(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;
	int result;

	GIANT_REQUIRED;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, (vm_offset_t) 0,
	    &addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
	if (result != KERN_SUCCESS) {
		return (0);
	}
	return (addr);
}

/*
 *	kmem_alloc_nofault:
 *
 *	Same as kmem_alloc_pageable, except that it creates a nofault entry.
 */
vm_offset_t
kmem_alloc_nofault(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;
	int result;

	GIANT_REQUIRED;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, (vm_offset_t) 0,
	    &addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	if (result != KERN_SUCCESS) {
		return (0);
	}
	return (addr);
}
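
/*
 * Illustrative sketch (not part of the original file): a typical caller
 * reserves pageable KVA with kmem_alloc_pageable() and releases it with
 * kmem_free().  The helper name and size below are hypothetical.
 */
#if 0
static int
example_pageable_region(void)
{
	vm_offset_t va;
	vm_size_t len;

	len = 4 * PAGE_SIZE;
	va = kmem_alloc_pageable(kernel_map, len);	/* 0 on failure */
	if (va == 0)
		return (ENOMEM);
	/* Pages are faulted in on first touch; nothing is wired here. */
	kmem_free(kernel_map, va, len);
	return (0);
}
#endif
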
/*
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.
 */
vm_offset_t
kmem_alloc(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;
	vm_offset_t offset;
	vm_offset_t i;

	GIANT_REQUIRED;

	size = round_page(size);

	/*
	 * Use the kernel object for wired-down kernel pages.  Assume that no
	 * region of the kernel object is referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Guarantee that there are pages already in this object before
	 * calling vm_map_pageable.  This is to prevent the following
	 * scenario:
	 *
	 * 1) Threads have swapped out, so that there is a pager for the
	 *    kernel_object.
	 * 2) The kmsg zone is empty, and so we are kmem_allocing a new page
	 *    for it.
	 * 3) vm_map_pageable calls vm_fault; there is no page, but there is
	 *    a pager, so we call pager_data_request.  But the kmsg zone is
	 *    empty, so we must kmem_alloc.
	 * 4) goto 1
	 * 5) Even if the kmsg zone is not empty: when we get the data back
	 *    from the pager, it will be (very stale) non-zero data.
	 *    kmem_alloc is defined to return zero-filled memory.
	 *
	 * We're intentionally not activating the pages we allocate to prevent
	 * a race with page-out.  vm_map_pageable will wire the pages.
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_page_t mem;

		mem = vm_page_grab(kernel_object, OFF_TO_IDX(offset + i),
		    VM_ALLOC_ZERO | VM_ALLOC_RETRY);
		if ((mem->flags & PG_ZERO) == 0)
			vm_page_zero_fill(mem);
		mem->valid = VM_PAGE_BITS_ALL;
		vm_page_flag_clear(mem, PG_ZERO);
		vm_page_wakeup(mem);
	}

	/*
	 * And finally, mark the data as non-pageable.
	 */
	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);

	return (addr);
}
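
/*
 * Illustrative sketch (not part of the original file): kmem_alloc() is the
 * path for wired, zero-filled allocations such as boot-time tables.  The
 * function name here is hypothetical.
 */
#if 0
static vm_offset_t
example_wired_table(vm_size_t len)
{
	vm_offset_t va;

	va = kmem_alloc(kernel_map, len);	/* wired and zero-filled */
	if (va == 0)
		panic("example_wired_table: kernel map exhausted");
	return (va);
}
#endif
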
/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 *
 *	This routine may not block on kernel maps.
 */
void
kmem_free(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{
	GIANT_REQUIRED;

	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	min, max	Returned endpoints of map
 *	size		Size of range to find
 */
vm_map_t
kmem_suballoc(parent, min, max, size)
	vm_map_t parent;
	vm_offset_t *min, *max;
	vm_size_t size;
{
	int ret;
	vm_map_t result;

	GIANT_REQUIRED;

	size = round_page(size);

	*min = (vm_offset_t) vm_map_min(parent);
	ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
	    min, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
	if (ret != KERN_SUCCESS) {
		printf("kmem_suballoc: bad status return of %d.\n", ret);
		panic("kmem_suballoc");
	}
	*max = *min + size;
	pmap_reference(vm_map_pmap(parent));
	result = vm_map_create(vm_map_pmap(parent), *min, *max);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return (result);
}
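
/*
 * Illustrative sketch (not part of the original file): boot-time code
 * carves submaps such as exec_map out of kernel_map in roughly this
 * shape; the size used here is made up.
 */
#if 0
static void
example_create_submap(void)
{
	vm_offset_t minaddr, maxaddr;

	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
	    16 * PAGE_SIZE);
	/* exec_map now manages [minaddr, maxaddr) with its own lock. */
}
#endif
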
/*
 *	kmem_malloc:
 *
 *	Allocate wired-down memory in the kernel's address map for the higher
 *	level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 *	kmem_alloc() because we may need to allocate memory at interrupt
 *	level where we cannot block (canwait == FALSE).
 *
 *	This routine has its own private kernel submap (kmem_map) and object
 *	(kmem_object).  This, combined with the fact that only malloc uses
 *	this routine, ensures that we will never block in map or object waits.
 *
 *	Note that this still only works in a uni-processor environment and
 *	when called at splhigh().
 *
 *	We don't worry about expanding the map (adding entries) since entries
 *	for wired maps are statically allocated.
 *
 *	NOTE:  This routine is not supposed to block if M_NOWAIT is set, but
 *	I have not verified that it actually does not block.
 *
 *	`map' is ONLY allowed to be kmem_map or one of the mbuf submaps to
 *	which we never free.
 */
vm_offset_t
kmem_malloc(map, size, flags)
	vm_map_t map;
	vm_size_t size;
	int flags;
{
	vm_offset_t offset, i;
	vm_map_entry_t entry;
	vm_offset_t addr;
	vm_page_t m;

	GIANT_REQUIRED;

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
		vm_map_unlock(map);
		if (map != kmem_map) {
			static int last_report; /* when we did it (in ticks) */
			if (ticks < last_report ||
			    (ticks - last_report) >= hz) {
				last_report = ticks;
				printf("Out of mbuf address space!\n");
				printf("Consider increasing NMBCLUSTERS\n");
			}
			goto bad;
		}
		if ((flags & M_NOWAIT) == 0)
			panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated",
			    (long)size, (long)map->size);
		goto bad;
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);

	for (i = 0; i < size; i += PAGE_SIZE) {
		/*
		 * Note: if M_NOWAIT specified alone, allocate from
		 * interrupt-safe queues only (just the free list).  If
		 * M_USE_RESERVE is also specified, we can also
		 * allocate from the cache.  Neither of the latter two
		 * flags may be specified from an interrupt since interrupts
		 * are not allowed to mess with the cache queue.
		 */
retry:
		m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i),
		    ((flags & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT) ?
			VM_ALLOC_INTERRUPT :
			VM_ALLOC_SYSTEM);

		/*
		 * Ran out of space, free everything up and return.  Don't need
		 * to lock page queues here as we know that the pages we got
		 * aren't on any queues.
		 */
		if (m == NULL) {
			if ((flags & M_NOWAIT) == 0) {
				vm_map_unlock(map);
				VM_WAIT;
				vm_map_lock(map);
				goto retry;
			}
			/*
			 * Free the pages before removing the map entry.
			 * They are already marked busy.  Calling
			 * vm_map_delete before the pages have been freed or
			 * unbusied will cause a deadlock.
			 */
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object,
				    OFF_TO_IDX(offset + i));
				vm_page_free(m);
			}
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			goto bad;
		}
		vm_page_flag_clear(m, PG_ZERO);
		m->valid = VM_PAGE_BITS_ALL;
	}

	/*
	 * Mark map entry as non-pageable.  Assert: vm_map_insert() will never
	 * be able to extend the previous entry so there will be a new entry
	 * exactly corresponding to this address range and it will have
	 * wired_count == 0.
	 */
	if (!vm_map_lookup_entry(map, addr, &entry) ||
	    entry->start != addr || entry->end != addr + size ||
	    entry->wired_count != 0)
		panic("kmem_malloc: entry not found or misaligned");
	entry->wired_count = 1;

	vm_map_simplify_entry(map, entry);

	/*
	 * Loop thru pages, entering them in the pmap.  (We cannot add them to
	 * the wired count without wrapping the vm_page_queue_lock in
	 * splimp...)
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i));
		vm_page_wire(m);
		vm_page_wakeup(m);
		/*
		 * Because this is kernel_pmap, this call will not block.
		 */
		pmap_enter(kernel_pmap, addr + i, m, VM_PROT_ALL, 1);
		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE | PG_REFERENCED);
	}
	vm_map_unlock(map);

	return (addr);

bad:
	return (0);
}
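
/*
 * Illustrative sketch (not part of the original file): the malloc(9)
 * layer obtains fresh wired pages from kmem_malloc() roughly like this;
 * error handling is simplified and the helper name is hypothetical.
 */
#if 0
static void *
example_backing_pages(vm_size_t size, int flags)
{
	vm_offset_t va;

	/* With M_NOWAIT set, this fails rather than sleeping. */
	va = kmem_malloc(kmem_map, size, flags & (M_NOWAIT | M_USE_RESERVE));
	if (va == 0)
		return (NULL);
	return ((void *)va);
}
#endif
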
/*
 *	kmem_alloc_wait:
 *
 *	Allocates pageable memory from a sub-map of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 *
 *	This routine may block.
 */
vm_offset_t
kmem_alloc_wait(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;

	GIANT_REQUIRED;

	size = round_page(size);

	for (;;) {
		/*
		 * To make this work for more than one map, use the map's lock
		 * to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			return (0);
		}
		vm_map_unlock(map);
		tsleep(map, PVM, "kmaw", 0);
	}
	vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmem_free_wakeup:
 *
 *	Returns memory to a submap of the kernel, and wakes up any processes
 *	waiting for memory in that map.
 */
void
kmem_free_wakeup(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{
	GIANT_REQUIRED;

	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	wakeup(map);
	vm_map_unlock(map);
}

/*
 *	kmem_init:
 *
 *	Create the kernel map; insert a mapping covering kernel text,
 *	data, bss, and all space allocated thus far (`bootstrap' data).  The
 *	new map will thus map the range between VM_MIN_KERNEL_ADDRESS and
 *	`start' as allocated, and the range between `start' and `end' as free.
 */
void
kmem_init(start, end)
	vm_offset_t start, end;
{
	vm_map_t m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	kernel_map->system_map = 1;
	(void) vm_map_insert(m, NULL, (vm_offset_t) 0,
	    VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, 0);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);
}
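
/*
 * Illustrative sketch (not part of the original file): kmem_alloc_wait()
 * and kmem_free_wakeup() are used as a sleeping pair on a submap such as
 * exec_map; the buffer size and helper name here are hypothetical.
 */
#if 0
static void
example_wait_pair(void)
{
	vm_offset_t va;
	vm_size_t len;

	len = round_page(MAXPATHLEN);
	va = kmem_alloc_wait(exec_map, len);	/* may sleep for space */
	if (va == 0)
		return;		/* request larger than the whole submap */
	/* ... use the pageable buffer ... */
	kmem_free_wakeup(exec_map, va, len);	/* wakes other sleepers */
}
#endif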