/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 *	$Id: vm_page.c,v 1.49 1996/03/09 06:56:39 dyson Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Resident memory management module.
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

#ifdef DDB
extern void	DDB_print_page_info __P((void));
#endif

/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

static struct pglist *vm_page_buckets;	/* Array of buckets */
static int vm_page_bucket_count;	/* How big is array? */
static int vm_page_hash_mask;		/* Mask for hash function */

struct pglist vm_page_queue_free;
struct pglist vm_page_queue_zero;
struct pglist vm_page_queue_active;
struct pglist vm_page_queue_inactive;
struct pglist vm_page_queue_cache;

int no_queue;

struct {
	struct pglist *pl;
	int *cnt;
} vm_page_queues[PQ_CACHE+1] = {
	{NULL, &no_queue},
	{ &vm_page_queue_free, &cnt.v_free_count},
	{ &vm_page_queue_zero, &cnt.v_free_count},
	{ &vm_page_queue_inactive, &cnt.v_inactive_count},
	{ &vm_page_queue_active, &cnt.v_active_count},
	{ &vm_page_queue_cache, &cnt.v_cache_count}
};

vm_page_t vm_page_array;
static int vm_page_array_size;
long first_page;
static long last_page;
static vm_size_t page_mask;
static int page_shift;
int vm_page_zero_count;

/*
 * Map of contiguous valid DEV_BSIZE chunks in a page.
 * (This list is valid for page sizes up to 16*DEV_BSIZE.)
 */
static u_short vm_page_dev_bsize_chunks[] = {
	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};

static inline __pure int
	vm_page_hash __P((vm_object_t object, vm_pindex_t pindex))
	__pure2;
static void vm_page_unqueue __P((vm_page_t));

/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from cnt.v_page_size.
 */
void
vm_set_page_size()
{

	if (cnt.v_page_size == 0)
		cnt.v_page_size = DEFAULT_PAGE_SIZE;
	page_mask = cnt.v_page_size - 1;
	if ((page_mask & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
	for (page_shift = 0;; page_shift++)
		if ((1 << page_shift) == cnt.v_page_size)
			break;
}
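
/*
 * Illustrative sketch (not part of the original source): how page_mask
 * and page_shift are used once vm_set_page_size() has run.  Because the
 * page size must be a power of two, masking truncates an address to a
 * page boundary and shifting converts a byte address to a page number.
 * The concrete values below assume a hypothetical 4096-byte page.
 */
#if 0
	/* cnt.v_page_size == 4096  =>  page_mask == 0xfff, page_shift == 12 */
	vm_offset_t addr = 0x12345;
	vm_offset_t base = addr & ~page_mask;	/* 0x12000 */
	long pageno = addr >> page_shift;	/* 0x12 */
#endif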

/*
 *	vm_page_startup:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 */

vm_offset_t
vm_page_startup(starta, enda, vaddr)
	register vm_offset_t starta;
	vm_offset_t enda;
	register vm_offset_t vaddr;
{
	register vm_offset_t mapped;
	register vm_page_t m;
	register struct pglist *bucket;
	vm_size_t npages, page_range;
	register vm_offset_t new_start;
	int i;
	vm_offset_t pa;
	int nblocks;
	vm_offset_t first_managed_page;

	/* the biggest memory array is the second group of pages */
	vm_offset_t start;
	vm_offset_t biggestone, biggestsize;

	vm_offset_t total;

	total = 0;
	biggestsize = 0;
	biggestone = 0;
	nblocks = 0;
	vaddr = round_page(vaddr);

	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	for (i = 0; phys_avail[i + 1]; i += 2) {
		int size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		++nblocks;
		total += size;
	}

	start = phys_avail[biggestone];

	/*
	 * Initialize the queue headers for the free queue, the active queue
	 * and the inactive queue.
	 */

	TAILQ_INIT(&vm_page_queue_free);
	TAILQ_INIT(&vm_page_queue_zero);
	TAILQ_INIT(&vm_page_queue_active);
	TAILQ_INIT(&vm_page_queue_inactive);
	TAILQ_INIT(&vm_page_queue_cache);

	/*
	 * Allocate (and initialize) the hash table buckets.
	 *
	 * The number of buckets MUST BE a power of 2, and the actual value is
	 * the next power of 2 greater than the number of physical pages in
	 * the system.
	 *
	 * Note: This computation can be tweaked if desired.
	 */
	vm_page_buckets = (struct pglist *) vaddr;
	bucket = vm_page_buckets;
	if (vm_page_bucket_count == 0) {
		vm_page_bucket_count = 2;
		while (vm_page_bucket_count < atop(total))
			vm_page_bucket_count <<= 1;
	}
	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 * Validate these addresses.
	 */

	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
	new_start = round_page(new_start);
	mapped = vaddr;
	vaddr = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;
	bzero((caddr_t) mapped, vaddr - mapped);
	mapped = vaddr;

	for (i = 0; i < vm_page_bucket_count; i++) {
		TAILQ_INIT(bucket);
		bucket++;
	}

	/*
	 * Round (or truncate) the addresses to our page size.
	 */

	/*
	 * Pre-allocate maps and map entries that cannot be dynamically
	 * allocated via malloc().  The maps include the kernel_map and
	 * kmem_map which must be initialized before malloc() will work
	 * (obviously).  Also could include pager maps which would be
	 * allocated before kmeminit.
	 *
	 * Allow some kernel map entries... this should be plenty since people
	 * shouldn't be cluttering up the kernel map (they should use their
	 * own maps).
	 */

	kentry_data_size = MAX_KMAP * sizeof(struct vm_map) +
	    MAX_KMAPENT * sizeof(struct vm_map_entry);
	kentry_data_size = round_page(kentry_data_size);
	kentry_data = (vm_offset_t) vaddr;
	vaddr += kentry_data_size;

	/*
	 * Validate these zone addresses.
	 */

	new_start = start + (vaddr - mapped);
	pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE);
	bzero((caddr_t) mapped, (vaddr - mapped));
	start = round_page(new_start);

	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */

	first_page = phys_avail[0] / PAGE_SIZE;
	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;

	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
	npages = (total - (page_range * sizeof(struct vm_page)) -
	    (start - phys_avail[biggestone])) / PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */

	vm_page_array = (vm_page_t) vaddr;
	mapped = vaddr;

	/*
	 * Validate these addresses.
	 */

	new_start = round_page(start + page_range * sizeof(struct vm_page));
	mapped = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;

	first_managed_page = start / PAGE_SIZE;

	/*
	 * Clear all of the page structures.
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	vm_page_array_size = page_range;

	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
		if (i == biggestone)
			pa = ptoa(first_managed_page);
		else
			pa = phys_avail[i];
		while (pa < phys_avail[i + 1] && npages-- > 0) {
			++cnt.v_page_count;
			++cnt.v_free_count;
			m = PHYS_TO_VM_PAGE(pa);
			m->queue = PQ_FREE;
			m->flags = 0;
			m->phys_addr = pa;
			TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
			pa += PAGE_SIZE;
		}
	}

	return (mapped);
}

/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:  This function depends on vm_page_bucket_count being a
 *	power of 2.
 */
static inline __pure int
vm_page_hash(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	return ((unsigned) object + pindex) & vm_page_hash_mask;
}
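
/*
 * Illustrative sketch (not part of the original source): because the
 * bucket count is a power of two, the mask in vm_page_hash() acts as a
 * cheap modulus.  The figures below assume a hypothetical configuration
 * of 4096 buckets, i.e. vm_page_hash_mask == 0xfff.
 */
#if 0
	/* with (unsigned) object == 0xf0123000 and pindex == 0x456: */
	int index = (0xf0123000 + 0x456) & 0xfff;	/* == 0x456 */
	struct pglist *bucket = &vm_page_buckets[index];
	/* same result as ((...) % vm_page_bucket_count), without a divide */
#endif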

/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object and page must be locked, and must be at splhigh.
 */

inline void
vm_page_insert(m, object, pindex)
	register vm_page_t m;
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register struct pglist *bucket;

	if (m->flags & PG_TABLED)
		panic("vm_page_insert: already inserted");

	/*
	 * Record the object/offset pair in this page.
	 */

	m->object = object;
	m->pindex = pindex;

	/*
	 * Insert it into the object/offset hash table.
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	TAILQ_INSERT_TAIL(bucket, m, hashq);

	/*
	 * Now link into the object's list of backed pages.
	 */

	TAILQ_INSERT_TAIL(&object->memq, m, listq);
	m->flags |= PG_TABLED;

	/*
	 * And show that the object has one more resident page.
	 */

	object->resident_page_count++;
}

/*
 *	vm_page_remove:		[ internal use only ]
 *				NOTE: used by device pager as well -wfj
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object and page must be locked, and at splhigh.
 */

inline void
vm_page_remove(m)
	register vm_page_t m;
{
	register struct pglist *bucket;

	if (!(m->flags & PG_TABLED))
		return;

	/*
	 * Remove from the object/offset hash table.
	 */

	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
	TAILQ_REMOVE(bucket, m, hashq);

	/*
	 * Now remove from the object's list of backed pages.
	 */

	TAILQ_REMOVE(&m->object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */

	m->object->resident_page_count--;

	m->flags &= ~PG_TABLED;
}

/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

vm_page_t
vm_page_lookup(object, pindex)
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register vm_page_t m;
	register struct pglist *bucket;
	int s;

	/*
	 * Search the hash table for this object/offset pair.
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];

	s = splhigh();
	for (m = bucket->tqh_first; m != NULL; m = m->hashq.tqe_next) {
		if ((m->object == object) && (m->pindex == pindex)) {
			splx(s);
			return (m);
		}
	}

	splx(s);
	return (NULL);
}

/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(m, new_object, new_pindex)
	register vm_page_t m;
	register vm_object_t new_object;
	vm_pindex_t new_pindex;
{
	int s;

	s = splhigh();
	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	splx(s);
}

/*
 * vm_page_unqueue must be called at splhigh().
 */
static inline void
vm_page_unqueue(vm_page_t m)
{
	int queue = m->queue;

	if (queue == PQ_NONE)
		return;
	m->queue = PQ_NONE;
	TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq);
	--(*vm_page_queues[queue].cnt);
	if (queue == PQ_CACHE) {
		if ((cnt.v_cache_count + cnt.v_free_count) <
		    (cnt.v_free_reserved + cnt.v_cache_min))
			pagedaemon_wakeup();
	}
	return;
}
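
/*
 * Illustrative sketch (not part of the original source): a typical
 * caller pattern for the lookup/rename primitives above.  The
 * old_object/new_object names are hypothetical; the caller is assumed
 * to hold the objects locked, and vm_page_rename() raises the priority
 * level itself.
 */
#if 0
	vm_page_t m;

	m = vm_page_lookup(old_object, old_pindex);
	if (m != NULL)
		vm_page_rename(m, new_object, new_pindex);
#endif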

/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	page_req classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *	VM_ALLOC_ZERO		zero page
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(object, pindex, page_req)
	vm_object_t object;
	vm_pindex_t pindex;
	int page_req;
{
	register vm_page_t m;
	int queue;
	int s;

#ifdef DIAGNOSTIC
	m = vm_page_lookup(object, pindex);
	if (m)
		panic("vm_page_alloc: page already allocated");
#endif

	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
		page_req = VM_ALLOC_SYSTEM;
	}

	s = splhigh();

	switch (page_req) {

	case VM_ALLOC_NORMAL:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_queue_free.tqh_first;
			if (m == NULL) {
				--vm_page_zero_count;
				m = vm_page_queue_zero.tqh_first;
			}
		} else {
			m = vm_page_queue_cache.tqh_first;
			if (m == NULL) {
				splx(s);
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_ZERO:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_queue_zero.tqh_first;
			if (m) {
				--vm_page_zero_count;
			} else {
				m = vm_page_queue_free.tqh_first;
			}
		} else {
			m = vm_page_queue_cache.tqh_first;
			if (m == NULL) {
				splx(s);
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_SYSTEM:
		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
		    ((cnt.v_cache_count == 0) &&
		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
			m = vm_page_queue_free.tqh_first;
			if (m == NULL) {
				--vm_page_zero_count;
				m = vm_page_queue_zero.tqh_first;
			}
		} else {
			m = vm_page_queue_cache.tqh_first;
			if (m == NULL) {
				splx(s);
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_INTERRUPT:
		if (cnt.v_free_count > 0) {
			m = vm_page_queue_free.tqh_first;
			if (m == NULL) {
				--vm_page_zero_count;
				m = vm_page_queue_zero.tqh_first;
			}
		} else {
			splx(s);
			pagedaemon_wakeup();
			return (NULL);
		}
		break;

	default:
		panic("vm_page_alloc: invalid allocation class");
	}

	queue = m->queue;
	TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq);
	--(*vm_page_queues[queue].cnt);
	if (queue == PQ_ZERO) {
		m->flags = PG_ZERO | PG_BUSY;
	} else if (queue == PQ_CACHE) {
		vm_page_remove(m);
		m->flags = PG_BUSY;
	} else {
		m->flags = PG_BUSY;
	}
	m->wire_count = 0;
	m->hold_count = 0;
	m->act_count = 0;
	m->busy = 0;
	m->valid = 0;
	m->dirty = 0;
	m->queue = PQ_NONE;

	/* XXX before splx until vm_page_insert is safe */
	vm_page_insert(m, object, pindex);

	splx(s);

	/*
	 * Don't wakeup too often - wakeup the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (((cnt.v_free_count + cnt.v_cache_count) <
	    (cnt.v_free_reserved + cnt.v_cache_min)) ||
	    (cnt.v_free_count < cnt.v_pageout_free_min))
		pagedaemon_wakeup();

	return (m);
}
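
/*
 * Illustrative sketch (not part of the original source): asking for a
 * prezeroed page and zeroing by hand when none was available.  PG_ZERO
 * is only set when the page actually came from vm_page_queue_zero, so
 * a VM_ALLOC_ZERO request can still return a non-zeroed page.
 */
#if 0
	vm_page_t m;

	m = vm_page_alloc(object, pindex, VM_ALLOC_ZERO);
	if (m == NULL) {
		/* out of pages; the pageout daemon has been woken up */
	} else if ((m->flags & PG_ZERO) == 0)
		vm_page_zero_fill(m);
#endif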

/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
void *
contigmalloc(size, type, flags, low, high, alignment, boundary)
	unsigned long size;	/* should be size_t here and for malloc() */
	int type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
{
	int i, s, start;
	vm_offset_t addr, phys, tmp_addr;
	vm_page_t pga = vm_page_array;

	size = round_page(size);
	if (size == 0)
		panic("vm_page_alloc_contig: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("vm_page_alloc_contig: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("vm_page_alloc_contig: boundary must be a power of 2");

	start = 0;
	s = splhigh();
again:
	/*
	 * Find first page in array that is free, within range, aligned, and
	 * such that the boundary won't be crossed.
	 */
	for (i = start; i < cnt.v_page_count; i++) {
		phys = VM_PAGE_TO_PHYS(&pga[i]);
		if ((pga[i].queue == PQ_FREE) &&
		    (phys >= low) && (phys < high) &&
		    ((phys & (alignment - 1)) == 0) &&
		    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
			break;
	}

	/*
	 * If the above failed or we will exceed the upper bound, fail.
	 */
	if ((i == cnt.v_page_count) ||
	    ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
		splx(s);
		return (NULL);
	}
	start = i;

	/*
	 * Check that successive pages are contiguous and free.
	 */
	for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
		if ((VM_PAGE_TO_PHYS(&pga[i]) !=
		    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
		    (pga[i].queue != PQ_FREE)) {
			start++;
			goto again;
		}
	}

	/*
	 * We've found a contiguous chunk that meets our requirements.
	 * Allocate kernel VM, remove the physical pages from the free list,
	 * assign them to the new range, and return the kernel VM pointer.
	 */
	tmp_addr = addr = kmem_alloc_pageable(kernel_map, size);
	if (addr == 0) {
		splx(s);
		return (NULL);
	}

	for (i = start; i < (start + size / PAGE_SIZE); i++) {
		vm_page_t m = &pga[i];

		TAILQ_REMOVE(&vm_page_queue_free, m, pageq);
		cnt.v_free_count--;
		m->valid = VM_PAGE_BITS_ALL;
		m->flags = 0;
		m->dirty = 0;
		m->wire_count = 0;
		m->act_count = 0;
		m->busy = 0;
		m->queue = PQ_NONE;
		vm_page_insert(m, kernel_object,
		    OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
		vm_page_wire(m);
		pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
		tmp_addr += PAGE_SIZE;
	}

	splx(s);
	return ((void *)addr);
}

vm_offset_t
vm_page_alloc_contig(size, low, high, alignment)
	vm_offset_t size;
	vm_offset_t low;
	vm_offset_t high;
	vm_offset_t alignment;
{
	return ((vm_offset_t)contigmalloc(size, M_DEVBUF, M_NOWAIT, low, high,
	    alignment, 0ul));
}
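
/*
 * Illustrative sketch (not part of the original source): allocating a
 * 64KB buffer suitable for ISA DMA: physically below 16MB, page
 * aligned, and not crossing a 64KB boundary.  The size, range and
 * boundary values are assumptions chosen for the example.
 */
#if 0
	void *buf;

	buf = contigmalloc(64ul * 1024, M_DEVBUF, M_NOWAIT,
	    0ul,			/* low */
	    16ul * 1024 * 1024,		/* high */
	    PAGE_SIZE,			/* alignment */
	    64ul * 1024);		/* boundary */
	if (buf == NULL)
		printf("contigmalloc: no contiguous range available\n");
#endif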

/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page must be locked prior to entry.
 */
void
vm_page_free(m)
	register vm_page_t m;
{
	int s;
	int flags = m->flags;

	s = splhigh();
	if (m->busy || (flags & PG_BUSY) || (m->queue == PQ_FREE)) {
		printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d)\n",
		    m->pindex, m->busy, (flags & PG_BUSY) ? 1 : 0);
		if (m->queue == PQ_FREE)
			panic("vm_page_free: freeing free page");
		else
			panic("vm_page_free: freeing busy page");
	}

	if (m->hold_count) {
		panic("vm_page_free: freeing held page, count=%d",
		    m->hold_count);
	}

	vm_page_remove(m);
	vm_page_unqueue(m);

	if ((flags & PG_FICTITIOUS) == 0) {
		if (m->wire_count) {
			if (m->wire_count > 1) {
				printf("vm_page_free: wire count > 1 (%d)",
				    m->wire_count);
				panic("vm_page_free: invalid wire count");
			}
			cnt.v_wire_count--;
			m->wire_count = 0;
		}
		m->queue = PQ_FREE;
		TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
		splx(s);
		/*
		 * If the pageout daemon needs pages, then tell it that there
		 * are some free.
		 */
		if (vm_pageout_pages_needed) {
			wakeup(&vm_pageout_pages_needed);
			vm_pageout_pages_needed = 0;
		}

		cnt.v_free_count++;
		/*
		 * Wake up processes that are waiting on memory if we hit a
		 * high water mark, and wake up the scheduler process (which
		 * will swap in processes) if we have lots of memory.
		 */
		if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) {
			wakeup(&cnt.v_free_count);
			wakeup(&proc0);
		}
	} else {
		splx(s);
	}
	cnt.v_tfree++;
}


/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page queues must be locked.
 */
void
vm_page_wire(m)
	register vm_page_t m;
{
	int s;

	if (m->wire_count == 0) {
		s = splhigh();
		vm_page_unqueue(m);
		splx(s);
		cnt.v_wire_count++;
	}
	m->wire_count++;
	m->flags |= PG_MAPPED;
}

/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page queues must be locked.
 */
void
vm_page_unwire(m)
	register vm_page_t m;
{
	int s;

	s = splhigh();

	if (m->wire_count > 0)
		m->wire_count--;

	if (m->wire_count == 0) {
		cnt.v_wire_count--;
		TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
		m->queue = PQ_ACTIVE;
		if (m->act_count < ACT_MAX)
			m->act_count += 1;
		cnt.v_active_count++;
	}
	splx(s);
}
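
/*
 * Illustrative sketch (not part of the original source): wirings nest,
 * so each vm_page_wire() must be balanced by a vm_page_unwire(); the
 * page is put back on the active queue only when the count returns to
 * zero.  The page queues are assumed to be locked by the caller.
 */
#if 0
	vm_page_wire(m);	/* page leaves the paging queues */
	/* ... safe from the pageout daemon while wired ... */
	vm_page_unwire(m);	/* count 0: back on the active queue */
#endif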

/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */
void
vm_page_activate(m)
	register vm_page_t m;
{
	int s;

	s = splhigh();
	if (m->queue == PQ_ACTIVE)
		panic("vm_page_activate: already active");

	if (m->queue == PQ_CACHE)
		cnt.v_reactivated++;

	vm_page_unqueue(m);

	if (m->wire_count == 0) {
		TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
		m->queue = PQ_ACTIVE;
		if (m->act_count < 5)
			m->act_count = 5;
		else if (m->act_count < ACT_MAX)
			m->act_count += 1;
		cnt.v_active_count++;
	}
	splx(s);
}

/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(m)
	register vm_page_t m;
{
	int spl;

	/*
	 * Only move active pages -- ignore locked or already inactive ones.
	 *
	 * XXX: sometimes we get pages which aren't wired down or on any
	 * queue - we need to put them on the inactive queue also,
	 * otherwise we lose track of them.
	 * Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
	 */
	if (m->queue == PQ_INACTIVE)
		return;

	spl = splhigh();
	if (m->wire_count == 0 && m->hold_count == 0) {
		if (m->queue == PQ_CACHE)
			cnt.v_reactivated++;
		vm_page_unqueue(m);
		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
		m->queue = PQ_INACTIVE;
		cnt.v_inactive_count++;
		m->act_count = 0;
	}
	splx(spl);
}

/*
 *	vm_page_cache:
 *
 *	Put the specified page onto the page cache queue (if appropriate).
 */
void
vm_page_cache(m)
	register vm_page_t m;
{
	int s;

	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
		printf("vm_page_cache: attempting to cache busy page\n");
		return;
	}
	if (m->queue == PQ_CACHE)
		return;

	vm_page_protect(m, VM_PROT_NONE);
	s = splhigh();
	vm_page_unqueue(m);
	TAILQ_INSERT_TAIL(&vm_page_queue_cache, m, pageq);
	m->queue = PQ_CACHE;
	cnt.v_cache_count++;
	if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) {
		wakeup(&cnt.v_free_count);
		wakeup(&proc0);
	}
	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	splx(s);
}

/*
 *	vm_page_zero_fill:
 *
 *	Zero-fill the specified page.
 *	Written as a standard pagein routine, to
 *	be used by the zero-fill object.
 */
boolean_t
vm_page_zero_fill(m)
	vm_page_t m;
{
	pmap_zero_page(VM_PAGE_TO_PHYS(m));
	return (TRUE);
}

/*
 *	vm_page_copy:
 *
 *	Copy one page to another.
 */
void
vm_page_copy(src_m, dest_m)
	vm_page_t src_m;
	vm_page_t dest_m;
{
	pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
	dest_m->valid = VM_PAGE_BITS_ALL;
}

/*
 * Mapping function for the valid bits or the dirty bits in a page.
 */
inline int
vm_page_bits(int base, int size)
{
	u_short chunk;

	if ((base == 0) && (size >= PAGE_SIZE))
		return VM_PAGE_BITS_ALL;
	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
	base = (base % PAGE_SIZE) / DEV_BSIZE;
	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
	return (chunk << base) & VM_PAGE_BITS_ALL;
}
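
/*
 * Illustrative worked example (not part of the original source), for a
 * hypothetical 4096-byte page with DEV_BSIZE 512 (8 chunks per page).
 * vm_page_bits(512, 1024) covers chunks 1 and 2:
 *
 *	size  = (1024 + 511) & ~511			-> 1024
 *	base  = (512 % 4096) / 512			-> 1
 *	chunk = vm_page_dev_bsize_chunks[1024 / 512]	-> 0x3
 *	bits  = (0x3 << 1) & VM_PAGE_BITS_ALL		-> 0x06
 */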

/*
 * Set a page valid and clean.
 */
void
vm_page_set_validclean(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int pagebits = vm_page_bits(base, size);

	m->valid |= pagebits;
	m->dirty &= ~pagebits;
	if (base == 0 && size == PAGE_SIZE)
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}

/*
 * Set a page (partially) invalid.
 */
void
vm_page_set_invalid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits;

	bits = vm_page_bits(base, size);
	m->valid &= ~bits;
	if (m->valid == 0)
		m->dirty &= ~bits;
}

/*
 * Is a (partial) page valid?
 */
int
vm_page_is_valid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits = vm_page_bits(base, size);

	if (m->valid && ((m->valid & bits) == bits))
		return 1;
	else
		return 0;
}

void
vm_page_test_dirty(m)
	vm_page_t m;
{
	if ((m->dirty != VM_PAGE_BITS_ALL) &&
	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
		m->dirty = VM_PAGE_BITS_ALL;
	}
}

#ifdef DDB
void
DDB_print_page_info(void)
{
	printf("cnt.v_free_count: %d\n", cnt.v_free_count);
	printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
	printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
	printf("cnt.v_active_count: %d\n", cnt.v_active_count);
	printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
	printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
	printf("cnt.v_free_min: %d\n", cnt.v_free_min);
	printf("cnt.v_free_target: %d\n", cnt.v_free_target);
	printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
	printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}
#endif
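
/*
 * Illustrative sketch (not part of the original source): how a pager
 * might record that only the first DEV_BSIZE chunk of a page holds
 * valid, clean data after a partial read, and test for it later.
 */
#if 0
	vm_page_set_validclean(m, 0, DEV_BSIZE);
	/* ... */
	if (vm_page_is_valid(m, 0, DEV_BSIZE))
		/* the first chunk can be used without a pagein */ ;
#endif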