/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 *	$Id: vm_page.c,v 1.93 1998/02/09 06:11:32 eivind Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Resident memory management module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

static void	vm_page_queue_init __P((void));
static vm_page_t vm_page_select_free __P((vm_object_t object,
			vm_pindex_t pindex, int prefqueue));

/*
 *	Associated with page of user-allocatable memory is a
 *	page structure.
 */

static struct pglist *vm_page_buckets;	/* Array of buckets */
static int vm_page_bucket_count;	/* How big is array? */
static int vm_page_hash_mask;		/* Mask for hash function */
static volatile int vm_page_bucket_generation;

struct pglist vm_page_queue_free[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_zero[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_active = {0};
struct pglist vm_page_queue_inactive = {0};
struct pglist vm_page_queue_cache[PQ_L2_SIZE] = {0};

static int no_queue = 0;

struct vpgqueues vm_page_queues[PQ_COUNT] = {0};
static int pqcnt[PQ_COUNT] = {0};

static void
vm_page_queue_init(void) {
	int i;

	vm_page_queues[PQ_NONE].pl = NULL;
	vm_page_queues[PQ_NONE].cnt = &no_queue;
	for (i = 0; i < PQ_L2_SIZE; i++) {
		vm_page_queues[PQ_FREE+i].pl = &vm_page_queue_free[i];
		vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
	}
	for (i = 0; i < PQ_L2_SIZE; i++) {
		vm_page_queues[PQ_ZERO+i].pl = &vm_page_queue_zero[i];
		vm_page_queues[PQ_ZERO+i].cnt = &cnt.v_free_count;
	}
	vm_page_queues[PQ_INACTIVE].pl = &vm_page_queue_inactive;
	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;

	vm_page_queues[PQ_ACTIVE].pl = &vm_page_queue_active;
	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
	for (i = 0; i < PQ_L2_SIZE; i++) {
		vm_page_queues[PQ_CACHE+i].pl = &vm_page_queue_cache[i];
		vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count;
	}
	for (i = 0; i < PQ_COUNT; i++) {
		if (vm_page_queues[i].pl) {
			TAILQ_INIT(vm_page_queues[i].pl);
		} else if (i != 0) {
			panic("vm_page_queue_init: queue %d is null", i);
		}
		vm_page_queues[i].lcnt = &pqcnt[i];
	}
}

vm_page_t vm_page_array = 0;
static int vm_page_array_size = 0;
long first_page = 0;
static long last_page;
static vm_size_t page_mask;
static int page_shift;
int vm_page_zero_count = 0;

/*
 * map of contiguous valid DEV_BSIZE chunks in a page
 * (this list is valid for page sizes up to 16*DEV_BSIZE)
 */
static u_short vm_page_dev_bsize_chunks[] = {
	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};

static inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex));
static int vm_page_freechk_and_unqueue __P((vm_page_t m));
static void vm_page_free_wakeup __P((void));

/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from cnt.v_page_size.
 */
void
vm_set_page_size()
{

	if (cnt.v_page_size == 0)
		cnt.v_page_size = DEFAULT_PAGE_SIZE;
	page_mask = cnt.v_page_size - 1;
	if ((page_mask & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
	for (page_shift = 0;; page_shift++)
		if ((1 << page_shift) == cnt.v_page_size)
			break;
}
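
/*
 * Worked example (illustrative, not part of the original source): with
 * the usual cnt.v_page_size of 4096, the loop above leaves page_mask =
 * 0xfff and page_shift = 12, since 1 << 12 == 4096.  A non-power-of-two
 * size such as 4000 would fail the (page_mask & cnt.v_page_size) test
 * and panic instead.
 */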
/*
 *	vm_page_startup:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 */

vm_offset_t
vm_page_startup(starta, enda, vaddr)
	register vm_offset_t starta;
	vm_offset_t enda;
	register vm_offset_t vaddr;
{
	register vm_offset_t mapped;
	register vm_page_t m;
	register struct pglist *bucket;
	vm_size_t npages, page_range;
	register vm_offset_t new_start;
	int i;
	vm_offset_t pa;
	int nblocks;
	vm_offset_t first_managed_page;

	/* the biggest memory array is the second group of pages */
	vm_offset_t start;
	vm_offset_t biggestone, biggestsize;

	vm_offset_t total;

	total = 0;
	biggestsize = 0;
	biggestone = 0;
	nblocks = 0;
	vaddr = round_page(vaddr);

	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	for (i = 0; phys_avail[i + 1]; i += 2) {
		int size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		++nblocks;
		total += size;
	}

	start = phys_avail[biggestone];

	/*
	 * Initialize the queue headers for the free queue, the active queue
	 * and the inactive queue.
	 */

	vm_page_queue_init();

	/*
	 * Allocate (and initialize) the hash table buckets.
	 *
	 * The number of buckets MUST BE a power of 2, and the actual value is
	 * the next power of 2 greater than the number of physical pages in
	 * the system.
	 *
	 * Note: This computation can be tweaked if desired.
	 */
	vm_page_buckets = (struct pglist *) vaddr;
	bucket = vm_page_buckets;
	if (vm_page_bucket_count == 0) {
		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < atop(total))
			vm_page_bucket_count <<= 1;
	}
	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 * Validate these addresses.
	 */

	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
	new_start = round_page(new_start);
	mapped = vaddr;
	vaddr = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;
	bzero((caddr_t) mapped, vaddr - mapped);
	mapped = vaddr;

	for (i = 0; i < vm_page_bucket_count; i++) {
		TAILQ_INIT(bucket);
		bucket++;
	}
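
	/*
	 * Sizing sketch (illustrative; assumes 4K pages): with 32MB of
	 * managed memory, atop(total) is 8192, so the doubling loop above
	 * leaves vm_page_bucket_count at 8192 and vm_page_hash_mask at
	 * 8191, i.e. roughly one hash bucket per physical page.
	 */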
	/*
	 * Validate these zone addresses.
	 */

	new_start = start + (vaddr - mapped);
	pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE);
	bzero((caddr_t) mapped, (vaddr - mapped));
	start = round_page(new_start);

	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */

	first_page = phys_avail[0] / PAGE_SIZE;
	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;

	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
	npages = (total - (page_range * sizeof(struct vm_page)) -
	    (start - phys_avail[biggestone])) / PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */

	vm_page_array = (vm_page_t) vaddr;
	mapped = vaddr;

	/*
	 * Validate these addresses.
	 */

	new_start = round_page(start + page_range * sizeof(struct vm_page));
	mapped = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;

	first_managed_page = start / PAGE_SIZE;

	/*
	 * Clear all of the page structures
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	vm_page_array_size = page_range;

	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
		if (i == biggestone)
			pa = ptoa(first_managed_page);
		else
			pa = phys_avail[i];
		while (pa < phys_avail[i + 1] && npages-- > 0) {
			++cnt.v_page_count;
			++cnt.v_free_count;
			m = PHYS_TO_VM_PAGE(pa);
			m->phys_addr = pa;
			m->flags = 0;
			m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
			m->queue = PQ_FREE + m->pc;
			TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
			++(*vm_page_queues[m->queue].lcnt);
			pa += PAGE_SIZE;
		}
	}
	return (mapped);
}

/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:  This macro depends on vm_page_bucket_count being a power of 2.
 */
static inline int
vm_page_hash(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	return ((((unsigned) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask;
}
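
/*
 * Example (illustrative): for an object header at address 0xc0f12340 and
 * pindex 10, the hash is
 *
 *	((0xc0f12340 >> 5) + (10 >> 1)) & vm_page_hash_mask
 *
 * Shifting the object pointer right by 5 discards low-order bits that
 * are mostly constant due to allocator alignment, while pindex >> 1
 * maps neighboring pages of one object into nearby buckets.
 */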
/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object and page must be locked, and must be splhigh.
 */

void
vm_page_insert(m, object, pindex)
	register vm_page_t m;
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register struct pglist *bucket;

#if !defined(MAX_PERF)
	if (m->flags & PG_TABLED)
		panic("vm_page_insert: already inserted");
#endif

	/*
	 * Record the object/offset pair in this page
	 */

	m->object = object;
	m->pindex = pindex;

	/*
	 * Insert it into the object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	TAILQ_INSERT_TAIL(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now link into the object's list of backed pages.
	 */

	TAILQ_INSERT_TAIL(&object->memq, m, listq);
	m->flags |= PG_TABLED;
	m->object->page_hint = m;
	m->object->generation++;

	if (m->wire_count)
		object->wire_count++;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count++;

	/*
	 * And show that the object has one more resident page.
	 */

	object->resident_page_count++;
}

/*
 *	vm_page_remove:		[ internal use only ]
 *				NOTE: used by device pager as well -wfj
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object and page must be locked, and at splhigh.
 */

void
vm_page_remove(m)
	register vm_page_t m;
{
	register struct pglist *bucket;
	vm_object_t object;

	if (!(m->flags & PG_TABLED))
		return;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) == 0) {
		panic("vm_page_remove: page not busy");
	}
#endif

	m->flags &= ~PG_BUSY;
	if (m->flags & PG_WANTED) {
		m->flags &= ~PG_WANTED;
		wakeup(m);
	}

	object = m->object;
	if (object->page_hint == m)
		object->page_hint = NULL;

	if (m->wire_count)
		object->wire_count--;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count--;

	/*
	 * Remove from the object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
	TAILQ_REMOVE(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now remove from the object's list of backed pages.
	 */

	TAILQ_REMOVE(&object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */

	object->resident_page_count--;
	object->generation++;
	m->object = NULL;

	m->flags &= ~PG_TABLED;
}
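
/*
 * The PG_BUSY/PG_WANTED handshake above is the page-level sleep/wakeup
 * protocol used throughout this file.  A minimal sketch of the waiter's
 * side (illustrative; the wmesg string is arbitrary, and vm_page_sleep()
 * below packages the same pattern with an object generation check):
 *
 *	s = splvm();
 *	while (m->flags & PG_BUSY) {
 *		m->flags |= PG_WANTED;
 *		tsleep(m, PVM, "pgwait", 0);
 *	}
 *	splx(s);
 *
 * vm_page_remove() completes the handshake by clearing PG_BUSY and
 * calling wakeup(m) when PG_WANTED is set.
 */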
/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

vm_page_t
vm_page_lookup(object, pindex)
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register vm_page_t m;
	register struct pglist *bucket;
	int generation;
	int s;

	/*
	 * Search the hash table for this object/offset pair
	 */

	if (object->page_hint && (object->page_hint->pindex == pindex) &&
		(object->page_hint->object == object))
		return object->page_hint;

retry:
	generation = vm_page_bucket_generation;
	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) {
		if ((m->object == object) && (m->pindex == pindex)) {
			if (vm_page_bucket_generation != generation)
				goto retry;
			m->object->page_hint = m;
			return (m);
		}
	}
	if (vm_page_bucket_generation != generation)
		goto retry;
	return (NULL);
}

/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(m, new_object, new_pindex)
	register vm_page_t m;
	register vm_object_t new_object;
	vm_pindex_t new_pindex;
{
	int s;

	s = splvm();
	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	splx(s);
}

/*
 * vm_page_unqueue without any wakeup
 */
void
vm_page_unqueue_nowakeup(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;
	if (queue != PQ_NONE) {
		pq = &vm_page_queues[queue];
		m->queue = PQ_NONE;
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 * vm_page_unqueue() must be called at splhigh()
 */
void
vm_page_unqueue(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;
	if (queue != PQ_NONE) {
		m->queue = PQ_NONE;
		pq = &vm_page_queues[queue];
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if ((cnt.v_cache_count + cnt.v_free_count) <
				(cnt.v_free_reserved + cnt.v_cache_min))
				pagedaemon_wakeup();
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 * Find a page on the specified queue with color optimization.
 */
vm_page_t
vm_page_list_find(basequeue, index)
	int basequeue, index;
{
#if PQ_L2_SIZE > 1

	int i,j;
	vm_page_t m;
	int hindex;
	struct vpgqueues *pq;

	pq = &vm_page_queues[basequeue];

	m = TAILQ_FIRST(pq[index].pl);
	if (m)
		return m;

	for(j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
			(ij = i + j) > 0;
			i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	m = TAILQ_FIRST(pq[hindex].pl);
	if (m)
		return m;

	return NULL;
#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}
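
/*
 * Index selection sketch (illustrative): callers translate an
 * (object, pindex) pair into a starting color before using the
 * nearest-color scan above, as vm_page_select() below does:
 *
 *	index = (pindex + object->pg_color) & PQ_L2_MASK;
 *
 * Pages that would conflict in a direct-mapped L2 cache thus start on
 * different sub-queues, and the scan usually succeeds at or near index.
 */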
/*
 * Find a page on the specified queue with color optimization.
 */
vm_page_t
vm_page_select(object, pindex, basequeue)
	vm_object_t object;
	vm_pindex_t pindex;
	int basequeue;
{

#if PQ_L2_SIZE > 1
	int index;
	index = (pindex + object->pg_color) & PQ_L2_MASK;
	return vm_page_list_find(basequeue, index);

#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}

/*
 * Find a free or zero page, with specified preference.
 */
static vm_page_t
vm_page_select_free(object, pindex, prefqueue)
	vm_object_t object;
	vm_pindex_t pindex;
	int prefqueue;
{
#if PQ_L2_SIZE > 1
	int i,j;
	int index, hindex;
#endif
	vm_page_t m, mh;
	int oqueuediff;
	struct vpgqueues *pq;

	if (prefqueue == PQ_ZERO)
		oqueuediff = PQ_FREE - PQ_ZERO;
	else
		oqueuediff = PQ_ZERO - PQ_FREE;

	if (mh = object->page_hint) {
		if (mh->pindex == (pindex - 1)) {
			if ((mh->flags & PG_FICTITIOUS) == 0) {
				if ((mh < &vm_page_array[cnt.v_page_count-1]) &&
					(mh >= &vm_page_array[0])) {
					int queue;
					m = mh + 1;
					if (VM_PAGE_TO_PHYS(m) == (VM_PAGE_TO_PHYS(mh) + PAGE_SIZE)) {
						queue = m->queue - m->pc;
						if (queue == PQ_FREE || queue == PQ_ZERO) {
							return m;
						}
					}
				}
			}
		}
	}

	pq = &vm_page_queues[prefqueue];

#if PQ_L2_SIZE > 1

	index = (pindex + object->pg_color) & PQ_L2_MASK;

	if (m = TAILQ_FIRST(pq[index].pl))
		return m;
	if (m = TAILQ_FIRST(pq[index + oqueuediff].pl))
		return m;

	for(j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
			(ij = i + j) >= 0;
			i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
			if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl))
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
			if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl))
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	if (m = TAILQ_FIRST(pq[hindex].pl))
		return m;
	if (m = TAILQ_FIRST(pq[hindex+oqueuediff].pl))
		return m;

#else
	if (m = TAILQ_FIRST(pq[0].pl))
		return m;
	else
		return TAILQ_FIRST(pq[oqueuediff].pl);
#endif

	return NULL;
}
/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	page_req classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *	VM_ALLOC_ZERO		zero page
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(object, pindex, page_req)
	vm_object_t object;
	vm_pindex_t pindex;
	int page_req;
{
	register vm_page_t m;
	struct vpgqueues *pq;
	vm_object_t oldobject;
	int queue, qtype;
	int s;

#ifdef DIAGNOSTIC
	m = vm_page_lookup(object, pindex);
	if (m)
		panic("vm_page_alloc: page already allocated");
#endif

	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
		page_req = VM_ALLOC_SYSTEM;
	}

	s = splvm();

	switch (page_req) {

	case VM_ALLOC_NORMAL:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(NORMAL): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select(object, pindex, PQ_CACHE);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_ZERO:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_ZERO);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(ZERO): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select(object, pindex, PQ_CACHE);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_SYSTEM:
		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
		    ((cnt.v_cache_count == 0) &&
		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(SYSTEM): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select(object, pindex, PQ_CACHE);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_INTERRUPT:
		if (cnt.v_free_count > 0) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(INTERRUPT): missing page on free queue\n");
#endif
		} else {
			splx(s);
			vm_pageout_deficit++;
			pagedaemon_wakeup();
			return (NULL);
		}
		break;

	default:
		m = NULL;
#if !defined(MAX_PERF)
		panic("vm_page_alloc: invalid allocation class");
#endif
	}

	queue = m->queue;
	qtype = queue - m->pc;
	if (qtype == PQ_ZERO)
		vm_page_zero_count--;
	pq = &vm_page_queues[queue];
	TAILQ_REMOVE(pq->pl, m, pageq);
	(*pq->cnt)--;
	(*pq->lcnt)--;
	oldobject = NULL;
	if (qtype == PQ_ZERO) {
		m->flags = PG_ZERO | PG_BUSY;
	} else if (qtype == PQ_CACHE) {
		oldobject = m->object;
		m->flags |= PG_BUSY;
		vm_page_remove(m);
		m->flags = PG_BUSY;
	} else {
		m->flags = PG_BUSY;
	}
	m->wire_count = 0;
	m->hold_count = 0;
	m->act_count = 0;
	m->busy = 0;
	m->valid = 0;
	m->dirty = 0;
	m->queue = PQ_NONE;

	/* XXX before splx until vm_page_insert is safe */
	vm_page_insert(m, object, pindex);

	/*
	 * Don't wake up too often - wake up the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (((cnt.v_free_count + cnt.v_cache_count) <
		(cnt.v_free_reserved + cnt.v_cache_min)) ||
			(cnt.v_free_count < cnt.v_pageout_free_min))
		pagedaemon_wakeup();

	if ((qtype == PQ_CACHE) &&
		((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) &&
		oldobject && (oldobject->type == OBJT_VNODE) &&
		((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VFREE|VTBFREE|VDOOMED)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}
	splx(s);

	return (m);
}
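
/*
 * Usage sketch (illustrative, hypothetical caller): a pager that can
 * sleep would typically loop against transient shortage:
 *
 *	vm_page_t m;
 *
 *	while ((m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL)) == NULL)
 *		VM_WAIT;
 *
 * VM_ALLOC_INTERRUPT may consume free pages down to the very last one,
 * VM_ALLOC_SYSTEM may dip below cnt.v_free_reserved once the cache is
 * empty, and VM_ALLOC_NORMAL/VM_ALLOC_ZERO fall back to reclaiming a
 * cache page instead.
 */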
void
vm_wait()
{
	int s;

	s = splvm();
	if (curproc == pageproc) {
		vm_pageout_pages_needed = 1;
		tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
	} else {
		if (!vm_pages_needed) {
			vm_pages_needed++;
			wakeup(&vm_pages_needed);
		}
		tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
	}
	splx(s);
}

int
vm_page_sleep(vm_page_t m, char *msg, char *busy) {
	vm_object_t object = m->object;
	int generation = object->generation;
	if ((busy && *busy) || (m->flags & PG_BUSY)) {
		int s;
		s = splvm();
		if ((busy && *busy) || (m->flags & PG_BUSY)) {
			m->flags |= PG_WANTED;
			tsleep(m, PVM, msg, 800);
		}
		splx(s);
	}
	return ((generation != object->generation) || (busy && *busy) ||
		(m->flags & PG_BUSY));
}
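
/*
 * Usage sketch (illustrative): contigmalloc1() below uses this helper
 * while walking the page queues:
 *
 *	if (vm_page_sleep(m, "vpctw0", &m->busy))
 *		goto again1;	(rescan; the queues may have changed)
 *
 * A nonzero return means the page may have changed state under the
 * caller (the object generation moved, or the page is still busy), so
 * any cached queue position must be revalidated.
 */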
/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */
void
vm_page_activate(m)
	register vm_page_t m;
{
	int s;
	vm_page_t np;
	vm_object_t object;

	s = splvm();
	if (m->queue != PQ_ACTIVE) {
		if ((m->queue - m->pc) == PQ_CACHE)
			cnt.v_reactivated++;

		vm_page_unqueue(m);

		if (m->wire_count == 0) {
			m->queue = PQ_ACTIVE;
			++(*vm_page_queues[PQ_ACTIVE].lcnt);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			if (m->act_count < ACT_INIT)
				m->act_count = ACT_INIT;
			cnt.v_active_count++;
		}
	} else {
		if (m->act_count < ACT_INIT)
			m->act_count = ACT_INIT;
	}

	object = m->object;
	TAILQ_REMOVE(&object->memq, m, listq);
	TAILQ_INSERT_TAIL(&object->memq, m, listq);
	object->generation++;

	splx(s);
}

/*
 * helper routine for vm_page_free and vm_page_free_zero
 */
static int
vm_page_freechk_and_unqueue(m)
	vm_page_t m;
{
	vm_object_t oldobject;

	oldobject = m->object;

#if !defined(MAX_PERF)
	if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
		(m->hold_count != 0)) {
		printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d), hold(%d)\n",
			m->pindex, m->busy,
			(m->flags & PG_BUSY) ? 1 : 0, m->hold_count);
		if ((m->queue - m->pc) == PQ_FREE)
			panic("vm_page_free: freeing free page");
		else
			panic("vm_page_free: freeing busy page");
	}
#endif

	vm_page_unqueue_nowakeup(m);
	vm_page_remove(m);

	if ((m->flags & PG_FICTITIOUS) != 0) {
		return 0;
	}

	m->valid = 0;

	if (m->wire_count != 0) {
#if !defined(MAX_PERF)
		if (m->wire_count > 1) {
			panic("vm_page_free: invalid wire count (%d), pindex: 0x%x",
				m->wire_count, m->pindex);
		}
#endif
		m->wire_count = 0;
		if (m->object)
			m->object->wire_count--;
		cnt.v_wire_count--;
	}

	if (oldobject && (oldobject->type == OBJT_VNODE) &&
		((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}

	return 1;
}

/*
 * helper routine for vm_page_free and vm_page_free_zero
 */
static __inline void
vm_page_free_wakeup()
{

/*
 * if pageout daemon needs pages, then tell it that there are
 * some free.
 */
	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	/*
	 * wakeup processes that are waiting on memory if we hit a
	 * high water mark.  And wakeup scheduler process if we have
	 * lots of memory.  this process will swapin processes.
	 */
	if (vm_pages_needed &&
		((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
		wakeup(&cnt.v_free_count);
		vm_pages_needed = 0;
	}
}

/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page must be locked prior to entry.
 */
void
vm_page_free(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_FREE + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);
	/*
	 * If the pageout process is grabbing the page, it is likely
	 * that the page is NOT in the cache.  It is more likely that
	 * the page will be partially in the cache if it is being
	 * explicitly freed.
	 */
	if (curproc == pageproc) {
		TAILQ_INSERT_TAIL(pq->pl, m, pageq);
	} else {
		TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	}

	vm_page_free_wakeup();
	splx(s);
}

void
vm_page_free_zero(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_ZERO + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);

	TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	++vm_page_zero_count;
	vm_page_free_wakeup();
	splx(s);
}
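
/*
 * Choosing between the two frees (illustrative note): the only
 * difference is the destination queue.  A caller that has just zeroed
 * a page can preserve that work with vm_page_free_zero(m), letting a
 * later VM_ALLOC_ZERO request in vm_page_alloc() skip re-zeroing,
 * since pages taken from PQ_ZERO come back with PG_ZERO set; plain
 * vm_page_free(m) returns the page to PQ_FREE.
 */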
/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page queues must be locked.
 */
void
vm_page_wire(m)
	register vm_page_t m;
{
	int s;

	if (m->wire_count == 0) {
		s = splvm();
		vm_page_unqueue(m);
		splx(s);
		cnt.v_wire_count++;
		if (m->object)
			m->object->wire_count++;
	}
	(*vm_page_queues[PQ_NONE].lcnt)++;
	m->wire_count++;
	m->flags |= PG_MAPPED;
}

/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page queues must be locked.
 */
void
vm_page_unwire(m)
	register vm_page_t m;
{
	int s;

	s = splvm();

	if (m->wire_count > 0) {
		m->wire_count--;
		if (m->wire_count == 0) {
			if (m->object)
				m->object->wire_count--;
			cnt.v_wire_count--;
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			m->queue = PQ_ACTIVE;
			(*vm_page_queues[PQ_ACTIVE].lcnt)++;
			cnt.v_active_count++;
		}
	} else {
#if !defined(MAX_PERF)
		panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count);
#endif
	}
	splx(s);
}


/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(m)
	register vm_page_t m;
{
	int s;

	/*
	 * Only move active pages -- ignore locked or already inactive ones.
	 *
	 * XXX: sometimes we get pages which aren't wired down or on any queue -
	 * we need to put them on the inactive queue also, otherwise we lose
	 * track of them.  Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
	 */
	if (m->queue == PQ_INACTIVE)
		return;

	s = splvm();
	if (m->wire_count == 0 && m->hold_count == 0) {
		if ((m->queue - m->pc) == PQ_CACHE)
			cnt.v_reactivated++;
		vm_page_unqueue(m);
		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
		m->queue = PQ_INACTIVE;
		++(*vm_page_queues[PQ_INACTIVE].lcnt);
		cnt.v_inactive_count++;
	}
	splx(s);
}

/*
 * vm_page_cache
 *
 * Put the specified page onto the page cache queue (if appropriate).
 */
void
vm_page_cache(m)
	register vm_page_t m;
{
	int s;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
		printf("vm_page_cache: attempting to cache busy page\n");
		return;
	}
#endif
	if ((m->queue - m->pc) == PQ_CACHE)
		return;

	vm_page_protect(m, VM_PROT_NONE);
#if !defined(MAX_PERF)
	if (m->dirty != 0) {
		panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
	}
#endif
	s = splvm();
	vm_page_unqueue_nowakeup(m);
	m->queue = PQ_CACHE + m->pc;
	(*vm_page_queues[m->queue].lcnt)++;
	TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
	cnt.v_cache_count++;
	m->object->cache_count++;
	vm_page_free_wakeup();
	splx(s);
}

/*
 * Grab a page, waiting until we are woken up due to the page
 * changing state.  We keep on waiting if the page continues
 * to be in the object.  If the page doesn't exist, allocate it.
 */
vm_page_t
vm_page_grab(object, pindex, allocflags)
	vm_object_t object;
	vm_pindex_t pindex;
	int allocflags;
{

	vm_page_t m;
	int s, generation;

retrylookup:
	if ((m = vm_page_lookup(object, pindex)) != NULL) {
		if (m->busy || (m->flags & PG_BUSY)) {
			generation = object->generation;

			s = splvm();
			while ((object->generation == generation) &&
					(m->busy || (m->flags & PG_BUSY))) {
				m->flags |= PG_WANTED | PG_REFERENCED;
				tsleep(m, PVM, "pgrbwt", 0);
				if ((allocflags & VM_ALLOC_RETRY) == 0) {
					splx(s);
					return NULL;
				}
			}
			splx(s);
			goto retrylookup;
		} else {
			m->flags |= PG_BUSY;
			return m;
		}
	}

	m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
	if (m == NULL) {
		VM_WAIT;
		if ((allocflags & VM_ALLOC_RETRY) == 0)
			return NULL;
		goto retrylookup;
	}

	return m;
}

/*
 * mapping function for valid bits or for dirty bits in
 * a page
 */
inline int
vm_page_bits(int base, int size)
{
	u_short chunk;

	if ((base == 0) && (size >= PAGE_SIZE))
		return VM_PAGE_BITS_ALL;
	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
	base = (base % PAGE_SIZE) / DEV_BSIZE;
	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
	return (chunk << base) & VM_PAGE_BITS_ALL;
}
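
/*
 * Worked example (illustrative; assumes the usual DEV_BSIZE of 512 and
 * a 4K page): vm_page_bits(1024, 1536) rounds size up to 1536, which is
 * already a multiple of DEV_BSIZE, i.e. 3 chunks; computes base chunk
 * 1024 / 512 = 2; fetches vm_page_dev_bsize_chunks[3] = 0x7; and
 * returns 0x7 << 2 = 0x1c, i.e. bits 2-4 of the valid/dirty bitmaps.
 */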
/*
 * set a page valid and clean
 */
void
vm_page_set_validclean(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int pagebits = vm_page_bits(base, size);
	m->valid |= pagebits;
	m->dirty &= ~pagebits;
	if (base == 0 && size == PAGE_SIZE)
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}

/*
 * set a page (partially) invalid
 */
void
vm_page_set_invalid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits;

	m->valid &= ~(bits = vm_page_bits(base, size));
	if (m->valid == 0)
		m->dirty &= ~bits;
}

/*
 * is (partial) page valid?
 */
int
vm_page_is_valid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits = vm_page_bits(base, size);

	if (m->valid && ((m->valid & bits) == bits))
		return 1;
	else
		return 0;
}

void
vm_page_test_dirty(m)
	vm_page_t m;
{
	if ((m->dirty != VM_PAGE_BITS_ALL) &&
	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
		m->dirty = VM_PAGE_BITS_ALL;
	}
}

/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
void *
contigmalloc1(size, type, flags, low, high, alignment, boundary, map)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
	vm_map_t map;
{
	int i, s, start;
	vm_offset_t addr, phys, tmp_addr;
	int pass;
	vm_page_t pga = vm_page_array;

	size = round_page(size);
#if !defined(MAX_PERF)
	if (size == 0)
		panic("contigmalloc1: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("contigmalloc1: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("contigmalloc1: boundary must be a power of 2");
#endif

	start = 0;
	for (pass = 0; pass <= 1; pass++) {
		s = splvm();
again:
		/*
		 * Find first page in array that is free, within range, aligned, and
		 * such that the boundary won't be crossed.
		 */
		for (i = start; i < cnt.v_page_count; i++) {
			int pqtype;
			phys = VM_PAGE_TO_PHYS(&pga[i]);
			pqtype = pga[i].queue - pga[i].pc;
			if (((pqtype == PQ_ZERO) || (pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
			    (phys >= low) && (phys < high) &&
			    ((phys & (alignment - 1)) == 0) &&
			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
				break;
		}

		/*
		 * If the above failed or we will exceed the upper bound, fail.
		 */
		if ((i == cnt.v_page_count) ||
			((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
			vm_page_t m, next;

again1:
			for (m = TAILQ_FIRST(&vm_page_queue_inactive);
				m != NULL;
				m = next) {

				if (m->queue != PQ_INACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw0", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, TRUE);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
								m->object->type == OBJT_DEFAULT) {
						m->flags |= PG_BUSY;
						vm_page_protect(m, VM_PROT_NONE);
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			for (m = TAILQ_FIRST(&vm_page_queue_active);
				m != NULL;
				m = next) {

				if (m->queue != PQ_ACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw1", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, TRUE);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
								m->object->type == OBJT_DEFAULT) {
						m->flags |= PG_BUSY;
						vm_page_protect(m, VM_PROT_NONE);
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			splx(s);
			continue;
		}
		start = i;

		/*
		 * Check successive pages for contiguous and free.
		 */
		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			pqtype = pga[i].queue - pga[i].pc;
			if ((VM_PAGE_TO_PHYS(&pga[i]) !=
			    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
			    ((pqtype != PQ_ZERO) && (pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
				start++;
				goto again;
			}
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			vm_page_t m = &pga[i];

			pqtype = m->queue - m->pc;
			if (pqtype == PQ_CACHE) {
				m->flags |= PG_BUSY;
				vm_page_free(m);
			}

			TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
			(*vm_page_queues[m->queue].lcnt)--;
			cnt.v_free_count--;
			m->valid = VM_PAGE_BITS_ALL;
			m->flags = 0;
			m->dirty = 0;
			m->wire_count = 0;
			m->busy = 0;
			m->queue = PQ_NONE;
			m->object = NULL;
			vm_page_wire(m);
		}

		/*
		 * We've found a contiguous chunk that meets our requirements.
		 * Allocate kernel VM, unfree and assign the physical pages to it and
		 * return kernel VM pointer.
		 */
		tmp_addr = addr = kmem_alloc_pageable(map, size);
		if (addr == 0) {
			/*
			 * XXX We almost never run out of kernel virtual
			 * space, so we don't make the allocated memory
			 * above available.
			 */
			splx(s);
			return (NULL);
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];
			vm_page_insert(m, kernel_object,
				OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
			pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
			tmp_addr += PAGE_SIZE;
		}

		splx(s);
		return ((void *)addr);
	}
	return NULL;
}

void *
contigmalloc(size, type, flags, low, high, alignment, boundary)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
{
	return contigmalloc1(size, type, flags, low, high, alignment, boundary,
			     kernel_map);
}

vm_offset_t
vm_page_alloc_contig(size, low, high, alignment)
	vm_offset_t size;
	vm_offset_t low;
	vm_offset_t high;
	vm_offset_t alignment;
{
	return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
					   alignment, 0ul, kernel_map));
}
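
/*
 * Usage sketch (illustrative, hypothetical driver): allocating 64K of
 * physically contiguous DMA memory below 16MB, 16-byte aligned, that
 * may cross any boundary (boundary 0 means unrestricted):
 *
 *	void *p = contigmalloc(65536, M_DEVBUF, M_NOWAIT,
 *	    0ul, 0xfffffful, 16ul, 0ul);
 *
 * alignment and boundary must be powers of 2, per the checks at the
 * top of contigmalloc1(); vm_page_alloc_contig() above is the same
 * call with M_DEVBUF, M_NOWAIT, and no boundary restriction baked in.
 */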
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <ddb/ddb.h>

DB_SHOW_COMMAND(page, vm_page_print_page_info)
{
	db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
	db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
	db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
	db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
	db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
	db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
	db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
	db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
	db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
	db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}

DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
{
	int i;
	db_printf("PQ_FREE:");
	for (i = 0; i < PQ_L2_SIZE; i++) {
		db_printf(" %d", *vm_page_queues[PQ_FREE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_CACHE:");
	for (i = 0; i < PQ_L2_SIZE; i++) {
		db_printf(" %d", *vm_page_queues[PQ_CACHE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ZERO:");
	for (i = 0; i < PQ_L2_SIZE; i++) {
		db_printf(" %d", *vm_page_queues[PQ_ZERO + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
		*vm_page_queues[PQ_ACTIVE].lcnt,
		*vm_page_queues[PQ_INACTIVE].lcnt);
}
#endif /* DDB */