vm_page.c revision 43121
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)vm_page.c     7.4 (Berkeley) 5/7/91
 *      $Id: vm_page.c,v 1.118 1999/01/21 10:01:49 dillon Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Resident memory management module.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

static void vm_page_queue_init __P((void));
static vm_page_t vm_page_select_free __P((vm_object_t object,
                        vm_pindex_t pindex, int prefqueue));
static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t));

/*
 * Associated with each page of user-allocatable memory is a
 * page structure.
 */

static struct vm_page **vm_page_buckets; /* Array of buckets */
static int vm_page_bucket_count;        /* How big is array? */
static int vm_page_hash_mask;           /* Mask for hash function */
static volatile int vm_page_bucket_generation;

struct pglist vm_page_queue_free[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_zero[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_active = {0};
struct pglist vm_page_queue_inactive = {0};
struct pglist vm_page_queue_cache[PQ_L2_SIZE] = {0};

static int no_queue = 0;

struct vpgqueues vm_page_queues[PQ_COUNT] = {0};
static int pqcnt[PQ_COUNT] = {0};

static void
vm_page_queue_init(void)
{
        int i;

        vm_page_queues[PQ_NONE].pl = NULL;
        vm_page_queues[PQ_NONE].cnt = &no_queue;
        for (i = 0; i < PQ_L2_SIZE; i++) {
                vm_page_queues[PQ_FREE+i].pl = &vm_page_queue_free[i];
                vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
        }
        for (i = 0; i < PQ_L2_SIZE; i++) {
                vm_page_queues[PQ_ZERO+i].pl = &vm_page_queue_zero[i];
                vm_page_queues[PQ_ZERO+i].cnt = &cnt.v_free_count;
        }
        vm_page_queues[PQ_INACTIVE].pl = &vm_page_queue_inactive;
        vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;

        vm_page_queues[PQ_ACTIVE].pl = &vm_page_queue_active;
        vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
        for (i = 0; i < PQ_L2_SIZE; i++) {
                vm_page_queues[PQ_CACHE+i].pl = &vm_page_queue_cache[i];
                vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count;
        }
        for (i = 0; i < PQ_COUNT; i++) {
                if (vm_page_queues[i].pl) {
                        TAILQ_INIT(vm_page_queues[i].pl);
                } else if (i != 0) {
                        panic("vm_page_queue_init: queue %d is null", i);
                }
                vm_page_queues[i].lcnt = &pqcnt[i];
        }
}

vm_page_t vm_page_array = 0;
static int vm_page_array_size = 0;
long first_page = 0;
static long last_page;
static vm_size_t page_mask;
static int page_shift;
int vm_page_zero_count = 0;

/*
 * map of contiguous valid DEV_BSIZE chunks in a page
 * (this list is valid for page sizes up to 16*DEV_BSIZE)
 */
static u_short vm_page_dev_bsize_chunks[] = {
        0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
        0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};
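
/*
 * Worked example (illustrative, assuming DEV_BSIZE == 512): a run of
 * two valid chunks indexes vm_page_dev_bsize_chunks[2] == 0x3, i.e.
 * bits 0 and 1 set.  vm_page_bits() below shifts this mask up to the
 * starting chunk, so a range covering bytes [512, 1536) of a page
 * becomes (0x3 << 1) == 0x6.
 */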

static __inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex));
static void vm_page_free_wakeup __P((void));

/*
 *      vm_set_page_size:
 *
 *      Sets the page size, perhaps based upon the memory
 *      size.  Must be called before any use of page-size
 *      dependent functions.
 *
 *      Sets page_shift and page_mask from cnt.v_page_size.
 */
void
vm_set_page_size()
{

        if (cnt.v_page_size == 0)
                cnt.v_page_size = DEFAULT_PAGE_SIZE;
        page_mask = cnt.v_page_size - 1;
        if ((page_mask & cnt.v_page_size) != 0)
                panic("vm_set_page_size: page size not a power of two");
        for (page_shift = 0;; page_shift++)
                if ((1 << page_shift) == cnt.v_page_size)
                        break;
}
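
/*
 * Illustrative values: for a 4096-byte page, page_mask becomes 0xfff
 * and the loop above terminates with page_shift == 12, since
 * 1 << 12 == 4096.
 */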

/*
 *      vm_page_startup:
 *
 *      Initializes the resident memory module.
 *
 *      Allocates memory for the page cells, and
 *      for the object/offset-to-page hash table headers.
 *      Each page cell is initialized and placed on the free list.
 */

vm_offset_t
vm_page_startup(starta, enda, vaddr)
        register vm_offset_t starta;
        vm_offset_t enda;
        register vm_offset_t vaddr;
{
        register vm_offset_t mapped;
        register vm_page_t m;
        register struct vm_page **bucket;
        vm_size_t npages, page_range;
        register vm_offset_t new_start;
        int i;
        vm_offset_t pa;
        int nblocks;
        vm_offset_t first_managed_page;

        /* the biggest memory array is the second group of pages */
        vm_offset_t start;
        vm_offset_t biggestone, biggestsize;

        vm_offset_t total;

        total = 0;
        biggestsize = 0;
        biggestone = 0;
        nblocks = 0;
        vaddr = round_page(vaddr);

        for (i = 0; phys_avail[i + 1]; i += 2) {
                phys_avail[i] = round_page(phys_avail[i]);
                phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
        }

        for (i = 0; phys_avail[i + 1]; i += 2) {
                int size = phys_avail[i + 1] - phys_avail[i];

                if (size > biggestsize) {
                        biggestone = i;
                        biggestsize = size;
                }
                ++nblocks;
                total += size;
        }

        start = phys_avail[biggestone];

        /*
         * Initialize the queue headers for the free queue, the active queue
         * and the inactive queue.
         */

        vm_page_queue_init();

        /*
         * Allocate (and initialize) the hash table buckets.
         *
         * The number of buckets MUST BE a power of 2, and the actual value is
         * the next power of 2 greater than the number of physical pages in
         * the system.
         *
         * We make the hash table approximately 2x the number of pages to
         * reduce the chain length.  This is about the same size using the
         * singly-linked list as the 1x hash table we were using before
         * using TAILQ but the chain length will be smaller.
         *
         * Note: This computation can be tweaked if desired.
         */
        vm_page_buckets = (struct vm_page **)vaddr;
        bucket = vm_page_buckets;
        if (vm_page_bucket_count == 0) {
                vm_page_bucket_count = 1;
                while (vm_page_bucket_count < atop(total))
                        vm_page_bucket_count <<= 1;
        }
        vm_page_bucket_count <<= 1;
        vm_page_hash_mask = vm_page_bucket_count - 1;

        /*
         * Validate these addresses.
         */

        new_start = start + vm_page_bucket_count * sizeof(struct vm_page *);
        new_start = round_page(new_start);
        mapped = round_page(vaddr);
        vaddr = pmap_map(mapped, start, new_start,
            VM_PROT_READ | VM_PROT_WRITE);
        start = new_start;
        vaddr = round_page(vaddr);
        bzero((caddr_t) mapped, vaddr - mapped);

        for (i = 0; i < vm_page_bucket_count; i++) {
                *bucket = NULL;
                bucket++;
        }

        /*
         * Compute the number of pages of memory that will be available for
         * use (taking into account the overhead of a page structure per
         * page).
         */

        first_page = phys_avail[0] / PAGE_SIZE;
        last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;

        page_range = last_page - (phys_avail[0] / PAGE_SIZE);
        npages = (total - (page_range * sizeof(struct vm_page)) -
            (start - phys_avail[biggestone])) / PAGE_SIZE;

        /*
         * Initialize the mem entry structures now, and put them in the free
         * queue.
         */
        vm_page_array = (vm_page_t) vaddr;
        mapped = vaddr;

        /*
         * Validate these addresses.
         */
        new_start = round_page(start + page_range * sizeof(struct vm_page));
        mapped = pmap_map(mapped, start, new_start,
            VM_PROT_READ | VM_PROT_WRITE);
        start = new_start;

        first_managed_page = start / PAGE_SIZE;

        /*
         * Clear all of the page structures
         */
        bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
        vm_page_array_size = page_range;

        cnt.v_page_count = 0;
        cnt.v_free_count = 0;
        for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
                if (i == biggestone)
                        pa = ptoa(first_managed_page);
                else
                        pa = phys_avail[i];
                while (pa < phys_avail[i + 1] && npages-- > 0) {
                        ++cnt.v_page_count;
                        ++cnt.v_free_count;
                        m = PHYS_TO_VM_PAGE(pa);
                        m->phys_addr = pa;
                        m->flags = 0;
                        m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
                        m->queue = m->pc + PQ_FREE;
                        TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
                        ++(*vm_page_queues[m->queue].lcnt);
                        pa += PAGE_SIZE;
                }
        }
        return (mapped);
}

/*
 *      vm_page_hash:
 *
 *      Distributes the object/offset key pair among hash buckets.
 *
 *      NOTE:  This macro depends on vm_page_bucket_count being a power of 2.
 *      This routine may not block.
 *
 *      We try to randomize the hash based on the object to spread the pages
 *      out in the hash table without it costing us too much.
 */
static __inline int
vm_page_hash(object, pindex)
        vm_object_t object;
        vm_pindex_t pindex;
{
        int i = ((uintptr_t)object + pindex) ^ object->hash_rand;

        return (i & vm_page_hash_mask);
}
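
/*
 * Illustrative hash computation (made-up numbers): for an object at
 * 0xc1234000 with hash_rand 0x1b3 and pindex 5,
 * i == (0xc1234000 + 5) ^ 0x1b3 == 0xc12341b6; with
 * vm_page_bucket_count == 8192 the mask is 0x1fff, so the page lands
 * in bucket 0x1b6.
 */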

/*
 *      vm_page_insert:         [ internal use only ]
 *
 *      Inserts the given mem entry into the object and object list.
 *
 *      The pagetables are not updated but will presumably fault the page
 *      in if necessary, or if a kernel page the caller will at some point
 *      enter the page into the kernel's pmap.  We are not allowed to block
 *      here so we *can't* do this anyway.
 *
 *      The object and page must be locked, and must be splhigh.
 *      This routine may not block.
 */

void
vm_page_insert(m, object, pindex)
        register vm_page_t m;
        register vm_object_t object;
        register vm_pindex_t pindex;
{
        register struct vm_page **bucket;

        if (m->object != NULL)
                panic("vm_page_insert: already inserted");

        /*
         * Record the object/offset pair in this page
         */

        m->object = object;
        m->pindex = pindex;

        /*
         * Insert it into the object_object/offset hash table
         */

        bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
        m->hnext = *bucket;
        *bucket = m;
        vm_page_bucket_generation++;

        /*
         * Now link into the object's list of backed pages.
         */

        TAILQ_INSERT_TAIL(&object->memq, m, listq);
#if 0
        m->object->page_hint = m;
#endif
        m->object->generation++;

        if (m->wire_count)
                object->wire_count++;

        if ((m->queue - m->pc) == PQ_CACHE)
                object->cache_count++;

        /*
         * show that the object has one more resident page.
         */

        object->resident_page_count++;
}

/*
 *      vm_page_remove:
 *                              NOTE: used by device pager as well -wfj
 *
 *      Removes the given mem entry from the object/offset-page
 *      table and the object page list, but does not invalidate/terminate
 *      the backing store.
 *
 *      The object and page must be locked, and at splhigh.
 *      The underlying pmap entry (if any) is NOT removed here.
 *      This routine may not block.
 */

vm_object_t
vm_page_remove(m)
        vm_page_t m;
{
        register struct vm_page **bucket;
        vm_object_t object;

        if (m->object == NULL)
                return (NULL);

#if !defined(MAX_PERF)
        if ((m->flags & PG_BUSY) == 0) {
                panic("vm_page_remove: page not busy");
        }
#endif

        /*
         * Basically destroy the page.
         */

        vm_page_wakeup(m);

        object = m->object;

        if (m->wire_count)
                object->wire_count--;

        if ((m->queue - m->pc) == PQ_CACHE)
                object->cache_count--;

        /*
         * Remove from the object_object/offset hash table.  The object
         * must be on the hash queue, we will panic if it isn't
         *
         * Note: we must NULL-out m->hnext to prevent loops in detached
         * buffers with vm_page_lookup().
         */

        bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
        while (*bucket != m) {
#if !defined(MAX_PERF)
                if (*bucket == NULL)
                        panic("vm_page_remove(): page not found in hash");
#endif
                bucket = &(*bucket)->hnext;
        }
        *bucket = m->hnext;
        m->hnext = NULL;
        vm_page_bucket_generation++;

        /*
         * Now remove from the object's list of backed pages.
         */

        TAILQ_REMOVE(&object->memq, m, listq);

        /*
         * And show that the object has one fewer resident page.
         */

        object->resident_page_count--;
        object->generation++;

        m->object = NULL;

        return (object);
}
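
/*
 * Illustrative walk of the unlink loop above: if the bucket chain is
 * A -> B -> C and we are removing B, the loop starts with bucket
 * pointing at the head pointer (A != B), advances to &A->hnext
 * (*bucket == B), and the final assignment splices the chain to
 * A -> C while B's hnext is cleared.
 */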

/*
 *      vm_page_lookup:
 *
 *      Returns the page associated with the object/offset
 *      pair specified; if none is found, NULL is returned.
 *
 *      NOTE: the code below does not lock.  It will operate properly if
 *      an interrupt makes a change, but the generation algorithm will not
 *      operate properly in an SMP environment where both CPUs are able to
 *      run kernel code simultaneously.
 *
 *      The object must be locked.  No side effects.
 *      This routine may not block.
 *      This is a critical path routine
 */

vm_page_t
vm_page_lookup(object, pindex)
        register vm_object_t object;
        register vm_pindex_t pindex;
{
        register vm_page_t m;
        register struct vm_page **bucket;
        int generation;

        /*
         * Search the hash table for this object/offset pair
         */

#if 0
        if (object->page_hint && (object->page_hint->pindex == pindex) &&
            (object->page_hint->object == object))
                return object->page_hint;
#endif

retry:
        generation = vm_page_bucket_generation;
        bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
        for (m = *bucket; m != NULL; m = m->hnext) {
                if ((m->object == object) && (m->pindex == pindex)) {
                        if (vm_page_bucket_generation != generation)
                                goto retry;
#if 0
                        m->object->page_hint = m;
#endif
                        return (m);
                }
        }
        if (vm_page_bucket_generation != generation)
                goto retry;
        return (NULL);
}

/*
 *      vm_page_rename:
 *
 *      Move the given memory entry from its
 *      current object to the specified target object/offset.
 *
 *      The object must be locked.
 *      This routine may not block.
 *
 *      Note: this routine will raise itself to splvm(), the caller need not.
 *
 *      Note: swap associated with the page must be invalidated by the move.
 *            We have to do this for several reasons:  (1) we aren't freeing
 *            the page, (2) we are dirtying the page, (3) the VM system is
 *            probably moving the page from object A to B, and will then
 *            later move the backing store from A to B and we can't have a
 *            conflict.
 *
 *      Note: we *always* dirty the page.  It is necessary both for the
 *            fact that we moved it, and because we may be invalidating
 *            swap.
 */

void
vm_page_rename(m, new_object, new_pindex)
        register vm_page_t m;
        register vm_object_t new_object;
        vm_pindex_t new_pindex;
{
        int s;

        s = splvm();
        vm_page_remove(m);
        vm_page_insert(m, new_object, new_pindex);
        m->dirty = VM_PAGE_BITS_ALL;
        splx(s);
}

/*
 * vm_page_unqueue_nowakeup:
 *
 *      vm_page_unqueue() without any wakeup
 *
 *      This routine must be called at splhigh().
 *      This routine may not block.
 */

void
vm_page_unqueue_nowakeup(m)
        vm_page_t m;
{
        int queue = m->queue;
        struct vpgqueues *pq;

        if (queue != PQ_NONE) {
                pq = &vm_page_queues[queue];
                m->queue = PQ_NONE;
                TAILQ_REMOVE(pq->pl, m, pageq);
                (*pq->cnt)--;
                (*pq->lcnt)--;
                if ((queue - m->pc) == PQ_CACHE) {
                        if (m->object)
                                m->object->cache_count--;
                }
        }
}

/*
 * vm_page_unqueue:
 *
 *      Remove a page from its queue.
 *
 *      This routine must be called at splhigh().
 *      This routine may not block.
 */

void
vm_page_unqueue(m)
        vm_page_t m;
{
        int queue = m->queue;
        struct vpgqueues *pq;

        if (queue != PQ_NONE) {
                m->queue = PQ_NONE;
                pq = &vm_page_queues[queue];
                TAILQ_REMOVE(pq->pl, m, pageq);
                (*pq->cnt)--;
                (*pq->lcnt)--;
                if ((queue - m->pc) == PQ_CACHE) {
                        if ((cnt.v_cache_count + cnt.v_free_count) <
                            (cnt.v_free_reserved + cnt.v_cache_min))
                                pagedaemon_wakeup();
                        if (m->object)
                                m->object->cache_count--;
                }
        }
}
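
/*
 * Note on the (queue - m->pc) idiom used above: a page with color c
 * sitting on a colored queue has m->queue == BASE + c, where BASE is
 * PQ_FREE, PQ_ZERO or PQ_CACHE and c == m->pc.  Subtracting the color
 * recovers the base queue; e.g. a color-3 page on the cache queue has
 * m->queue == PQ_CACHE + 3, so (m->queue - m->pc) == PQ_CACHE.
 */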

/*
 * vm_page_list_find:
 *
 *      Find a page on the specified queue with color optimization.
 *
 *      The page coloring optimization attempts to locate a page
 *      that does not overload other nearby pages in the object in
 *      the cpu's L1 or L2 caches.  We need this optimization because
 *      cpu caches tend to be physical caches, while object spaces tend
 *      to be virtual.
 *
 *      This routine must be called at splvm().
 *      This routine may not block.
 */
vm_page_t
vm_page_list_find(basequeue, index)
        int basequeue, index;
{
#if PQ_L2_SIZE > 1

        int i, j;
        vm_page_t m;
        int hindex;
        struct vpgqueues *pq;

        pq = &vm_page_queues[basequeue];

        m = TAILQ_FIRST(pq[index].pl);
        if (m)
                return (m);

        for (j = 0; j < PQ_L1_SIZE; j++) {
                int ij;
                for (i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
                    (ij = i + j) > 0;
                    i -= PQ_L1_SIZE) {

                        hindex = index + ij;
                        if (hindex >= PQ_L2_SIZE)
                                hindex -= PQ_L2_SIZE;
                        if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
                                return (m);

                        hindex = index - ij;
                        if (hindex < 0)
                                hindex += PQ_L2_SIZE;
                        if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
                                return (m);
                }
        }

        hindex = index + PQ_L2_SIZE / 2;
        if (hindex >= PQ_L2_SIZE)
                hindex -= PQ_L2_SIZE;
        m = TAILQ_FIRST(pq[hindex].pl);
        if (m)
                return (m);

        return (NULL);
#else
        return (TAILQ_FIRST(vm_page_queues[basequeue].pl));
#endif
}

/*
 * vm_page_select:
 *
 *      Find a page on the specified queue with color optimization.
 *
 *      This routine must be called at splvm().
 *      This routine may not block.
 */
vm_page_t
vm_page_select(object, pindex, basequeue)
        vm_object_t object;
        vm_pindex_t pindex;
        int basequeue;
{

#if PQ_L2_SIZE > 1
        int index;

        index = (pindex + object->pg_color) & PQ_L2_MASK;
        return (vm_page_list_find(basequeue, index));

#else
        return (TAILQ_FIRST(vm_page_queues[basequeue].pl));
#endif
}

/*
 * vm_page_select_cache:
 *
 *      Find a page on the cache queue with color optimization.  As pages
 *      might be found, but not applicable, they are deactivated.  This
 *      keeps us from using potentially busy cached pages.
 *
 *      This routine must be called at splvm().
 *      This routine may not block.
 */
vm_page_t
vm_page_select_cache(object, pindex)
        vm_object_t object;
        vm_pindex_t pindex;
{
        vm_page_t m;

        while (TRUE) {
#if PQ_L2_SIZE > 1
                int index;

                index = (pindex + object->pg_color) & PQ_L2_MASK;
                m = vm_page_list_find(PQ_CACHE, index);

#else
                m = TAILQ_FIRST(vm_page_queues[PQ_CACHE].pl);
#endif
                if (m && ((m->flags & PG_BUSY) || m->busy ||
                    m->hold_count || m->wire_count)) {
                        vm_page_deactivate(m);
                        continue;
                }
                return (m);
        }
}
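
/*
 * Illustrative probe order for vm_page_list_find() (assuming
 * PQ_L2_SIZE == 16 and PQ_L1_SIZE == 2): after a miss on the exact
 * color `index', the nested loops scan index +/- 6, +/- 4, +/- 2
 * (all mod 16) on the first j pass, then +/- 7, +/- 5, +/- 3, +/- 1
 * on the second, and finally index + 8.  Offsets that preserve the
 * page's L1 color (even distances here) are thus tried before those
 * that would conflict in the L1 cache.
 */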

/*
 * vm_page_select_free:
 *
 *      Find a free or zero page, with specified preference.
 *
 *      This routine must be called at splvm().
 *      This routine may not block.
 */

static vm_page_t
vm_page_select_free(object, pindex, prefqueue)
        vm_object_t object;
        vm_pindex_t pindex;
        int prefqueue;
{
#if PQ_L2_SIZE > 1
        int i, j;
        int index, hindex;
#endif
        vm_page_t m;
#if 0
        vm_page_t mh;
#endif
        int oqueuediff;
        struct vpgqueues *pq;

        if (prefqueue == PQ_ZERO)
                oqueuediff = PQ_FREE - PQ_ZERO;
        else
                oqueuediff = PQ_ZERO - PQ_FREE;

#if 0
        if ((mh = object->page_hint) != NULL) {
                if (mh->pindex == (pindex - 1)) {
                        if ((mh->flags & PG_FICTITIOUS) == 0) {
                                if ((mh < &vm_page_array[cnt.v_page_count-1]) &&
                                    (mh >= &vm_page_array[0])) {
                                        int queue;
                                        m = mh + 1;
                                        if (VM_PAGE_TO_PHYS(m) == (VM_PAGE_TO_PHYS(mh) + PAGE_SIZE)) {
                                                queue = m->queue - m->pc;
                                                if (queue == PQ_FREE || queue == PQ_ZERO) {
                                                        return (m);
                                                }
                                        }
                                }
                        }
                }
        }
#endif

        pq = &vm_page_queues[prefqueue];

#if PQ_L2_SIZE > 1

        index = (pindex + object->pg_color) & PQ_L2_MASK;

        if ((m = TAILQ_FIRST(pq[index].pl)) != NULL)
                return (m);
        if ((m = TAILQ_FIRST(pq[index + oqueuediff].pl)) != NULL)
                return (m);

        for (j = 0; j < PQ_L1_SIZE; j++) {
                int ij;
                for (i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
                    (ij = i + j) >= 0;
                    i -= PQ_L1_SIZE) {

                        hindex = index + ij;
                        if (hindex >= PQ_L2_SIZE)
                                hindex -= PQ_L2_SIZE;
                        if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
                                return (m);
                        if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
                                return (m);

                        hindex = index - ij;
                        if (hindex < 0)
                                hindex += PQ_L2_SIZE;
                        if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
                                return (m);
                        if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
                                return (m);
                }
        }

        hindex = index + PQ_L2_SIZE / 2;
        if (hindex >= PQ_L2_SIZE)
                hindex -= PQ_L2_SIZE;
        if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
                return (m);
        if ((m = TAILQ_FIRST(pq[hindex+oqueuediff].pl)) != NULL)
                return (m);

#else
        if ((m = TAILQ_FIRST(pq[0].pl)) != NULL)
                return (m);
        else
                return (TAILQ_FIRST(pq[oqueuediff].pl));
#endif

        return (NULL);
}
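
/*
 * Note on oqueuediff: vm_page_queues[] lays the PQ_FREE and PQ_ZERO
 * colored queues out as parallel arrays, so pq[index + oqueuediff]
 * addresses the same color in the other queue.  E.g. if prefqueue is
 * PQ_ZERO, a miss on pre-zeroed pages of a given color falls back to
 * the ordinary free queue of that color before other colors are tried.
 */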

/*
 *      vm_page_alloc:
 *
 *      Allocate and return a memory cell associated
 *      with this VM object/offset pair.
 *
 *      page_req classes:
 *      VM_ALLOC_NORMAL         normal process request
 *      VM_ALLOC_SYSTEM         system *really* needs a page
 *      VM_ALLOC_INTERRUPT      interrupt time request
 *      VM_ALLOC_ZERO           zero page
 *
 *      Object must be locked.
 *      This routine may not block.
 *
 *      Additional special handling is required when called from an
 *      interrupt (VM_ALLOC_INTERRUPT).  We are not allowed to mess with
 *      the page cache in this case.
 */
vm_page_t
vm_page_alloc(object, pindex, page_req)
        vm_object_t object;
        vm_pindex_t pindex;
        int page_req;
{
        register vm_page_t m = NULL;
        struct vpgqueues *pq;
        vm_object_t oldobject;
        int queue, qtype;
        int s;

        KASSERT(!vm_page_lookup(object, pindex),
            ("vm_page_alloc: page already allocated"));

        /*
         * The pager is allowed to eat deeper into the free page list.
         */

        if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
                page_req = VM_ALLOC_SYSTEM;
        }

        s = splvm();

loop:
        switch (page_req) {

        case VM_ALLOC_NORMAL:
                if (cnt.v_free_count >= cnt.v_free_reserved) {
                        m = vm_page_select_free(object, pindex, PQ_FREE);
                        KASSERT(m != NULL, ("vm_page_alloc(NORMAL): missing page on free queue\n"));
                } else {
                        m = vm_page_select_cache(object, pindex);
                        if (m == NULL) {
                                splx(s);
#if defined(DIAGNOSTIC)
                                if (cnt.v_cache_count > 0)
                                        printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
                                vm_pageout_deficit++;
                                pagedaemon_wakeup();
                                return (NULL);
                        }
                }
                break;

        case VM_ALLOC_ZERO:
                if (cnt.v_free_count >= cnt.v_free_reserved) {
                        m = vm_page_select_free(object, pindex, PQ_ZERO);
                        KASSERT(m != NULL, ("vm_page_alloc(ZERO): missing page on free queue\n"));
                } else {
                        m = vm_page_select_cache(object, pindex);
                        if (m == NULL) {
                                splx(s);
#if defined(DIAGNOSTIC)
                                if (cnt.v_cache_count > 0)
                                        printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
                                vm_pageout_deficit++;
                                pagedaemon_wakeup();
                                return (NULL);
                        }
                }
                break;

        case VM_ALLOC_SYSTEM:
                if ((cnt.v_free_count >= cnt.v_free_reserved) ||
                    ((cnt.v_cache_count == 0) &&
                    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
                        m = vm_page_select_free(object, pindex, PQ_FREE);
                        KASSERT(m != NULL, ("vm_page_alloc(SYSTEM): missing page on free queue\n"));
                } else {
                        m = vm_page_select_cache(object, pindex);
                        if (m == NULL) {
                                splx(s);
#if defined(DIAGNOSTIC)
                                if (cnt.v_cache_count > 0)
                                        printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
                                vm_pageout_deficit++;
                                pagedaemon_wakeup();
                                return (NULL);
                        }
                }
                break;

        case VM_ALLOC_INTERRUPT:
                if (cnt.v_free_count > 0) {
                        m = vm_page_select_free(object, pindex, PQ_FREE);
                        KASSERT(m != NULL, ("vm_page_alloc(INTERRUPT): missing page on free queue\n"));
                } else {
                        splx(s);
                        vm_pageout_deficit++;
                        pagedaemon_wakeup();
                        return (NULL);
                }
                break;

        default:
                m = NULL;
#if !defined(MAX_PERF)
                panic("vm_page_alloc: invalid allocation class");
#endif
        }

        queue = m->queue;
        qtype = queue - m->pc;

        /*
         * Cache pages must be formally freed (and doubly so with the
         * new pagerops functions).  We free the page and try again.
         *
         * This also has the side effect of ensuring that the minfreepage
         * wall is held more tightly versus the old code.
         */

        if (qtype == PQ_CACHE) {
#if !defined(MAX_PERF)
                if (m->dirty)
                        panic("found dirty cache page %p", m);
#endif
                vm_page_busy(m);
                vm_page_protect(m, VM_PROT_NONE);
                vm_page_free(m);
                goto loop;
        }

        pq = &vm_page_queues[queue];
        TAILQ_REMOVE(pq->pl, m, pageq);
        (*pq->cnt)--;
        (*pq->lcnt)--;
        oldobject = NULL;

        if (qtype == PQ_ZERO) {
                vm_page_zero_count--;
                m->flags = PG_ZERO | PG_BUSY;
        } else {
                m->flags = PG_BUSY;
        }
        m->wire_count = 0;
        m->hold_count = 0;
        m->act_count = 0;
        m->busy = 0;
        m->valid = 0;
        m->dirty = 0;
        m->queue = PQ_NONE;

        /*
         * vm_page_insert() is safe prior to the splx().  Note also that
         * inserting a page here does not insert it into the pmap (which
         * could cause us to block allocating memory).  We cannot block
         * anywhere.
         */

        vm_page_insert(m, object, pindex);

        /*
         * Don't wakeup too often - wakeup the pageout daemon when
         * we would be nearly out of memory.
         */
        if (((cnt.v_free_count + cnt.v_cache_count) <
            (cnt.v_free_reserved + cnt.v_cache_min)) ||
            (cnt.v_free_count < cnt.v_pageout_free_min))
                pagedaemon_wakeup();

#if 0
        /*
         * (code removed - was previously a manual breakout of the act of
         * freeing a page from cache.  We now just call vm_page_free() on
         * a cache page and loop so this code no longer needs to be here)
         */
        if ((qtype == PQ_CACHE) &&
            ((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) &&
            oldobject && (oldobject->type == OBJT_VNODE) &&
            ((oldobject->flags & OBJ_DEAD) == 0)) {
                struct vnode *vp;
                vp = (struct vnode *) oldobject->handle;
                if (vp && VSHOULDFREE(vp)) {
                        if ((vp->v_flag & (VFREE|VTBFREE|VDOOMED)) == 0) {
                                TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
                                vp->v_flag |= VTBFREE;
                        }
                }
        }
#endif
        splx(s);

        return (m);
}
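
/*
 * Typical caller pattern (illustrative sketch, not from this file):
 * since vm_page_alloc() itself never blocks, callers that may sleep
 * usually retry around VM_WAIT:
 *
 *      do {
 *              m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 *              if (m == NULL)
 *                      VM_WAIT;
 *      } while (m == NULL);
 */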

/*
 *      vm_wait:        (also see VM_WAIT macro)
 *
 *      Block until free pages are available for allocation
 */

void
vm_wait()
{
        int s;

        s = splvm();
        if (curproc == pageproc) {
                vm_pageout_pages_needed = 1;
                tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
        } else {
                if (!vm_pages_needed) {
                        vm_pages_needed++;
                        wakeup(&vm_pages_needed);
                }
                tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
        }
        splx(s);
}

/*
 *      vm_await:       (also see VM_AWAIT macro)
 *
 *      asleep on an event that will signal when free pages are available
 *      for allocation.
 */

void
vm_await()
{
        int s;

        s = splvm();
        if (curproc == pageproc) {
                vm_pageout_pages_needed = 1;
                asleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
        } else {
                if (!vm_pages_needed) {
                        vm_pages_needed++;
                        wakeup(&vm_pages_needed);
                }
                asleep(&cnt.v_free_count, PVM, "vmwait", 0);
        }
        splx(s);
}

#if 0
/*
 *      vm_page_sleep:
 *
 *      Block until page is no longer busy.
 */

int
vm_page_sleep(vm_page_t m, char *msg, char *busy)
{
        int slept = 0;

        if ((busy && *busy) || (m->flags & PG_BUSY)) {
                int s;
                s = splvm();
                if ((busy && *busy) || (m->flags & PG_BUSY)) {
                        vm_page_flag_set(m, PG_WANTED);
                        tsleep(m, PVM, msg, 0);
                        slept = 1;
                }
                splx(s);
        }
        return (slept);
}

#endif

#if 0

/*
 *      vm_page_asleep:
 *
 *      Similar to vm_page_sleep(), but does not block.  Returns 0 if
 *      the page is not busy, or 1 if the page is busy.
 *
 *      This routine has the side effect of calling asleep() if the page
 *      was busy (1 returned).
 */

int
vm_page_asleep(vm_page_t m, char *msg, char *busy)
{
        int slept = 0;

        if ((busy && *busy) || (m->flags & PG_BUSY)) {
                int s;
                s = splvm();
                if ((busy && *busy) || (m->flags & PG_BUSY)) {
                        vm_page_flag_set(m, PG_WANTED);
                        asleep(m, PVM, msg, 0);
                        slept = 1;
                }
                splx(s);
        }
        return (slept);
}

#endif

/*
 *      vm_page_activate:
 *
 *      Put the specified page on the active list (if appropriate).
 *
 *      The page queues must be locked.
 *      This routine may not block.
 */
void
vm_page_activate(m)
        register vm_page_t m;
{
        int s;

        s = splvm();
        if (m->queue != PQ_ACTIVE) {
                if ((m->queue - m->pc) == PQ_CACHE)
                        cnt.v_reactivated++;

                vm_page_unqueue(m);

                if (m->wire_count == 0) {
                        m->queue = PQ_ACTIVE;
                        ++(*vm_page_queues[PQ_ACTIVE].lcnt);
                        TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
                        if (m->act_count < ACT_INIT)
                                m->act_count = ACT_INIT;
                        cnt.v_active_count++;
                }
        } else {
                if (m->act_count < ACT_INIT)
                        m->act_count = ACT_INIT;
        }

        splx(s);
}

/*
 *      helper routine for vm_page_free and vm_page_free_zero.
 *
 *      This routine may not block.
 */
static __inline void
vm_page_free_wakeup()
{
        /*
         * if pageout daemon needs pages, then tell it that there are
         * some free.
         */
        if (vm_pageout_pages_needed) {
                wakeup(&vm_pageout_pages_needed);
                vm_pageout_pages_needed = 0;
        }
        /*
         * wakeup processes that are waiting on memory if we hit a
         * high water mark.  And wakeup scheduler process if we have
         * lots of memory.  this process will swapin processes.
         */
        if (vm_pages_needed &&
            ((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
                wakeup(&cnt.v_free_count);
                vm_pages_needed = 0;
        }
}
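
/*
 * Note the sleep/wakeup channel pairing: vm_wait() and vm_await()
 * sleep on &cnt.v_free_count (or on &vm_pageout_pages_needed when the
 * caller is the pagedaemon itself), and vm_page_free_wakeup() above
 * issues the matching wakeup()s once enough memory has been freed.
 */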

/*
 *      vm_page_free_toq:
 *
 *      Returns the given page to the PQ_FREE or PQ_ZERO list,
 *      disassociating it from any VM object.
 *
 *      Object and page must be locked prior to entry.
 *      This routine may not block.
 */

void
vm_page_free_toq(vm_page_t m, int queue)
{
        int s;
        struct vpgqueues *pq;
        vm_object_t object = m->object;

        s = splvm();

        cnt.v_tfree++;

#if !defined(MAX_PERF)
        if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
            (m->hold_count != 0)) {
                printf(
                "vm_page_free: pindex(%lu), busy(%d), PG_BUSY(%d), hold(%d)\n",
                    (u_long)m->pindex, m->busy, (m->flags & PG_BUSY) ? 1 : 0,
                    m->hold_count);
                if ((m->queue - m->pc) == PQ_FREE)
                        panic("vm_page_free: freeing free page");
                else
                        panic("vm_page_free: freeing busy page");
        }
#endif

        /*
         * unqueue, then remove page.  Note that we cannot destroy
         * the page here because we do not want to call the pager's
         * callback routine until after we've put the page on the
         * appropriate free queue.
         */

        vm_page_unqueue_nowakeup(m);
        vm_page_remove(m);

        /*
         * If fictitious remove object association and
         * return, otherwise delay object association removal.
         */

        if ((m->flags & PG_FICTITIOUS) != 0) {
                splx(s);
                return;
        }

        m->valid = 0;

        if (m->wire_count != 0) {
#if !defined(MAX_PERF)
                if (m->wire_count > 1) {
                        panic("vm_page_free: invalid wire count (%d), pindex: 0x%x",
                            m->wire_count, m->pindex);
                }
#endif
                printf("vm_page_free: freeing wired page\n");
                m->wire_count = 0;
                if (m->object)
                        m->object->wire_count--;
                cnt.v_wire_count--;
        }

        /*
         * If we've exhausted the object's resident pages we want to free
         * it up.
         */

        if (object &&
            (object->type == OBJT_VNODE) &&
            ((object->flags & OBJ_DEAD) == 0)) {
                struct vnode *vp = (struct vnode *)object->handle;

                if (vp && VSHOULDFREE(vp)) {
                        if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) {
                                TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
                                vp->v_flag |= VTBFREE;
                        }
                }
        }

#ifdef __alpha__
        pmap_page_is_free(m);
#endif

        m->queue = queue + m->pc;
        pq = &vm_page_queues[m->queue];
        ++(*pq->lcnt);
        ++(*pq->cnt);

        if (queue == PQ_ZERO) {
                TAILQ_INSERT_HEAD(pq->pl, m, pageq);
                ++vm_page_zero_count;
        } else {
                /*
                 * If the pageout process is grabbing the page, it is likely
                 * that the page is NOT in the cache.  It is more likely that
                 * the page will be partially in the cache if it is being
                 * explicitly freed.
                 */

                if (curproc == pageproc) {
                        TAILQ_INSERT_TAIL(pq->pl, m, pageq);
                } else {
                        TAILQ_INSERT_HEAD(pq->pl, m, pageq);
                }
        }

        vm_page_free_wakeup();

        splx(s);
}
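
/*
 * Note: callers normally reach this routine through vm_page_free()
 * and vm_page_free_zero() (not defined in this file), which appear to
 * be thin wrappers passing PQ_FREE or PQ_ZERO, respectively, for the
 * queue argument.
 */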

/*
 *      vm_page_wire:
 *
 *      Mark this page as wired down by yet
 *      another map, removing it from paging queues
 *      as necessary.
 *
 *      The page queues must be locked.
 *      This routine may not block.
 */
void
vm_page_wire(m)
        register vm_page_t m;
{
        int s;

        s = splvm();
        if (m->wire_count == 0) {
                vm_page_unqueue(m);
                cnt.v_wire_count++;
                if (m->object)
                        m->object->wire_count++;
        }
        m->wire_count++;
        splx(s);
        (*vm_page_queues[PQ_NONE].lcnt)++;
        vm_page_flag_set(m, PG_MAPPED);
}

/*
 *      vm_page_unwire:
 *
 *      Release one wiring of this page, potentially
 *      enabling it to be paged again.
 *
 *      Many pages placed on the inactive queue should actually go
 *      into the cache, but it is difficult to figure out which.  What
 *      we do instead, if the inactive target is well met, is to put
 *      clean pages at the head of the inactive queue instead of the tail.
 *      This will cause them to be moved to the cache more quickly and
 *      if not actively re-referenced, freed more quickly.  If we just
 *      stick these pages at the end of the inactive queue, heavy filesystem
 *      meta-data accesses can cause an unnecessary paging load on memory
 *      bound processes.  This optimization causes one-time-use metadata to
 *      be reused more quickly.
 *
 *      The page queues must be locked.
 *      This routine may not block.
 */
void
vm_page_unwire(m, activate)
        register vm_page_t m;
        int activate;
{
        int s;

        s = splvm();

        if (m->wire_count > 0) {
                m->wire_count--;
                if (m->wire_count == 0) {
                        if (m->object)
                                m->object->wire_count--;
                        cnt.v_wire_count--;
                        if (activate) {
                                TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
                                m->queue = PQ_ACTIVE;
                                (*vm_page_queues[PQ_ACTIVE].lcnt)++;
                                cnt.v_active_count++;
                        } else {
                                TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
                                m->queue = PQ_INACTIVE;
                                (*vm_page_queues[PQ_INACTIVE].lcnt)++;
                                cnt.v_inactive_count++;
                        }
                }
        } else {
#if !defined(MAX_PERF)
                panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count);
#endif
        }
        splx(s);
}
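
/*
 * Wire-count lifecycle (illustrative): the 0 -> 1 transition in
 * vm_page_wire() removes the page from whatever paging queue it was
 * on, and the matching 1 -> 0 transition in vm_page_unwire() places
 * it back on the active or inactive queue as requested; intermediate
 * wirings just increment and decrement the count.
 */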

/*
 * Move the specified page to the inactive queue.  If the page has
 * any associated swap, the swap is deallocated.
 *
 * This routine may not block.
 */
void
vm_page_deactivate(m)
        register vm_page_t m;
{
        int s;

        /*
         * Ignore if already inactive.
         */
        if (m->queue == PQ_INACTIVE)
                return;

        s = splvm();
        if (m->wire_count == 0) {
                if ((m->queue - m->pc) == PQ_CACHE)
                        cnt.v_reactivated++;
                vm_page_unqueue(m);
                TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
                m->queue = PQ_INACTIVE;
                ++(*vm_page_queues[PQ_INACTIVE].lcnt);
                cnt.v_inactive_count++;
        }
        splx(s);
}

/*
 * vm_page_cache
 *
 * Put the specified page onto the page cache queue (if appropriate).
 *
 * This routine may not block.
 */
void
vm_page_cache(m)
        register vm_page_t m;
{
        int s;

#if !defined(MAX_PERF)
        if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
                printf("vm_page_cache: attempting to cache busy page\n");
                return;
        }
#endif
        if ((m->queue - m->pc) == PQ_CACHE)
                return;

        /*
         * Remove all pmaps and indicate that the page is not
         * writeable.
         */

        vm_page_protect(m, VM_PROT_NONE);
        vm_page_flag_clear(m, PG_WRITEABLE);
#if !defined(MAX_PERF)
        if (m->dirty != 0) {
                panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
        }
#endif
        s = splvm();
        vm_page_unqueue_nowakeup(m);
        m->queue = PQ_CACHE + m->pc;
        (*vm_page_queues[m->queue].lcnt)++;
        TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
        cnt.v_cache_count++;
        m->object->cache_count++;
        vm_page_free_wakeup();
        splx(s);
}

/*
 * Grab a page, waiting until we are woken up due to the page
 * changing state.  We keep on waiting, if the page continues
 * to be in the object.  If the page doesn't exist, allocate it.
 *
 * This routine may block.
 */
vm_page_t
vm_page_grab(object, pindex, allocflags)
        vm_object_t object;
        vm_pindex_t pindex;
        int allocflags;
{
        vm_page_t m;
        int s, generation;

retrylookup:
        if ((m = vm_page_lookup(object, pindex)) != NULL) {
                if (m->busy || (m->flags & PG_BUSY)) {
                        generation = object->generation;

                        s = splvm();
                        while ((object->generation == generation) &&
                            (m->busy || (m->flags & PG_BUSY))) {
                                vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
                                tsleep(m, PVM, "pgrbwt", 0);
                                if ((allocflags & VM_ALLOC_RETRY) == 0) {
                                        splx(s);
                                        return (NULL);
                                }
                        }
                        splx(s);
                        goto retrylookup;
                } else {
                        vm_page_busy(m);
                        return (m);
                }
        }

        m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
        if (m == NULL) {
                VM_WAIT;
                if ((allocflags & VM_ALLOC_RETRY) == 0)
                        return (NULL);
                goto retrylookup;
        }

        return (m);
}
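
/*
 * Illustrative caller sketch: with VM_ALLOC_RETRY set, the code above
 * loops until it can hand back a busied page and never returns NULL,
 * so callers can write
 *
 *      m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 *
 * and use m without a NULL check; without VM_ALLOC_RETRY the caller
 * must handle a NULL return after the first sleep or allocation failure.
 */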

/*
 * mapping function for valid bits or for dirty bits in
 * a page.  May not block.
 */
__inline int
vm_page_bits(int base, int size)
{
        u_short chunk;

        if ((base == 0) && (size >= PAGE_SIZE))
                return (VM_PAGE_BITS_ALL);

        size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
        base &= PAGE_MASK;
        if (size > PAGE_SIZE - base) {
                size = PAGE_SIZE - base;
        }

        base = base / DEV_BSIZE;
        chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
        return ((chunk << base) & VM_PAGE_BITS_ALL);
}

/*
 * set a page valid and clean.  May not block.
 */
void
vm_page_set_validclean(m, base, size)
        vm_page_t m;
        int base;
        int size;
{
        int pagebits = vm_page_bits(base, size);

        m->valid |= pagebits;
        m->dirty &= ~pagebits;
        if (base == 0 && size == PAGE_SIZE)
                pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}

/*
 * set a page (partially) invalid.  May not block.
 */
void
vm_page_set_invalid(m, base, size)
        vm_page_t m;
        int base;
        int size;
{
        int bits;

        m->valid &= ~(bits = vm_page_bits(base, size));
        if (m->valid == 0)
                m->dirty &= ~bits;
        m->object->generation++;
}

/*
 * is (partial) page valid?  May not block.
 */
int
vm_page_is_valid(m, base, size)
        vm_page_t m;
        int base;
        int size;
{
        int bits = vm_page_bits(base, size);

        if (m->valid && ((m->valid & bits) == bits))
                return (1);
        else
                return (0);
}

/*
 * update dirty bits from pmap/mmu.  May not block.
 */

void
vm_page_test_dirty(m)
        vm_page_t m;
{
        if ((m->dirty != VM_PAGE_BITS_ALL) &&
            pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
                m->dirty = VM_PAGE_BITS_ALL;
        }
}
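
/*
 * Worked example for vm_page_bits() (assuming PAGE_SIZE == 4096 and
 * DEV_BSIZE == 512): a call with base 512 and size 1024 rounds size
 * to 1024, converts base to chunk 1, looks up
 * vm_page_dev_bsize_chunks[2] == 0x3, and returns 0x3 << 1 == 0x6,
 * i.e. DEV_BSIZE blocks 1 and 2 of the page.
 */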

/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
void *
contigmalloc1(size, type, flags, low, high, alignment, boundary, map)
        unsigned long size;     /* should be size_t here and for malloc() */
        struct malloc_type *type;
        int flags;
        unsigned long low;
        unsigned long high;
        unsigned long alignment;
        unsigned long boundary;
        vm_map_t map;
{
        int i, s, start;
        vm_offset_t addr, phys, tmp_addr;
        int pass;
        vm_page_t pga = vm_page_array;

        size = round_page(size);
#if !defined(MAX_PERF)
        if (size == 0)
                panic("contigmalloc1: size must not be 0");
        if ((alignment & (alignment - 1)) != 0)
                panic("contigmalloc1: alignment must be a power of 2");
        if ((boundary & (boundary - 1)) != 0)
                panic("contigmalloc1: boundary must be a power of 2");
#endif

        start = 0;
        for (pass = 0; pass <= 1; pass++) {
                s = splvm();
again:
                /*
                 * Find first page in array that is free, within range,
                 * aligned, and such that the boundary won't be crossed.
                 */
                for (i = start; i < cnt.v_page_count; i++) {
                        int pqtype;
                        phys = VM_PAGE_TO_PHYS(&pga[i]);
                        pqtype = pga[i].queue - pga[i].pc;
                        if (((pqtype == PQ_ZERO) || (pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
                            (phys >= low) && (phys < high) &&
                            ((phys & (alignment - 1)) == 0) &&
                            (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
                                break;
                }

                /*
                 * If the above failed or we will exceed the upper bound, fail.
                 */
                if ((i == cnt.v_page_count) ||
                    ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
                        vm_page_t m, next;

again1:
                        for (m = TAILQ_FIRST(&vm_page_queue_inactive);
                            m != NULL;
                            m = next) {

                                if (m->queue != PQ_INACTIVE) {
                                        break;
                                }

                                next = TAILQ_NEXT(m, pageq);
                                if (vm_page_sleep_busy(m, TRUE, "vpctw0"))
                                        goto again1;
                                vm_page_test_dirty(m);
                                if (m->dirty) {
                                        if (m->object->type == OBJT_VNODE) {
                                                vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
                                                vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
                                                VOP_UNLOCK(m->object->handle, 0, curproc);
                                                goto again1;
                                        } else if (m->object->type == OBJT_SWAP ||
                                            m->object->type == OBJT_DEFAULT) {
                                                vm_pageout_flush(&m, 1, 0);
                                                goto again1;
                                        }
                                }
                                if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
                                        vm_page_cache(m);
                        }

                        for (m = TAILQ_FIRST(&vm_page_queue_active);
                            m != NULL;
                            m = next) {

                                if (m->queue != PQ_ACTIVE) {
                                        break;
                                }

                                next = TAILQ_NEXT(m, pageq);
                                if (vm_page_sleep_busy(m, TRUE, "vpctw1"))
                                        goto again1;
                                vm_page_test_dirty(m);
                                if (m->dirty) {
                                        if (m->object->type == OBJT_VNODE) {
                                                vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
                                                vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
                                                VOP_UNLOCK(m->object->handle, 0, curproc);
                                                goto again1;
                                        } else if (m->object->type == OBJT_SWAP ||
                                            m->object->type == OBJT_DEFAULT) {
                                                vm_pageout_flush(&m, 1, 0);
                                                goto again1;
                                        }
                                }
                                if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
                                        vm_page_cache(m);
                        }

                        splx(s);
                        continue;
                }
                start = i;

                /*
                 * Check successive pages for contiguous and free.
                 */
                for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
                        int pqtype;
                        pqtype = pga[i].queue - pga[i].pc;
                        if ((VM_PAGE_TO_PHYS(&pga[i]) !=
                            (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
                            ((pqtype != PQ_ZERO) && (pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
                                start++;
                                goto again;
                        }
                }

                for (i = start; i < (start + size / PAGE_SIZE); i++) {
                        int pqtype;
                        vm_page_t m = &pga[i];

                        pqtype = m->queue - m->pc;
                        if (pqtype == PQ_CACHE) {
                                vm_page_busy(m);
                                vm_page_free(m);
                        }

                        TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
                        (*vm_page_queues[m->queue].lcnt)--;
                        cnt.v_free_count--;
                        m->valid = VM_PAGE_BITS_ALL;
                        m->flags = 0;
                        m->dirty = 0;
                        m->wire_count = 0;
                        m->busy = 0;
                        m->queue = PQ_NONE;
                        m->object = NULL;
                        vm_page_wire(m);
                }

                /*
                 * We've found a contiguous chunk that meets our requirements.
                 * Allocate kernel VM, unfree and assign the physical pages to
                 * it and return kernel VM pointer.
                 */
                tmp_addr = addr = kmem_alloc_pageable(map, size);
                if (addr == 0) {
                        /*
                         * XXX We almost never run out of kernel virtual
                         * space, so we don't make the allocated memory
                         * above available.
                         */
                        splx(s);
                        return (NULL);
                }

                for (i = start; i < (start + size / PAGE_SIZE); i++) {
                        vm_page_t m = &pga[i];
                        vm_page_insert(m, kernel_object,
                            OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
                        pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
                        tmp_addr += PAGE_SIZE;
                }

                splx(s);
                return ((void *)addr);
        }
        return (NULL);
}

void *
contigmalloc(size, type, flags, low, high, alignment, boundary)
        unsigned long size;     /* should be size_t here and for malloc() */
        struct malloc_type *type;
        int flags;
        unsigned long low;
        unsigned long high;
        unsigned long alignment;
        unsigned long boundary;
{
        return (contigmalloc1(size, type, flags, low, high, alignment, boundary,
            kernel_map));
}

vm_offset_t
vm_page_alloc_contig(size, low, high, alignment)
        vm_offset_t size;
        vm_offset_t low;
        vm_offset_t high;
        vm_offset_t alignment;
{
        return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
            alignment, 0ul, kernel_map));
}
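
/*
 * Worked example of the boundary test in contigmalloc1() (illustrative
 * numbers): with boundary == 64K (0x10000), a candidate run at phys
 * 0x1f000 of size 0x2000 gives
 * (0x1f000 ^ 0x20fff) & ~0xffff == 0x30000 != 0, so the run would
 * straddle the 64K line at 0x20000 and is rejected; a run starting at
 * 0x20000 instead gives (0x20000 ^ 0x21fff) & ~0xffff == 0 and passes.
 */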

#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <ddb/ddb.h>

DB_SHOW_COMMAND(page, vm_page_print_page_info)
{
        db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
        db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
        db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
        db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
        db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
        db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
        db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
        db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
        db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
        db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}

DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
{
        int i;

        db_printf("PQ_FREE:");
        for (i = 0; i < PQ_L2_SIZE; i++) {
                db_printf(" %d", *vm_page_queues[PQ_FREE + i].lcnt);
        }
        db_printf("\n");

        db_printf("PQ_CACHE:");
        for (i = 0; i < PQ_L2_SIZE; i++) {
                db_printf(" %d", *vm_page_queues[PQ_CACHE + i].lcnt);
        }
        db_printf("\n");

        db_printf("PQ_ZERO:");
        for (i = 0; i < PQ_L2_SIZE; i++) {
                db_printf(" %d", *vm_page_queues[PQ_ZERO + i].lcnt);
        }
        db_printf("\n");

        db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
            *vm_page_queues[PQ_ACTIVE].lcnt,
            *vm_page_queues[PQ_INACTIVE].lcnt);
}
#endif /* DDB */
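
/*
 * Usage note: with DDB compiled in, the commands above register as the
 * debugger commands "show page" and "show pageq" respectively, dumping
 * the global paging counters and the per-color queue lengths.
 */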