vm_page.c revision 36735
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 * $Id: vm_page.c,v 1.100 1998/06/02 05:50:08 dyson Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Resident memory management module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/kernel.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

static void vm_page_queue_init __P((void));
static vm_page_t vm_page_select_free __P((vm_object_t object,
			vm_pindex_t pindex, int prefqueue));
static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t));

/*
 * Associated with each page of user-allocatable memory is a
 * page structure.
 */

static struct pglist *vm_page_buckets;	/* Array of buckets */
static int vm_page_bucket_count;	/* How big is array? */
static int vm_page_hash_mask;		/* Mask for hash function */
static volatile int vm_page_bucket_generation;

struct pglist vm_page_queue_free[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_zero[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_active = {0};
struct pglist vm_page_queue_inactive = {0};
struct pglist vm_page_queue_cache[PQ_L2_SIZE] = {0};

static int no_queue = 0;

struct vpgqueues vm_page_queues[PQ_COUNT] = {0};
static int pqcnt[PQ_COUNT] = {0};

static void
vm_page_queue_init(void)
{
	int i;

	vm_page_queues[PQ_NONE].pl = NULL;
	vm_page_queues[PQ_NONE].cnt = &no_queue;
	for (i = 0; i < PQ_L2_SIZE; i++) {
		vm_page_queues[PQ_FREE + i].pl = &vm_page_queue_free[i];
		vm_page_queues[PQ_FREE + i].cnt = &cnt.v_free_count;
	}
	for (i = 0; i < PQ_L2_SIZE; i++) {
		vm_page_queues[PQ_ZERO + i].pl = &vm_page_queue_zero[i];
		vm_page_queues[PQ_ZERO + i].cnt = &cnt.v_free_count;
	}
	vm_page_queues[PQ_INACTIVE].pl = &vm_page_queue_inactive;
	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;

	vm_page_queues[PQ_ACTIVE].pl = &vm_page_queue_active;
	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
	for (i = 0; i < PQ_L2_SIZE; i++) {
		vm_page_queues[PQ_CACHE + i].pl = &vm_page_queue_cache[i];
		vm_page_queues[PQ_CACHE + i].cnt = &cnt.v_cache_count;
	}
	for (i = 0; i < PQ_COUNT; i++) {
		if (vm_page_queues[i].pl) {
			TAILQ_INIT(vm_page_queues[i].pl);
		} else if (i != 0) {
			panic("vm_page_queue_init: queue %d is null", i);
		}
		vm_page_queues[i].lcnt = &pqcnt[i];
	}
}
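
/*
 * vm_page_queues is indexed by queue class plus page color: a page with
 * m->pc == 3 sitting on the free list is on vm_page_queues[PQ_FREE + 3].pl,
 * which aliases vm_page_queue_free[3].  The PQ_L2_SIZE colors, assigned
 * from the physical address in vm_page_startup(), spread physically
 * contiguous pages across cache bins.
 */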

vm_page_t vm_page_array = 0;
static int vm_page_array_size = 0;
long first_page = 0;
static long last_page;
static vm_size_t page_mask;
static int page_shift;
int vm_page_zero_count = 0;

/*
 * map of contiguous valid DEV_BSIZE chunks in a page
 * (this list is valid for page sizes up to 16*DEV_BSIZE)
 */
static u_short vm_page_dev_bsize_chunks[] = {
	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};

static __inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex));
static int vm_page_freechk_and_unqueue __P((vm_page_t m));
static void vm_page_free_wakeup __P((void));

/*
 * vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from cnt.v_page_size.
 */
void
vm_set_page_size()
{

	if (cnt.v_page_size == 0)
		cnt.v_page_size = DEFAULT_PAGE_SIZE;
	page_mask = cnt.v_page_size - 1;
	if ((page_mask & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
	for (page_shift = 0;; page_shift++)
		if ((1 << page_shift) == cnt.v_page_size)
			break;
}
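
/*
 * For example, with the usual cnt.v_page_size of 4096 the loop above
 * leaves page_shift == 12 and page_mask == 0xfff.
 */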

/*
 * vm_page_startup:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 */

vm_offset_t
vm_page_startup(starta, enda, vaddr)
	register vm_offset_t starta;
	vm_offset_t enda;
	register vm_offset_t vaddr;
{
	register vm_offset_t mapped;
	register vm_page_t m;
	register struct pglist *bucket;
	vm_size_t npages, page_range;
	register vm_offset_t new_start;
	int i;
	vm_offset_t pa;
	int nblocks;
	vm_offset_t first_managed_page;

	/* the biggest memory array is the second group of pages */
	vm_offset_t start;
	vm_offset_t biggestone, biggestsize;

	vm_offset_t total;

	total = 0;
	biggestsize = 0;
	biggestone = 0;
	nblocks = 0;
	vaddr = round_page(vaddr);

	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	for (i = 0; phys_avail[i + 1]; i += 2) {
		int size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		++nblocks;
		total += size;
	}

	start = phys_avail[biggestone];

	/*
	 * Initialize the queue headers for the free queue, the active queue
	 * and the inactive queue.
	 */

	vm_page_queue_init();

	/*
	 * Allocate (and initialize) the hash table buckets.
	 *
	 * The number of buckets MUST BE a power of 2, and the actual value is
	 * the next power of 2 greater than the number of physical pages in
	 * the system.
	 *
	 * Note: This computation can be tweaked if desired.
	 */
	vm_page_buckets = (struct pglist *) vaddr;
	bucket = vm_page_buckets;
	if (vm_page_bucket_count == 0) {
		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < atop(total))
			vm_page_bucket_count <<= 1;
	}
	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 * Validate these addresses.
	 */

	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
	new_start = round_page(new_start);
	mapped = round_page(vaddr);
	vaddr = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;
	vaddr = round_page(vaddr);
	bzero((caddr_t) mapped, vaddr - mapped);

	for (i = 0; i < vm_page_bucket_count; i++) {
		TAILQ_INIT(bucket);
		bucket++;
	}

	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */

	first_page = phys_avail[0] / PAGE_SIZE;
	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;

	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
	npages = (total - (page_range * sizeof(struct vm_page)) -
	    (start - phys_avail[biggestone])) / PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */
	vm_page_array = (vm_page_t) vaddr;
	mapped = vaddr;

	/*
	 * Validate these addresses.
	 */
	new_start = round_page(start + page_range * sizeof(struct vm_page));
	mapped = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;

	first_managed_page = start / PAGE_SIZE;

	/*
	 * Clear all of the page structures
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	vm_page_array_size = page_range;

	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
		if (i == biggestone)
			pa = ptoa(first_managed_page);
		else
			pa = phys_avail[i];
		while (pa < phys_avail[i + 1] && npages-- > 0) {
			++cnt.v_page_count;
			++cnt.v_free_count;
			m = PHYS_TO_VM_PAGE(pa);
			m->phys_addr = pa;
			m->flags = 0;
			m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
			m->queue = m->pc + PQ_FREE;
			TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
			++(*vm_page_queues[m->queue].lcnt);
			pa += PAGE_SIZE;
		}
	}
	return (mapped);
}

/*
 * vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE: This function depends on vm_page_bucket_count being a power of 2.
 */
static __inline int
vm_page_hash(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	return ((((unsigned long) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask;
}
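
/*
 * Hash arithmetic example: with vm_page_hash_mask == 0x3ff, an object at
 * 0xc0f00e80 and pindex 10 hash to
 * ((0xc0f00e80 >> 5) + (10 >> 1)) & 0x3ff == (0x6078074 + 5) & 0x3ff == 0x79.
 * The object pointer is shifted right so that its low bits, which are
 * mostly constant due to allocator alignment, do not dominate the hash.
 */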

/*
 * vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object and page must be locked, and must be splhigh.
 */

void
vm_page_insert(m, object, pindex)
	register vm_page_t m;
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register struct pglist *bucket;

#if !defined(MAX_PERF)
	if (m->flags & PG_TABLED)
		panic("vm_page_insert: already inserted");
#endif

	/*
	 * Record the object/offset pair in this page
	 */

	m->object = object;
	m->pindex = pindex;

	/*
	 * Insert it into the object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	TAILQ_INSERT_TAIL(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now link into the object's list of backed pages.
	 */

	TAILQ_INSERT_TAIL(&object->memq, m, listq);
	m->flags |= PG_TABLED;
	m->object->page_hint = m;
	m->object->generation++;

	if (m->wire_count)
		object->wire_count++;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count++;

	/*
	 * And show that the object has one more resident page.
	 */

	object->resident_page_count++;
}

/*
 * vm_page_remove:		[ internal use only ]
 *				NOTE: used by device pager as well -wfj
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object and page must be locked, and at splhigh.
 */

void
vm_page_remove(m)
	register vm_page_t m;
{
	register struct pglist *bucket;
	vm_object_t object;

	if (!(m->flags & PG_TABLED))
		return;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) == 0) {
		panic("vm_page_remove: page not busy");
	}
#endif

	m->flags &= ~PG_BUSY;
	if (m->flags & PG_WANTED) {
		m->flags &= ~PG_WANTED;
		wakeup(m);
	}

	object = m->object;
	if (object->page_hint == m)
		object->page_hint = NULL;

	if (m->wire_count)
		object->wire_count--;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count--;

	/*
	 * Remove from the object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
	TAILQ_REMOVE(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now remove from the object's list of backed pages.
	 */

	TAILQ_REMOVE(&object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */

	object->resident_page_count--;
	object->generation++;
	m->object = NULL;

	m->flags &= ~PG_TABLED;
}

/*
 * vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

vm_page_t
vm_page_lookup(object, pindex)
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register vm_page_t m;
	register struct pglist *bucket;
	int generation;
	int s;

	/*
	 * Search the hash table for this object/offset pair
	 */

	if (object->page_hint && (object->page_hint->pindex == pindex) &&
	    (object->page_hint->object == object))
		return object->page_hint;

retry:
	generation = vm_page_bucket_generation;
	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m, hashq)) {
		if ((m->object == object) && (m->pindex == pindex)) {
			if (vm_page_bucket_generation != generation)
				goto retry;
			m->object->page_hint = m;
			return (m);
		}
	}
	if (vm_page_bucket_generation != generation)
		goto retry;
	return (NULL);
}
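
/*
 * The generation check above lets the lookup run without blocking out
 * updaters: vm_page_insert() and vm_page_remove() bump
 * vm_page_bucket_generation on every bucket change, so if the counter
 * moves while a chain is being walked, the walk may have followed a
 * stale hashq link and is simply restarted.
 */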

/*
 * vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(m, new_object, new_pindex)
	register vm_page_t m;
	register vm_object_t new_object;
	vm_pindex_t new_pindex;
{
	int s;

	s = splvm();
	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	splx(s);
}

/*
 * vm_page_unqueue without any wakeup
 */
void
vm_page_unqueue_nowakeup(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;

	if (queue != PQ_NONE) {
		pq = &vm_page_queues[queue];
		m->queue = PQ_NONE;
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 * vm_page_unqueue must be called at splhigh();
 */
void
vm_page_unqueue(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;

	if (queue != PQ_NONE) {
		m->queue = PQ_NONE;
		pq = &vm_page_queues[queue];
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if ((cnt.v_cache_count + cnt.v_free_count) <
			    (cnt.v_free_reserved + cnt.v_cache_min))
				pagedaemon_wakeup();
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 * Find a page on the specified queue with color optimization.
 */
vm_page_t
vm_page_list_find(basequeue, index)
	int basequeue, index;
{
#if PQ_L2_SIZE > 1

	int i, j;
	vm_page_t m;
	int hindex;
	struct vpgqueues *pq;

	pq = &vm_page_queues[basequeue];

	m = TAILQ_FIRST(pq[index].pl);
	if (m)
		return m;

	for (j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for (i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
		    (ij = i + j) > 0;
		    i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	m = TAILQ_FIRST(pq[hindex].pl);
	if (m)
		return m;

	return NULL;
#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}

/*
 * Find a page on the specified queue with color optimization.
 */
vm_page_t
vm_page_select(object, pindex, basequeue)
	vm_object_t object;
	vm_pindex_t pindex;
	int basequeue;
{

#if PQ_L2_SIZE > 1
	int index;

	index = (pindex + object->pg_color) & PQ_L2_MASK;
	return vm_page_list_find(basequeue, index);

#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}

/*
 * Find a page on the cache queue with color optimization.  As pages
 * might be found, but not applicable, they are deactivated.  This
 * keeps us from using potentially busy cached pages.
 */
vm_page_t
vm_page_select_cache(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	vm_page_t m;

	while (TRUE) {
#if PQ_L2_SIZE > 1
		int index;

		index = (pindex + object->pg_color) & PQ_L2_MASK;
		m = vm_page_list_find(PQ_CACHE, index);

#else
		m = TAILQ_FIRST(vm_page_queues[PQ_CACHE].pl);
#endif
		if (m && ((m->flags & PG_BUSY) || m->busy ||
		    m->hold_count || m->wire_count)) {
			vm_page_deactivate(m);
			continue;
		}
		return m;
	}
}
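
/*
 * Color selection example (assuming PQ_L2_SIZE == 16, PQ_L2_MASK == 15):
 * a request for pindex 35 in an object with pg_color 5 starts the search
 * at color (35 + 5) & 15 == 8; vm_page_list_find() then probes the other
 * colors, trying the diametrically opposite bin (index + PQ_L2_SIZE/2,
 * modulo PQ_L2_SIZE) last.
 */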

/*
 * Find a free or zero page, with specified preference.
 */
static vm_page_t
vm_page_select_free(object, pindex, prefqueue)
	vm_object_t object;
	vm_pindex_t pindex;
	int prefqueue;
{
#if PQ_L2_SIZE > 1
	int i, j;
	int index, hindex;
#endif
	vm_page_t m, mh;
	int oqueuediff;
	struct vpgqueues *pq;

	if (prefqueue == PQ_ZERO)
		oqueuediff = PQ_FREE - PQ_ZERO;
	else
		oqueuediff = PQ_ZERO - PQ_FREE;

	if ((mh = object->page_hint) != NULL) {
		if (mh->pindex == (pindex - 1)) {
			if ((mh->flags & PG_FICTITIOUS) == 0) {
				if ((mh < &vm_page_array[cnt.v_page_count-1]) &&
				    (mh >= &vm_page_array[0])) {
					int queue;
					m = mh + 1;
					if (VM_PAGE_TO_PHYS(m) == (VM_PAGE_TO_PHYS(mh) + PAGE_SIZE)) {
						queue = m->queue - m->pc;
						if (queue == PQ_FREE || queue == PQ_ZERO) {
							return m;
						}
					}
				}
			}
		}
	}

	pq = &vm_page_queues[prefqueue];

#if PQ_L2_SIZE > 1

	index = (pindex + object->pg_color) & PQ_L2_MASK;

	if ((m = TAILQ_FIRST(pq[index].pl)) != NULL)
		return m;
	if ((m = TAILQ_FIRST(pq[index + oqueuediff].pl)) != NULL)
		return m;

	for (j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for (i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
		    (ij = i + j) >= 0;
		    i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
				return m;
			if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
				return m;
			if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
		return m;
	if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
		return m;

#else
	if ((m = TAILQ_FIRST(pq[0].pl)) != NULL)
		return m;
	else
		return TAILQ_FIRST(pq[oqueuediff].pl);
#endif

	return NULL;
}
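
/*
 * The allocation classes accepted by vm_page_alloc() below differ only in
 * how deeply they may dig into the free page pool: VM_ALLOC_NORMAL and
 * VM_ALLOC_ZERO take a free page only while v_free_count is at or above
 * v_free_reserved and otherwise fall back to reclaiming a cached page;
 * VM_ALLOC_SYSTEM may additionally dip down to v_interrupt_free_min when
 * no cached pages remain; VM_ALLOC_INTERRUPT may take the very last free
 * page.
 */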

/*
 * vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	page_req classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *	VM_ALLOC_ZERO		zero page
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(object, pindex, page_req)
	vm_object_t object;
	vm_pindex_t pindex;
	int page_req;
{
	register vm_page_t m;
	struct vpgqueues *pq;
	vm_object_t oldobject;
	int queue, qtype;
	int s;

#ifdef DIAGNOSTIC
	m = vm_page_lookup(object, pindex);
	if (m)
		panic("vm_page_alloc: page already allocated");
#endif

	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
		page_req = VM_ALLOC_SYSTEM;
	}

	s = splvm();

	switch (page_req) {

	case VM_ALLOC_NORMAL:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(NORMAL): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_ZERO:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_ZERO);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(ZERO): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_SYSTEM:
		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
		    ((cnt.v_cache_count == 0) &&
		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(SYSTEM): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_INTERRUPT:
		if (cnt.v_free_count > 0) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(INTERRUPT): missing page on free queue\n");
#endif
		} else {
			splx(s);
			vm_pageout_deficit++;
			pagedaemon_wakeup();
			return (NULL);
		}
		break;

	default:
		m = NULL;
#if !defined(MAX_PERF)
		panic("vm_page_alloc: invalid allocation class");
#endif
	}

	queue = m->queue;
	qtype = queue - m->pc;
	if (qtype == PQ_ZERO)
		vm_page_zero_count--;
	pq = &vm_page_queues[queue];
	TAILQ_REMOVE(pq->pl, m, pageq);
	(*pq->cnt)--;
	(*pq->lcnt)--;
	oldobject = NULL;
	if (qtype == PQ_ZERO) {
		m->flags = PG_ZERO | PG_BUSY;
	} else if (qtype == PQ_CACHE) {
		oldobject = m->object;
		m->flags |= PG_BUSY;
		vm_page_remove(m);
		m->flags = PG_BUSY;
	} else {
		m->flags = PG_BUSY;
	}
	m->wire_count = 0;
	m->hold_count = 0;
	m->act_count = 0;
	m->busy = 0;
	m->valid = 0;
	m->dirty = 0;
	m->queue = PQ_NONE;

	/* XXX before splx until vm_page_insert is safe */
	vm_page_insert(m, object, pindex);

	/*
	 * Don't wakeup too often - wakeup the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (((cnt.v_free_count + cnt.v_cache_count) <
	    (cnt.v_free_reserved + cnt.v_cache_min)) ||
	    (cnt.v_free_count < cnt.v_pageout_free_min))
		pagedaemon_wakeup();

	if ((qtype == PQ_CACHE) &&
	    ((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) &&
	    oldobject && (oldobject->type == OBJT_VNODE) &&
	    ((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VFREE|VTBFREE|VDOOMED)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}
	splx(s);

	return (m);
}
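
/*
 * Typical use of vm_page_alloc() by a sleepable caller (an illustrative
 * sketch, not code from this file): retry through VM_WAIT on shortage,
 * and zero the page manually when the PQ_ZERO queue could not supply a
 * pre-zeroed one:
 *
 *	while ((m = vm_page_alloc(object, pindex, VM_ALLOC_ZERO)) == NULL)
 *		VM_WAIT;
 *	if ((m->flags & PG_ZERO) == 0)
 *		pmap_zero_page(VM_PAGE_TO_PHYS(m));
 */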

void
vm_wait()
{
	int s;

	s = splvm();
	if (curproc == pageproc) {
		vm_pageout_pages_needed = 1;
		tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
	} else {
		if (!vm_pages_needed) {
			vm_pages_needed++;
			wakeup(&vm_pages_needed);
		}
		tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
	}
	splx(s);
}

int
vm_page_sleep(vm_page_t m, char *msg, char *busy)
{
	vm_object_t object = m->object;
	int slept = 0;

	if ((busy && *busy) || (m->flags & PG_BUSY)) {
		int s;
		s = splvm();
		if ((busy && *busy) || (m->flags & PG_BUSY)) {
			m->flags |= PG_WANTED;
			tsleep(m, PVM, msg, 0);
			slept = 1;
		}
		splx(s);
	}
	return slept;
}
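
/*
 * vm_page_sleep() re-tests its condition after raising the spl so that a
 * wakeup cannot slip in between the unprotected first check and the
 * tsleep().  contigmalloc1() below uses it as
 * vm_page_sleep(m, "vpctw0", &m->busy) and restarts its queue scan
 * whenever the return value indicates that it slept.
 */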

/*
 * vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */
void
vm_page_activate(m)
	register vm_page_t m;
{
	int s;
	vm_page_t np;
	vm_object_t object;

	s = splvm();
	if (m->queue != PQ_ACTIVE) {
		if ((m->queue - m->pc) == PQ_CACHE)
			cnt.v_reactivated++;

		vm_page_unqueue(m);

		if (m->wire_count == 0) {
			m->queue = PQ_ACTIVE;
			++(*vm_page_queues[PQ_ACTIVE].lcnt);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			if (m->act_count < ACT_INIT)
				m->act_count = ACT_INIT;
			cnt.v_active_count++;
		}
	} else {
		if (m->act_count < ACT_INIT)
			m->act_count = ACT_INIT;
	}

	splx(s);
}

/*
 * helper routine for vm_page_free and vm_page_free_zero
 */
static int
vm_page_freechk_and_unqueue(m)
	vm_page_t m;
{
	vm_object_t oldobject;

	oldobject = m->object;

#if !defined(MAX_PERF)
	if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
	    (m->hold_count != 0)) {
		printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d), hold(%d)\n",
		    m->pindex, m->busy,
		    (m->flags & PG_BUSY) ? 1 : 0, m->hold_count);
		if ((m->queue - m->pc) == PQ_FREE)
			panic("vm_page_free: freeing free page");
		else
			panic("vm_page_free: freeing busy page");
	}
#endif

	vm_page_unqueue_nowakeup(m);
	vm_page_remove(m);

	if ((m->flags & PG_FICTITIOUS) != 0) {
		return 0;
	}

	m->valid = 0;

	if (m->wire_count != 0) {
#if !defined(MAX_PERF)
		if (m->wire_count > 1) {
			panic("vm_page_free: invalid wire count (%d), pindex: 0x%x",
			    m->wire_count, m->pindex);
		}
#endif
		m->wire_count = 0;
		if (m->object)
			m->object->wire_count--;
		cnt.v_wire_count--;
	}

	if (oldobject && (oldobject->type == OBJT_VNODE) &&
	    ((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}

	return 1;
}

/*
 * helper routine for vm_page_free and vm_page_free_zero
 */
static __inline void
vm_page_free_wakeup()
{

	/*
	 * if pageout daemon needs pages, then tell it that there are
	 * some free.
	 */
	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	/*
	 * wakeup processes that are waiting on memory if we hit a
	 * high water mark.  And wakeup scheduler process if we have
	 * lots of memory.  this process will swapin processes.
	 */
	if (vm_pages_needed &&
	    ((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
		wakeup(&cnt.v_free_count);
		vm_pages_needed = 0;
	}
}

/*
 * vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page must be locked prior to entry.
 */
void
vm_page_free(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_FREE + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);
	/*
	 * If the pageout process is grabbing the page, it is likely
	 * that the page is NOT in the cache.  It is more likely that
	 * the page will be partially in the cache if it is being
	 * explicitly freed.
	 */
	if (curproc == pageproc) {
		TAILQ_INSERT_TAIL(pq->pl, m, pageq);
	} else {
		TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	}

	vm_page_free_wakeup();
	splx(s);
}

void
vm_page_free_zero(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_ZERO + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);

	TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	++vm_page_zero_count;
	vm_page_free_wakeup();
	splx(s);
}
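
/*
 * Both free paths above go through vm_page_freechk_and_unqueue(), which
 * returns 0 for fictitious pages so that those are never linked onto a
 * free queue.  The two paths differ only in the target queue:
 * vm_page_free() feeds PQ_FREE, while vm_page_free_zero() feeds PQ_ZERO
 * and bumps vm_page_zero_count, advertising a page that will need no
 * zero-fill at allocation time.
 */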

/*
 * vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page queues must be locked.
 */
void
vm_page_wire(m)
	register vm_page_t m;
{
	int s;

	if (m->wire_count == 0) {
		s = splvm();
		vm_page_unqueue(m);
		splx(s);
		cnt.v_wire_count++;
		if (m->object)
			m->object->wire_count++;
	}
	(*vm_page_queues[PQ_NONE].lcnt)++;
	m->wire_count++;
	m->flags |= PG_MAPPED;
}

/*
 * vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page queues must be locked.
 */
void
vm_page_unwire(m)
	register vm_page_t m;
{
	int s;

	s = splvm();

	if (m->wire_count > 0) {
		m->wire_count--;
		if (m->wire_count == 0) {
			if (m->object)
				m->object->wire_count--;
			cnt.v_wire_count--;
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			m->queue = PQ_ACTIVE;
			(*vm_page_queues[PQ_ACTIVE].lcnt)++;
			cnt.v_active_count++;
		}
	} else {
#if !defined(MAX_PERF)
		panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count);
#endif
	}
	splx(s);
}


/*
 * vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(m)
	register vm_page_t m;
{
	int s;

	/*
	 * Only move active pages -- ignore locked or already inactive ones.
	 *
	 * XXX: sometimes we get pages which aren't wired down or on any queue -
	 * we need to put them on the inactive queue also, otherwise we lose
	 * track of them.  Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
	 */
	if (m->queue == PQ_INACTIVE)
		return;

	s = splvm();
	if (m->wire_count == 0) {
		if ((m->queue - m->pc) == PQ_CACHE)
			cnt.v_reactivated++;
		vm_page_unqueue(m);
		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
		m->queue = PQ_INACTIVE;
		++(*vm_page_queues[PQ_INACTIVE].lcnt);
		cnt.v_inactive_count++;
	}
	splx(s);
}

/*
 * vm_page_cache
 *
 *	Put the specified page onto the page cache queue (if appropriate).
 */
void
vm_page_cache(m)
	register vm_page_t m;
{
	int s;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
		printf("vm_page_cache: attempting to cache busy page\n");
		return;
	}
#endif
	if ((m->queue - m->pc) == PQ_CACHE)
		return;

	vm_page_protect(m, VM_PROT_NONE);
#if !defined(MAX_PERF)
	if (m->dirty != 0) {
		panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
	}
#endif
	s = splvm();
	vm_page_unqueue_nowakeup(m);
	m->queue = PQ_CACHE + m->pc;
	(*vm_page_queues[m->queue].lcnt)++;
	TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
	cnt.v_cache_count++;
	m->object->cache_count++;
	vm_page_free_wakeup();
	splx(s);
}
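
/*
 * A cached page keeps its object/pindex identity, so until someone
 * steals it the page can still be revived by vm_page_lookup().  When
 * memory runs short, vm_page_alloc() reclaims such a page through
 * vm_page_select_cache() and vm_page_remove().  That is why the page
 * must be clean and unmapped (vm_page_protect(..., VM_PROT_NONE)) before
 * it is put on PQ_CACHE.
 */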

/*
 * Grab a page, waiting until we are woken up due to the page
 * changing state.  We keep on waiting as long as the page continues
 * to exist in the object.  If the page doesn't exist, allocate it.
 */
vm_page_t
vm_page_grab(object, pindex, allocflags)
	vm_object_t object;
	vm_pindex_t pindex;
	int allocflags;
{

	vm_page_t m;
	int s, generation;

retrylookup:
	if ((m = vm_page_lookup(object, pindex)) != NULL) {
		if (m->busy || (m->flags & PG_BUSY)) {
			generation = object->generation;

			s = splvm();
			while ((object->generation == generation) &&
			    (m->busy || (m->flags & PG_BUSY))) {
				m->flags |= PG_WANTED | PG_REFERENCED;
				tsleep(m, PVM, "pgrbwt", 0);
				if ((allocflags & VM_ALLOC_RETRY) == 0) {
					splx(s);
					return NULL;
				}
			}
			splx(s);
			goto retrylookup;
		} else {
			m->flags |= PG_BUSY;
			return m;
		}
	}

	m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
	if (m == NULL) {
		VM_WAIT;
		if ((allocflags & VM_ALLOC_RETRY) == 0)
			return NULL;
		goto retrylookup;
	}

	return m;
}

/*
 * mapping function for valid bits or for dirty bits in
 * a page
 */
__inline int
vm_page_bits(int base, int size)
{
	u_short chunk;

	if ((base == 0) && (size >= PAGE_SIZE))
		return VM_PAGE_BITS_ALL;

	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
	base &= PAGE_MASK;
	if (size > PAGE_SIZE - base) {
		size = PAGE_SIZE - base;
	}

	base = base / DEV_BSIZE;
	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
	return (chunk << base) & VM_PAGE_BITS_ALL;
}
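
/*
 * Worked example, assuming PAGE_SIZE == 4096 and DEV_BSIZE == 512 (eight
 * chunks per page): vm_page_bits(1024, 1024) covers two chunks, so the
 * chunk mask is vm_page_dev_bsize_chunks[2] == 0x3, shifted left by
 * base / DEV_BSIZE == 2, giving 0x0c -- chunks 2 and 3 of the page.
 */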

/*
 * set a page valid and clean
 */
void
vm_page_set_validclean(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int pagebits = vm_page_bits(base, size);

	m->valid |= pagebits;
	m->dirty &= ~pagebits;
	if (base == 0 && size == PAGE_SIZE)
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}

/*
 * set a page (partially) invalid
 */
void
vm_page_set_invalid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits;

	m->valid &= ~(bits = vm_page_bits(base, size));
	if (m->valid == 0)
		m->dirty &= ~bits;
	m->object->generation++;
}

/*
 * is (partial) page valid?
 */
int
vm_page_is_valid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits = vm_page_bits(base, size);

	if (m->valid && ((m->valid & bits) == bits))
		return 1;
	else
		return 0;
}

void
vm_page_test_dirty(m)
	vm_page_t m;
{
	if ((m->dirty != VM_PAGE_BITS_ALL) &&
	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
		m->dirty = VM_PAGE_BITS_ALL;
	}
}

/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
void *
contigmalloc1(size, type, flags, low, high, alignment, boundary, map)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
	vm_map_t map;
{
	int i, s, start;
	vm_offset_t addr, phys, tmp_addr;
	int pass;
	vm_page_t pga = vm_page_array;

	size = round_page(size);
#if !defined(MAX_PERF)
	if (size == 0)
		panic("contigmalloc1: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("contigmalloc1: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("contigmalloc1: boundary must be a power of 2");
#endif

	start = 0;
	for (pass = 0; pass <= 1; pass++) {
		s = splvm();
again:
		/*
		 * Find first page in array that is free, within range, aligned, and
		 * such that the boundary won't be crossed.
		 */
		for (i = start; i < cnt.v_page_count; i++) {
			int pqtype;
			phys = VM_PAGE_TO_PHYS(&pga[i]);
			pqtype = pga[i].queue - pga[i].pc;
			if (((pqtype == PQ_ZERO) || (pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
			    (phys >= low) && (phys < high) &&
			    ((phys & (alignment - 1)) == 0) &&
			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
				break;
		}

		/*
		 * If the above failed or we will exceed the upper bound, fail.
		 */
		if ((i == cnt.v_page_count) ||
		    ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
			vm_page_t m, next;

again1:
			for (m = TAILQ_FIRST(&vm_page_queue_inactive);
			    m != NULL;
			    m = next) {

				if (m->queue != PQ_INACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw0", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
					    m->object->type == OBJT_DEFAULT) {
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			for (m = TAILQ_FIRST(&vm_page_queue_active);
			    m != NULL;
			    m = next) {

				if (m->queue != PQ_ACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw1", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
					    m->object->type == OBJT_DEFAULT) {
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			splx(s);
			continue;
		}
		start = i;

		/*
		 * Check successive pages for contiguous and free.
		 */
		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			pqtype = pga[i].queue - pga[i].pc;
			if ((VM_PAGE_TO_PHYS(&pga[i]) !=
			    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
			    ((pqtype != PQ_ZERO) && (pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
				start++;
				goto again;
			}
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			vm_page_t m = &pga[i];

			pqtype = m->queue - m->pc;
			if (pqtype == PQ_CACHE) {
				m->flags |= PG_BUSY;
				vm_page_free(m);
			}

			TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
			(*vm_page_queues[m->queue].lcnt)--;
			cnt.v_free_count--;
			m->valid = VM_PAGE_BITS_ALL;
			m->flags = 0;
			m->dirty = 0;
			m->wire_count = 0;
			m->busy = 0;
			m->queue = PQ_NONE;
			m->object = NULL;
			vm_page_wire(m);
		}

		/*
		 * We've found a contiguous chunk that meets our requirements.
		 * Allocate kernel VM, unfree and assign the physical pages to
		 * it and return kernel VM pointer.
		 */
		tmp_addr = addr = kmem_alloc_pageable(map, size);
		if (addr == 0) {
			/*
			 * XXX We almost never run out of kernel virtual
			 * space, so we don't make the allocated memory
			 * above available.
			 */
			splx(s);
			return (NULL);
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];
			vm_page_insert(m, kernel_object,
			    OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
			pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
			tmp_addr += PAGE_SIZE;
		}

		splx(s);
		return ((void *)addr);
	}
	return NULL;
}

void *
contigmalloc(size, type, flags, low, high, alignment, boundary)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
{
	return contigmalloc1(size, type, flags, low, high, alignment, boundary,
	    kernel_map);
}

vm_offset_t
vm_page_alloc_contig(size, low, high, alignment)
	vm_offset_t size;
	vm_offset_t low;
	vm_offset_t high;
	vm_offset_t alignment;
{
	return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
	    alignment, 0ul, kernel_map));
}
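
/*
 * Illustrative call (hypothetical values, not from this file): a driver
 * needing one page of DMA memory below 16MB that must not cross a 64KB
 * boundary could ask for
 *
 *	buf = contigmalloc(4096, M_DEVBUF, M_NOWAIT,
 *	    0ul, 0x1000000ul, 4096ul, 0x10000ul);
 */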

#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <ddb/ddb.h>

DB_SHOW_COMMAND(page, vm_page_print_page_info)
{
	db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
	db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
	db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
	db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
	db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
	db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
	db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
	db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
	db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
	db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}

DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
{
	int i;

	db_printf("PQ_FREE:");
	for (i = 0; i < PQ_L2_SIZE; i++) {
		db_printf(" %d", *vm_page_queues[PQ_FREE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_CACHE:");
	for (i = 0; i < PQ_L2_SIZE; i++) {
		db_printf(" %d", *vm_page_queues[PQ_CACHE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ZERO:");
	for (i = 0; i < PQ_L2_SIZE; i++) {
		db_printf(" %d", *vm_page_queues[PQ_ZERO + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
	    *vm_page_queues[PQ_ACTIVE].lcnt,
	    *vm_page_queues[PQ_INACTIVE].lcnt);
}
#endif /* DDB */