vm_page.c revision 40700
1/* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * The Mach Operating System project at Carnegie-Mellon University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 37 * $Id: vm_page.c,v 1.110 1998/10/25 17:44:59 phk Exp $ 38 */ 39 40/* 41 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 42 * All rights reserved. 43 * 44 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 45 * 46 * Permission to use, copy, modify and distribute this software and 47 * its documentation is hereby granted, provided that both the copyright 48 * notice and this permission notice appear in all copies of the 49 * software, derivative works or modified versions, and any portions 50 * thereof, and that both notices appear in supporting documentation. 51 * 52 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 53 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 54 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 55 * 56 * Carnegie Mellon requests users of this software to return to 57 * 58 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 59 * School of Computer Science 60 * Carnegie Mellon University 61 * Pittsburgh PA 15213-3890 62 * 63 * any improvements or extensions that they make and grant Carnegie the 64 * rights to redistribute these changes. 65 */ 66 67/* 68 * Resident memory management module. 69 */ 70 71#include <sys/param.h> 72#include <sys/systm.h> 73#include <sys/malloc.h> 74#include <sys/proc.h> 75#include <sys/vmmeter.h> 76#include <sys/vnode.h> 77 78#include <vm/vm.h> 79#include <vm/vm_param.h> 80#include <vm/vm_prot.h> 81#include <sys/lock.h> 82#include <vm/vm_kern.h> 83#include <vm/vm_object.h> 84#include <vm/vm_page.h> 85#include <vm/vm_pageout.h> 86#include <vm/vm_extern.h> 87 88static void vm_page_queue_init __P((void)); 89static vm_page_t vm_page_select_free __P((vm_object_t object, 90 vm_pindex_t pindex, int prefqueue)); 91static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t)); 92 93/* 94 * Associated with page of user-allocatable memory is a 95 * page structure. 96 */ 97 98static struct pglist *vm_page_buckets; /* Array of buckets */ 99static int vm_page_bucket_count; /* How big is array? */ 100static int vm_page_hash_mask; /* Mask for hash function */ 101static volatile int vm_page_bucket_generation; 102 103struct pglist vm_page_queue_free[PQ_L2_SIZE] = {0}; 104struct pglist vm_page_queue_zero[PQ_L2_SIZE] = {0}; 105struct pglist vm_page_queue_active = {0}; 106struct pglist vm_page_queue_inactive = {0}; 107struct pglist vm_page_queue_cache[PQ_L2_SIZE] = {0}; 108 109static int no_queue=0; 110 111struct vpgqueues vm_page_queues[PQ_COUNT] = {0}; 112static int pqcnt[PQ_COUNT] = {0}; 113 114static void 115vm_page_queue_init(void) { 116 int i; 117 118 vm_page_queues[PQ_NONE].pl = NULL; 119 vm_page_queues[PQ_NONE].cnt = &no_queue; 120 for(i=0;i<PQ_L2_SIZE;i++) { 121 vm_page_queues[PQ_FREE+i].pl = &vm_page_queue_free[i]; 122 vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count; 123 } 124 for(i=0;i<PQ_L2_SIZE;i++) { 125 vm_page_queues[PQ_ZERO+i].pl = &vm_page_queue_zero[i]; 126 vm_page_queues[PQ_ZERO+i].cnt = &cnt.v_free_count; 127 } 128 vm_page_queues[PQ_INACTIVE].pl = &vm_page_queue_inactive; 129 vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count; 130 131 vm_page_queues[PQ_ACTIVE].pl = &vm_page_queue_active; 132 vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count; 133 for(i=0;i<PQ_L2_SIZE;i++) { 134 vm_page_queues[PQ_CACHE+i].pl = &vm_page_queue_cache[i]; 135 vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count; 136 } 137 for(i=0;i<PQ_COUNT;i++) { 138 if (vm_page_queues[i].pl) { 139 TAILQ_INIT(vm_page_queues[i].pl); 140 } else if (i != 0) { 141 panic("vm_page_queue_init: queue %d is null", i); 142 } 143 vm_page_queues[i].lcnt = &pqcnt[i]; 144 } 145} 146 147vm_page_t vm_page_array = 0; 148static int vm_page_array_size = 0; 149long first_page = 0; 150static long last_page; 151static vm_size_t page_mask; 152static int page_shift; 153int vm_page_zero_count = 0; 154 155/* 156 * map of contiguous valid DEV_BSIZE chunks in a page 157 * (this list is valid for page sizes upto 16*DEV_BSIZE) 158 */ 159static u_short vm_page_dev_bsize_chunks[] = { 160 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 161 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff 162}; 163 164static __inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex)); 165static int vm_page_freechk_and_unqueue __P((vm_page_t m)); 166static void vm_page_free_wakeup __P((void)); 167 168/* 169 * vm_set_page_size: 170 * 171 * Sets the page size, perhaps based upon the memory 172 * size. Must be called before any use of page-size 173 * dependent functions. 174 * 175 * Sets page_shift and page_mask from cnt.v_page_size. 176 */ 177void 178vm_set_page_size() 179{ 180 181 if (cnt.v_page_size == 0) 182 cnt.v_page_size = DEFAULT_PAGE_SIZE; 183 page_mask = cnt.v_page_size - 1; 184 if ((page_mask & cnt.v_page_size) != 0) 185 panic("vm_set_page_size: page size not a power of two"); 186 for (page_shift = 0;; page_shift++) 187 if ((1 << page_shift) == cnt.v_page_size) 188 break; 189} 190 191/* 192 * vm_page_startup: 193 * 194 * Initializes the resident memory module. 195 * 196 * Allocates memory for the page cells, and 197 * for the object/offset-to-page hash table headers. 198 * Each page cell is initialized and placed on the free list. 199 */ 200 201vm_offset_t 202vm_page_startup(starta, enda, vaddr) 203 register vm_offset_t starta; 204 vm_offset_t enda; 205 register vm_offset_t vaddr; 206{ 207 register vm_offset_t mapped; 208 register vm_page_t m; 209 register struct pglist *bucket; 210 vm_size_t npages, page_range; 211 register vm_offset_t new_start; 212 int i; 213 vm_offset_t pa; 214 int nblocks; 215 vm_offset_t first_managed_page; 216 217 /* the biggest memory array is the second group of pages */ 218 vm_offset_t start; 219 vm_offset_t biggestone, biggestsize; 220 221 vm_offset_t total; 222 223 total = 0; 224 biggestsize = 0; 225 biggestone = 0; 226 nblocks = 0; 227 vaddr = round_page(vaddr); 228 229 for (i = 0; phys_avail[i + 1]; i += 2) { 230 phys_avail[i] = round_page(phys_avail[i]); 231 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); 232 } 233 234 for (i = 0; phys_avail[i + 1]; i += 2) { 235 int size = phys_avail[i + 1] - phys_avail[i]; 236 237 if (size > biggestsize) { 238 biggestone = i; 239 biggestsize = size; 240 } 241 ++nblocks; 242 total += size; 243 } 244 245 start = phys_avail[biggestone]; 246 247 /* 248 * Initialize the queue headers for the free queue, the active queue 249 * and the inactive queue. 250 */ 251 252 vm_page_queue_init(); 253 254 /* 255 * Allocate (and initialize) the hash table buckets. 256 * 257 * The number of buckets MUST BE a power of 2, and the actual value is 258 * the next power of 2 greater than the number of physical pages in 259 * the system. 260 * 261 * Note: This computation can be tweaked if desired. 262 */ 263 vm_page_buckets = (struct pglist *) vaddr; 264 bucket = vm_page_buckets; 265 if (vm_page_bucket_count == 0) { 266 vm_page_bucket_count = 1; 267 while (vm_page_bucket_count < atop(total)) 268 vm_page_bucket_count <<= 1; 269 } 270 vm_page_hash_mask = vm_page_bucket_count - 1; 271 272 /* 273 * Validate these addresses. 274 */ 275 276 new_start = start + vm_page_bucket_count * sizeof(struct pglist); 277 new_start = round_page(new_start); 278 mapped = round_page(vaddr); 279 vaddr = pmap_map(mapped, start, new_start, 280 VM_PROT_READ | VM_PROT_WRITE); 281 start = new_start; 282 vaddr = round_page(vaddr); 283 bzero((caddr_t) mapped, vaddr - mapped); 284 285 for (i = 0; i < vm_page_bucket_count; i++) { 286 TAILQ_INIT(bucket); 287 bucket++; 288 } 289 290 /* 291 * Compute the number of pages of memory that will be available for 292 * use (taking into account the overhead of a page structure per 293 * page). 294 */ 295 296 first_page = phys_avail[0] / PAGE_SIZE; 297 last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE; 298 299 page_range = last_page - (phys_avail[0] / PAGE_SIZE); 300 npages = (total - (page_range * sizeof(struct vm_page)) - 301 (start - phys_avail[biggestone])) / PAGE_SIZE; 302 303 /* 304 * Initialize the mem entry structures now, and put them in the free 305 * queue. 306 */ 307 vm_page_array = (vm_page_t) vaddr; 308 mapped = vaddr; 309 310 /* 311 * Validate these addresses. 312 */ 313 new_start = round_page(start + page_range * sizeof(struct vm_page)); 314 mapped = pmap_map(mapped, start, new_start, 315 VM_PROT_READ | VM_PROT_WRITE); 316 start = new_start; 317 318 first_managed_page = start / PAGE_SIZE; 319 320 /* 321 * Clear all of the page structures 322 */ 323 bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); 324 vm_page_array_size = page_range; 325 326 cnt.v_page_count = 0; 327 cnt.v_free_count = 0; 328 for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) { 329 if (i == biggestone) 330 pa = ptoa(first_managed_page); 331 else 332 pa = phys_avail[i]; 333 while (pa < phys_avail[i + 1] && npages-- > 0) { 334 ++cnt.v_page_count; 335 ++cnt.v_free_count; 336 m = PHYS_TO_VM_PAGE(pa); 337 m->phys_addr = pa; 338 m->flags = 0; 339 m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK; 340 m->queue = m->pc + PQ_FREE; 341 TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq); 342 ++(*vm_page_queues[m->queue].lcnt); 343 pa += PAGE_SIZE; 344 } 345 } 346 return (mapped); 347} 348 349/* 350 * vm_page_hash: 351 * 352 * Distributes the object/offset key pair among hash buckets. 353 * 354 * NOTE: This macro depends on vm_page_bucket_count being a power of 2. 355 */ 356static __inline int 357vm_page_hash(object, pindex) 358 vm_object_t object; 359 vm_pindex_t pindex; 360{ 361 return ((((uintptr_t) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask; 362} 363 364/* 365 * vm_page_insert: [ internal use only ] 366 * 367 * Inserts the given mem entry into the object/object-page 368 * table and object list. 369 * 370 * The object and page must be locked, and must be splhigh. 371 */ 372 373void 374vm_page_insert(m, object, pindex) 375 register vm_page_t m; 376 register vm_object_t object; 377 register vm_pindex_t pindex; 378{ 379 register struct pglist *bucket; 380 381 if (m->object != NULL) 382 panic("vm_page_insert: already inserted"); 383 384 /* 385 * Record the object/offset pair in this page 386 */ 387 388 m->object = object; 389 m->pindex = pindex; 390 391 /* 392 * Insert it into the object_object/offset hash table 393 */ 394 395 bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; 396 TAILQ_INSERT_TAIL(bucket, m, hashq); 397 vm_page_bucket_generation++; 398 399 /* 400 * Now link into the object's list of backed pages. 401 */ 402 403 TAILQ_INSERT_TAIL(&object->memq, m, listq); 404 m->object->page_hint = m; 405 m->object->generation++; 406 407 if (m->wire_count) 408 object->wire_count++; 409 410 if ((m->queue - m->pc) == PQ_CACHE) 411 object->cache_count++; 412 413 /* 414 * And show that the object has one more resident page. 415 */ 416 417 object->resident_page_count++; 418} 419 420/* 421 * vm_page_remove: [ internal use only ] 422 * NOTE: used by device pager as well -wfj 423 * 424 * Removes the given mem entry from the object/offset-page 425 * table and the object page list. 426 * 427 * The object and page must be locked, and at splhigh. 428 */ 429 430void 431vm_page_remove(m) 432 register vm_page_t m; 433{ 434 register struct pglist *bucket; 435 vm_object_t object; 436 437 if (m->object == NULL) 438 return; 439 440#if !defined(MAX_PERF) 441 if ((m->flags & PG_BUSY) == 0) { 442 panic("vm_page_remove: page not busy"); 443 } 444#endif 445 446 vm_page_flag_clear(m, PG_BUSY); 447 if (m->flags & PG_WANTED) { 448 vm_page_flag_clear(m, PG_WANTED); 449 wakeup(m); 450 } 451 452 object = m->object; 453 if (object->page_hint == m) 454 object->page_hint = NULL; 455 456 if (m->wire_count) 457 object->wire_count--; 458 459 if ((m->queue - m->pc) == PQ_CACHE) 460 object->cache_count--; 461 462 /* 463 * Remove from the object_object/offset hash table 464 */ 465 466 bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)]; 467 TAILQ_REMOVE(bucket, m, hashq); 468 vm_page_bucket_generation++; 469 470 /* 471 * Now remove from the object's list of backed pages. 472 */ 473 474 TAILQ_REMOVE(&object->memq, m, listq); 475 476 /* 477 * And show that the object has one fewer resident page. 478 */ 479 480 object->resident_page_count--; 481 object->generation++; 482 483 m->object = NULL; 484} 485 486/* 487 * vm_page_lookup: 488 * 489 * Returns the page associated with the object/offset 490 * pair specified; if none is found, NULL is returned. 491 * 492 * The object must be locked. No side effects. 493 */ 494 495vm_page_t 496vm_page_lookup(object, pindex) 497 register vm_object_t object; 498 register vm_pindex_t pindex; 499{ 500 register vm_page_t m; 501 register struct pglist *bucket; 502 int generation; 503 504 /* 505 * Search the hash table for this object/offset pair 506 */ 507 508 if (object->page_hint && (object->page_hint->pindex == pindex) && 509 (object->page_hint->object == object)) 510 return object->page_hint; 511 512retry: 513 generation = vm_page_bucket_generation; 514 bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; 515 for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) { 516 if ((m->object == object) && (m->pindex == pindex)) { 517 if (vm_page_bucket_generation != generation) 518 goto retry; 519 m->object->page_hint = m; 520 return (m); 521 } 522 } 523 if (vm_page_bucket_generation != generation) 524 goto retry; 525 return (NULL); 526} 527 528/* 529 * vm_page_rename: 530 * 531 * Move the given memory entry from its 532 * current object to the specified target object/offset. 533 * 534 * The object must be locked. 535 */ 536void 537vm_page_rename(m, new_object, new_pindex) 538 register vm_page_t m; 539 register vm_object_t new_object; 540 vm_pindex_t new_pindex; 541{ 542 int s; 543 544 s = splvm(); 545 vm_page_remove(m); 546 vm_page_insert(m, new_object, new_pindex); 547 splx(s); 548} 549 550/* 551 * vm_page_unqueue without any wakeup 552 */ 553void 554vm_page_unqueue_nowakeup(m) 555 vm_page_t m; 556{ 557 int queue = m->queue; 558 struct vpgqueues *pq; 559 if (queue != PQ_NONE) { 560 pq = &vm_page_queues[queue]; 561 m->queue = PQ_NONE; 562 TAILQ_REMOVE(pq->pl, m, pageq); 563 (*pq->cnt)--; 564 (*pq->lcnt)--; 565 if ((queue - m->pc) == PQ_CACHE) { 566 if (m->object) 567 m->object->cache_count--; 568 } 569 } 570} 571 572/* 573 * vm_page_unqueue must be called at splhigh(); 574 */ 575void 576vm_page_unqueue(m) 577 vm_page_t m; 578{ 579 int queue = m->queue; 580 struct vpgqueues *pq; 581 if (queue != PQ_NONE) { 582 m->queue = PQ_NONE; 583 pq = &vm_page_queues[queue]; 584 TAILQ_REMOVE(pq->pl, m, pageq); 585 (*pq->cnt)--; 586 (*pq->lcnt)--; 587 if ((queue - m->pc) == PQ_CACHE) { 588 if ((cnt.v_cache_count + cnt.v_free_count) < 589 (cnt.v_free_reserved + cnt.v_cache_min)) 590 pagedaemon_wakeup(); 591 if (m->object) 592 m->object->cache_count--; 593 } 594 } 595} 596 597/* 598 * Find a page on the specified queue with color optimization. 599 */ 600vm_page_t 601vm_page_list_find(basequeue, index) 602 int basequeue, index; 603{ 604#if PQ_L2_SIZE > 1 605 606 int i,j; 607 vm_page_t m; 608 int hindex; 609 struct vpgqueues *pq; 610 611 pq = &vm_page_queues[basequeue]; 612 613 m = TAILQ_FIRST(pq[index].pl); 614 if (m) 615 return m; 616 617 for(j = 0; j < PQ_L1_SIZE; j++) { 618 int ij; 619 for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE; 620 (ij = i + j) > 0; 621 i -= PQ_L1_SIZE) { 622 623 hindex = index + ij; 624 if (hindex >= PQ_L2_SIZE) 625 hindex -= PQ_L2_SIZE; 626 if (m = TAILQ_FIRST(pq[hindex].pl)) 627 return m; 628 629 hindex = index - ij; 630 if (hindex < 0) 631 hindex += PQ_L2_SIZE; 632 if (m = TAILQ_FIRST(pq[hindex].pl)) 633 return m; 634 } 635 } 636 637 hindex = index + PQ_L2_SIZE / 2; 638 if (hindex >= PQ_L2_SIZE) 639 hindex -= PQ_L2_SIZE; 640 m = TAILQ_FIRST(pq[hindex].pl); 641 if (m) 642 return m; 643 644 return NULL; 645#else 646 return TAILQ_FIRST(vm_page_queues[basequeue].pl); 647#endif 648 649} 650 651/* 652 * Find a page on the specified queue with color optimization. 653 */ 654vm_page_t 655vm_page_select(object, pindex, basequeue) 656 vm_object_t object; 657 vm_pindex_t pindex; 658 int basequeue; 659{ 660 661#if PQ_L2_SIZE > 1 662 int index; 663 index = (pindex + object->pg_color) & PQ_L2_MASK; 664 return vm_page_list_find(basequeue, index); 665 666#else 667 return TAILQ_FIRST(vm_page_queues[basequeue].pl); 668#endif 669 670} 671 672/* 673 * Find a page on the cache queue with color optimization. As pages 674 * might be found, but not applicable, they are deactivated. This 675 * keeps us from using potentially busy cached pages. 676 */ 677vm_page_t 678vm_page_select_cache(object, pindex) 679 vm_object_t object; 680 vm_pindex_t pindex; 681{ 682 vm_page_t m; 683 684 while (TRUE) { 685#if PQ_L2_SIZE > 1 686 int index; 687 index = (pindex + object->pg_color) & PQ_L2_MASK; 688 m = vm_page_list_find(PQ_CACHE, index); 689 690#else 691 m = TAILQ_FIRST(vm_page_queues[PQ_CACHE].pl); 692#endif 693 if (m && ((m->flags & PG_BUSY) || m->busy || 694 m->hold_count || m->wire_count)) { 695 vm_page_deactivate(m); 696 continue; 697 } 698 return m; 699 } 700} 701 702/* 703 * Find a free or zero page, with specified preference. 704 */ 705static vm_page_t 706vm_page_select_free(object, pindex, prefqueue) 707 vm_object_t object; 708 vm_pindex_t pindex; 709 int prefqueue; 710{ 711#if PQ_L2_SIZE > 1 712 int i,j; 713 int index, hindex; 714#endif 715 vm_page_t m, mh; 716 int oqueuediff; 717 struct vpgqueues *pq; 718 719 if (prefqueue == PQ_ZERO) 720 oqueuediff = PQ_FREE - PQ_ZERO; 721 else 722 oqueuediff = PQ_ZERO - PQ_FREE; 723 724 if (mh = object->page_hint) { 725 if (mh->pindex == (pindex - 1)) { 726 if ((mh->flags & PG_FICTITIOUS) == 0) { 727 if ((mh < &vm_page_array[cnt.v_page_count-1]) && 728 (mh >= &vm_page_array[0])) { 729 int queue; 730 m = mh + 1; 731 if (VM_PAGE_TO_PHYS(m) == (VM_PAGE_TO_PHYS(mh) + PAGE_SIZE)) { 732 queue = m->queue - m->pc; 733 if (queue == PQ_FREE || queue == PQ_ZERO) { 734 return m; 735 } 736 } 737 } 738 } 739 } 740 } 741 742 pq = &vm_page_queues[prefqueue]; 743 744#if PQ_L2_SIZE > 1 745 746 index = (pindex + object->pg_color) & PQ_L2_MASK; 747 748 if (m = TAILQ_FIRST(pq[index].pl)) 749 return m; 750 if (m = TAILQ_FIRST(pq[index + oqueuediff].pl)) 751 return m; 752 753 for(j = 0; j < PQ_L1_SIZE; j++) { 754 int ij; 755 for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE; 756 (ij = i + j) >= 0; 757 i -= PQ_L1_SIZE) { 758 759 hindex = index + ij; 760 if (hindex >= PQ_L2_SIZE) 761 hindex -= PQ_L2_SIZE; 762 if (m = TAILQ_FIRST(pq[hindex].pl)) 763 return m; 764 if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) 765 return m; 766 767 hindex = index - ij; 768 if (hindex < 0) 769 hindex += PQ_L2_SIZE; 770 if (m = TAILQ_FIRST(pq[hindex].pl)) 771 return m; 772 if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) 773 return m; 774 } 775 } 776 777 hindex = index + PQ_L2_SIZE / 2; 778 if (hindex >= PQ_L2_SIZE) 779 hindex -= PQ_L2_SIZE; 780 if (m = TAILQ_FIRST(pq[hindex].pl)) 781 return m; 782 if (m = TAILQ_FIRST(pq[hindex+oqueuediff].pl)) 783 return m; 784 785#else 786 if (m = TAILQ_FIRST(pq[0].pl)) 787 return m; 788 else 789 return TAILQ_FIRST(pq[oqueuediff].pl); 790#endif 791 792 return NULL; 793} 794 795/* 796 * vm_page_alloc: 797 * 798 * Allocate and return a memory cell associated 799 * with this VM object/offset pair. 800 * 801 * page_req classes: 802 * VM_ALLOC_NORMAL normal process request 803 * VM_ALLOC_SYSTEM system *really* needs a page 804 * VM_ALLOC_INTERRUPT interrupt time request 805 * VM_ALLOC_ZERO zero page 806 * 807 * Object must be locked. 808 */ 809vm_page_t 810vm_page_alloc(object, pindex, page_req) 811 vm_object_t object; 812 vm_pindex_t pindex; 813 int page_req; 814{ 815 register vm_page_t m; 816 struct vpgqueues *pq; 817 vm_object_t oldobject; 818 int queue, qtype; 819 int s; 820 821#ifdef DIAGNOSTIC 822 m = vm_page_lookup(object, pindex); 823 if (m) 824 panic("vm_page_alloc: page already allocated"); 825#endif 826 827 if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) { 828 page_req = VM_ALLOC_SYSTEM; 829 }; 830 831 s = splvm(); 832 833 switch (page_req) { 834 835 case VM_ALLOC_NORMAL: 836 if (cnt.v_free_count >= cnt.v_free_reserved) { 837 m = vm_page_select_free(object, pindex, PQ_FREE); 838#if defined(DIAGNOSTIC) 839 if (m == NULL) 840 panic("vm_page_alloc(NORMAL): missing page on free queue\n"); 841#endif 842 } else { 843 m = vm_page_select_cache(object, pindex); 844 if (m == NULL) { 845 splx(s); 846#if defined(DIAGNOSTIC) 847 if (cnt.v_cache_count > 0) 848 printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count); 849#endif 850 vm_pageout_deficit++; 851 pagedaemon_wakeup(); 852 return (NULL); 853 } 854 } 855 break; 856 857 case VM_ALLOC_ZERO: 858 if (cnt.v_free_count >= cnt.v_free_reserved) { 859 m = vm_page_select_free(object, pindex, PQ_ZERO); 860#if defined(DIAGNOSTIC) 861 if (m == NULL) 862 panic("vm_page_alloc(ZERO): missing page on free queue\n"); 863#endif 864 } else { 865 m = vm_page_select_cache(object, pindex); 866 if (m == NULL) { 867 splx(s); 868#if defined(DIAGNOSTIC) 869 if (cnt.v_cache_count > 0) 870 printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count); 871#endif 872 vm_pageout_deficit++; 873 pagedaemon_wakeup(); 874 return (NULL); 875 } 876 } 877 break; 878 879 case VM_ALLOC_SYSTEM: 880 if ((cnt.v_free_count >= cnt.v_free_reserved) || 881 ((cnt.v_cache_count == 0) && 882 (cnt.v_free_count >= cnt.v_interrupt_free_min))) { 883 m = vm_page_select_free(object, pindex, PQ_FREE); 884#if defined(DIAGNOSTIC) 885 if (m == NULL) 886 panic("vm_page_alloc(SYSTEM): missing page on free queue\n"); 887#endif 888 } else { 889 m = vm_page_select_cache(object, pindex); 890 if (m == NULL) { 891 splx(s); 892#if defined(DIAGNOSTIC) 893 if (cnt.v_cache_count > 0) 894 printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count); 895#endif 896 vm_pageout_deficit++; 897 pagedaemon_wakeup(); 898 return (NULL); 899 } 900 } 901 break; 902 903 case VM_ALLOC_INTERRUPT: 904 if (cnt.v_free_count > 0) { 905 m = vm_page_select_free(object, pindex, PQ_FREE); 906#if defined(DIAGNOSTIC) 907 if (m == NULL) 908 panic("vm_page_alloc(INTERRUPT): missing page on free queue\n"); 909#endif 910 } else { 911 splx(s); 912 vm_pageout_deficit++; 913 pagedaemon_wakeup(); 914 return (NULL); 915 } 916 break; 917 918 default: 919 m = NULL; 920#if !defined(MAX_PERF) 921 panic("vm_page_alloc: invalid allocation class"); 922#endif 923 } 924 925 queue = m->queue; 926 qtype = queue - m->pc; 927 if (qtype == PQ_ZERO) 928 vm_page_zero_count--; 929 pq = &vm_page_queues[queue]; 930 TAILQ_REMOVE(pq->pl, m, pageq); 931 (*pq->cnt)--; 932 (*pq->lcnt)--; 933 oldobject = NULL; 934 if (qtype == PQ_ZERO) { 935 m->flags = PG_ZERO | PG_BUSY; 936 } else if (qtype == PQ_CACHE) { 937 oldobject = m->object; 938 vm_page_busy(m); 939 vm_page_remove(m); 940 m->flags = PG_BUSY; 941 } else { 942 m->flags = PG_BUSY; 943 } 944 m->wire_count = 0; 945 m->hold_count = 0; 946 m->act_count = 0; 947 m->busy = 0; 948 m->valid = 0; 949 m->dirty = 0; 950 m->queue = PQ_NONE; 951 952 /* XXX before splx until vm_page_insert is safe */ 953 vm_page_insert(m, object, pindex); 954 955 /* 956 * Don't wakeup too often - wakeup the pageout daemon when 957 * we would be nearly out of memory. 958 */ 959 if (((cnt.v_free_count + cnt.v_cache_count) < 960 (cnt.v_free_reserved + cnt.v_cache_min)) || 961 (cnt.v_free_count < cnt.v_pageout_free_min)) 962 pagedaemon_wakeup(); 963 964 if ((qtype == PQ_CACHE) && 965 ((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) && 966 oldobject && (oldobject->type == OBJT_VNODE) && 967 ((oldobject->flags & OBJ_DEAD) == 0)) { 968 struct vnode *vp; 969 vp = (struct vnode *) oldobject->handle; 970 if (vp && VSHOULDFREE(vp)) { 971 if ((vp->v_flag & (VFREE|VTBFREE|VDOOMED)) == 0) { 972 TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist); 973 vp->v_flag |= VTBFREE; 974 } 975 } 976 } 977 splx(s); 978 979 return (m); 980} 981 982void 983vm_wait() 984{ 985 int s; 986 987 s = splvm(); 988 if (curproc == pageproc) { 989 vm_pageout_pages_needed = 1; 990 tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0); 991 } else { 992 if (!vm_pages_needed) { 993 vm_pages_needed++; 994 wakeup(&vm_pages_needed); 995 } 996 tsleep(&cnt.v_free_count, PVM, "vmwait", 0); 997 } 998 splx(s); 999} 1000 1001int 1002vm_page_sleep(vm_page_t m, char *msg, char *busy) { 1003 int slept = 0; 1004 if ((busy && *busy) || (m->flags & PG_BUSY)) { 1005 int s; 1006 s = splvm(); 1007 if ((busy && *busy) || (m->flags & PG_BUSY)) { 1008 vm_page_flag_set(m, PG_WANTED); 1009 tsleep(m, PVM, msg, 0); 1010 slept = 1; 1011 } 1012 splx(s); 1013 } 1014 return slept; 1015} 1016 1017/* 1018 * vm_page_activate: 1019 * 1020 * Put the specified page on the active list (if appropriate). 1021 * 1022 * The page queues must be locked. 1023 */ 1024void 1025vm_page_activate(m) 1026 register vm_page_t m; 1027{ 1028 int s; 1029 1030 s = splvm(); 1031 if (m->queue != PQ_ACTIVE) { 1032 if ((m->queue - m->pc) == PQ_CACHE) 1033 cnt.v_reactivated++; 1034 1035 vm_page_unqueue(m); 1036 1037 if (m->wire_count == 0) { 1038 m->queue = PQ_ACTIVE; 1039 ++(*vm_page_queues[PQ_ACTIVE].lcnt); 1040 TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); 1041 if (m->act_count < ACT_INIT) 1042 m->act_count = ACT_INIT; 1043 cnt.v_active_count++; 1044 } 1045 } else { 1046 if (m->act_count < ACT_INIT) 1047 m->act_count = ACT_INIT; 1048 } 1049 1050 splx(s); 1051} 1052 1053/* 1054 * helper routine for vm_page_free and vm_page_free_zero 1055 */ 1056static int 1057vm_page_freechk_and_unqueue(m) 1058 vm_page_t m; 1059{ 1060 vm_object_t oldobject; 1061 1062 oldobject = m->object; 1063 1064#if !defined(MAX_PERF) 1065 if (m->busy || ((m->queue - m->pc) == PQ_FREE) || 1066 (m->hold_count != 0)) { 1067 printf( 1068 "vm_page_free: pindex(%lu), busy(%d), PG_BUSY(%d), hold(%d)\n", 1069 (u_long)m->pindex, m->busy, (m->flags & PG_BUSY) ? 1 : 0, 1070 m->hold_count); 1071 if ((m->queue - m->pc) == PQ_FREE) 1072 panic("vm_page_free: freeing free page"); 1073 else 1074 panic("vm_page_free: freeing busy page"); 1075 } 1076#endif 1077 1078 vm_page_unqueue_nowakeup(m); 1079 vm_page_remove(m); 1080 1081 if ((m->flags & PG_FICTITIOUS) != 0) { 1082 return 0; 1083 } 1084 1085 m->valid = 0; 1086 1087 if (m->wire_count != 0) { 1088#if !defined(MAX_PERF) 1089 if (m->wire_count > 1) { 1090 panic("vm_page_free: invalid wire count (%d), pindex: 0x%x", 1091 m->wire_count, m->pindex); 1092 } 1093#endif 1094 printf("vm_page_free: freeing wired page\n"); 1095 m->wire_count = 0; 1096 if (m->object) 1097 m->object->wire_count--; 1098 cnt.v_wire_count--; 1099 } 1100 1101 if (oldobject && (oldobject->type == OBJT_VNODE) && 1102 ((oldobject->flags & OBJ_DEAD) == 0)) { 1103 struct vnode *vp; 1104 vp = (struct vnode *) oldobject->handle; 1105 if (vp && VSHOULDFREE(vp)) { 1106 if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) { 1107 TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist); 1108 vp->v_flag |= VTBFREE; 1109 } 1110 } 1111 } 1112 1113#ifdef __alpha__ 1114 pmap_page_is_free(m); 1115#endif 1116 1117 return 1; 1118} 1119 1120/* 1121 * helper routine for vm_page_free and vm_page_free_zero 1122 */ 1123static __inline void 1124vm_page_free_wakeup() 1125{ 1126 1127/* 1128 * if pageout daemon needs pages, then tell it that there are 1129 * some free. 1130 */ 1131 if (vm_pageout_pages_needed) { 1132 wakeup(&vm_pageout_pages_needed); 1133 vm_pageout_pages_needed = 0; 1134 } 1135 /* 1136 * wakeup processes that are waiting on memory if we hit a 1137 * high water mark. And wakeup scheduler process if we have 1138 * lots of memory. this process will swapin processes. 1139 */ 1140 if (vm_pages_needed && 1141 ((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) { 1142 wakeup(&cnt.v_free_count); 1143 vm_pages_needed = 0; 1144 } 1145} 1146 1147/* 1148 * vm_page_free: 1149 * 1150 * Returns the given page to the free list, 1151 * disassociating it with any VM object. 1152 * 1153 * Object and page must be locked prior to entry. 1154 */ 1155void 1156vm_page_free(m) 1157 register vm_page_t m; 1158{ 1159 int s; 1160 struct vpgqueues *pq; 1161 1162 s = splvm(); 1163 1164 cnt.v_tfree++; 1165 1166 if (!vm_page_freechk_and_unqueue(m)) { 1167 splx(s); 1168 return; 1169 } 1170 1171 m->queue = PQ_FREE + m->pc; 1172 pq = &vm_page_queues[m->queue]; 1173 ++(*pq->lcnt); 1174 ++(*pq->cnt); 1175 /* 1176 * If the pageout process is grabbing the page, it is likely 1177 * that the page is NOT in the cache. It is more likely that 1178 * the page will be partially in the cache if it is being 1179 * explicitly freed. 1180 */ 1181 if (curproc == pageproc) { 1182 TAILQ_INSERT_TAIL(pq->pl, m, pageq); 1183 } else { 1184 TAILQ_INSERT_HEAD(pq->pl, m, pageq); 1185 } 1186 1187 vm_page_free_wakeup(); 1188 splx(s); 1189} 1190 1191void 1192vm_page_free_zero(m) 1193 register vm_page_t m; 1194{ 1195 int s; 1196 struct vpgqueues *pq; 1197 1198 s = splvm(); 1199 1200 cnt.v_tfree++; 1201 1202 if (!vm_page_freechk_and_unqueue(m)) { 1203 splx(s); 1204 return; 1205 } 1206 1207 m->queue = PQ_ZERO + m->pc; 1208 pq = &vm_page_queues[m->queue]; 1209 ++(*pq->lcnt); 1210 ++(*pq->cnt); 1211 1212 TAILQ_INSERT_HEAD(pq->pl, m, pageq); 1213 ++vm_page_zero_count; 1214 vm_page_free_wakeup(); 1215 splx(s); 1216} 1217 1218/* 1219 * vm_page_wire: 1220 * 1221 * Mark this page as wired down by yet 1222 * another map, removing it from paging queues 1223 * as necessary. 1224 * 1225 * The page queues must be locked. 1226 */ 1227void 1228vm_page_wire(m) 1229 register vm_page_t m; 1230{ 1231 int s; 1232 1233 if (m->wire_count == 0) { 1234 s = splvm(); 1235 vm_page_unqueue(m); 1236 splx(s); 1237 cnt.v_wire_count++; 1238 if (m->object) 1239 m->object->wire_count++; 1240 } 1241 (*vm_page_queues[PQ_NONE].lcnt)++; 1242 m->wire_count++; 1243 vm_page_flag_set(m, PG_MAPPED); 1244} 1245 1246/* 1247 * vm_page_unwire: 1248 * 1249 * Release one wiring of this page, potentially 1250 * enabling it to be paged again. 1251 * 1252 * The page queues must be locked. 1253 */ 1254void 1255vm_page_unwire(m, activate) 1256 register vm_page_t m; 1257 int activate; 1258{ 1259 int s; 1260 1261 s = splvm(); 1262 1263 if (m->wire_count > 0) { 1264 m->wire_count--; 1265 if (m->wire_count == 0) { 1266 if (m->object) 1267 m->object->wire_count--; 1268 cnt.v_wire_count--; 1269 if (activate) { 1270 TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); 1271 m->queue = PQ_ACTIVE; 1272 (*vm_page_queues[PQ_ACTIVE].lcnt)++; 1273 cnt.v_active_count++; 1274 } else { 1275 TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); 1276 m->queue = PQ_INACTIVE; 1277 (*vm_page_queues[PQ_INACTIVE].lcnt)++; 1278 cnt.v_inactive_count++; 1279 } 1280 } 1281 } else { 1282#if !defined(MAX_PERF) 1283 panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count); 1284#endif 1285 } 1286 splx(s); 1287} 1288 1289 1290/* 1291 * vm_page_deactivate: 1292 * 1293 * Returns the given page to the inactive list, 1294 * indicating that no physical maps have access 1295 * to this page. [Used by the physical mapping system.] 1296 * 1297 * The page queues must be locked. 1298 */ 1299void 1300vm_page_deactivate(m) 1301 register vm_page_t m; 1302{ 1303 int s; 1304 1305 /* 1306 * Only move active pages -- ignore locked or already inactive ones. 1307 * 1308 * XXX: sometimes we get pages which aren't wired down or on any queue - 1309 * we need to put them on the inactive queue also, otherwise we lose 1310 * track of them. Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93. 1311 */ 1312 if (m->queue == PQ_INACTIVE) 1313 return; 1314 1315 s = splvm(); 1316 if (m->wire_count == 0) { 1317 if ((m->queue - m->pc) == PQ_CACHE) 1318 cnt.v_reactivated++; 1319 vm_page_unqueue(m); 1320 TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); 1321 m->queue = PQ_INACTIVE; 1322 ++(*vm_page_queues[PQ_INACTIVE].lcnt); 1323 cnt.v_inactive_count++; 1324 } 1325 splx(s); 1326} 1327 1328/* 1329 * vm_page_cache 1330 * 1331 * Put the specified page onto the page cache queue (if appropriate). 1332 */ 1333void 1334vm_page_cache(m) 1335 register vm_page_t m; 1336{ 1337 int s; 1338 1339#if !defined(MAX_PERF) 1340 if ((m->flags & PG_BUSY) || m->busy || m->wire_count) { 1341 printf("vm_page_cache: attempting to cache busy page\n"); 1342 return; 1343 } 1344#endif 1345 if ((m->queue - m->pc) == PQ_CACHE) 1346 return; 1347 1348 vm_page_protect(m, VM_PROT_NONE); 1349#if !defined(MAX_PERF) 1350 if (m->dirty != 0) { 1351 panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex); 1352 } 1353#endif 1354 s = splvm(); 1355 vm_page_unqueue_nowakeup(m); 1356 m->queue = PQ_CACHE + m->pc; 1357 (*vm_page_queues[m->queue].lcnt)++; 1358 TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq); 1359 cnt.v_cache_count++; 1360 m->object->cache_count++; 1361 vm_page_free_wakeup(); 1362 splx(s); 1363} 1364 1365/* 1366 * Grab a page, waiting until we are waken up due to the page 1367 * changing state. We keep on waiting, if the page continues 1368 * to be in the object. If the page doesn't exist, allocate it. 1369 */ 1370vm_page_t 1371vm_page_grab(object, pindex, allocflags) 1372 vm_object_t object; 1373 vm_pindex_t pindex; 1374 int allocflags; 1375{ 1376 1377 vm_page_t m; 1378 int s, generation; 1379 1380retrylookup: 1381 if ((m = vm_page_lookup(object, pindex)) != NULL) { 1382 if (m->busy || (m->flags & PG_BUSY)) { 1383 generation = object->generation; 1384 1385 s = splvm(); 1386 while ((object->generation == generation) && 1387 (m->busy || (m->flags & PG_BUSY))) { 1388 vm_page_flag_set(m, PG_WANTED | PG_REFERENCED); 1389 tsleep(m, PVM, "pgrbwt", 0); 1390 if ((allocflags & VM_ALLOC_RETRY) == 0) { 1391 splx(s); 1392 return NULL; 1393 } 1394 } 1395 splx(s); 1396 goto retrylookup; 1397 } else { 1398 vm_page_busy(m); 1399 return m; 1400 } 1401 } 1402 1403 m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY); 1404 if (m == NULL) { 1405 VM_WAIT; 1406 if ((allocflags & VM_ALLOC_RETRY) == 0) 1407 return NULL; 1408 goto retrylookup; 1409 } 1410 1411 return m; 1412} 1413 1414/* 1415 * mapping function for valid bits or for dirty bits in 1416 * a page 1417 */ 1418__inline int 1419vm_page_bits(int base, int size) 1420{ 1421 u_short chunk; 1422 1423 if ((base == 0) && (size >= PAGE_SIZE)) 1424 return VM_PAGE_BITS_ALL; 1425 1426 size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 1427 base &= PAGE_MASK; 1428 if (size > PAGE_SIZE - base) { 1429 size = PAGE_SIZE - base; 1430 } 1431 1432 base = base / DEV_BSIZE; 1433 chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE]; 1434 return (chunk << base) & VM_PAGE_BITS_ALL; 1435} 1436 1437/* 1438 * set a page valid and clean 1439 */ 1440void 1441vm_page_set_validclean(m, base, size) 1442 vm_page_t m; 1443 int base; 1444 int size; 1445{ 1446 int pagebits = vm_page_bits(base, size); 1447 m->valid |= pagebits; 1448 m->dirty &= ~pagebits; 1449 if( base == 0 && size == PAGE_SIZE) 1450 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 1451} 1452 1453/* 1454 * set a page (partially) invalid 1455 */ 1456void 1457vm_page_set_invalid(m, base, size) 1458 vm_page_t m; 1459 int base; 1460 int size; 1461{ 1462 int bits; 1463 1464 m->valid &= ~(bits = vm_page_bits(base, size)); 1465 if (m->valid == 0) 1466 m->dirty &= ~bits; 1467 m->object->generation++; 1468} 1469 1470/* 1471 * is (partial) page valid? 1472 */ 1473int 1474vm_page_is_valid(m, base, size) 1475 vm_page_t m; 1476 int base; 1477 int size; 1478{ 1479 int bits = vm_page_bits(base, size); 1480 1481 if (m->valid && ((m->valid & bits) == bits)) 1482 return 1; 1483 else 1484 return 0; 1485} 1486 1487void 1488vm_page_test_dirty(m) 1489 vm_page_t m; 1490{ 1491 if ((m->dirty != VM_PAGE_BITS_ALL) && 1492 pmap_is_modified(VM_PAGE_TO_PHYS(m))) { 1493 m->dirty = VM_PAGE_BITS_ALL; 1494 } 1495} 1496 1497/* 1498 * This interface is for merging with malloc() someday. 1499 * Even if we never implement compaction so that contiguous allocation 1500 * works after initialization time, malloc()'s data structures are good 1501 * for statistics and for allocations of less than a page. 1502 */ 1503void * 1504contigmalloc1(size, type, flags, low, high, alignment, boundary, map) 1505 unsigned long size; /* should be size_t here and for malloc() */ 1506 struct malloc_type *type; 1507 int flags; 1508 unsigned long low; 1509 unsigned long high; 1510 unsigned long alignment; 1511 unsigned long boundary; 1512 vm_map_t map; 1513{ 1514 int i, s, start; 1515 vm_offset_t addr, phys, tmp_addr; 1516 int pass; 1517 vm_page_t pga = vm_page_array; 1518 1519 size = round_page(size); 1520#if !defined(MAX_PERF) 1521 if (size == 0) 1522 panic("contigmalloc1: size must not be 0"); 1523 if ((alignment & (alignment - 1)) != 0) 1524 panic("contigmalloc1: alignment must be a power of 2"); 1525 if ((boundary & (boundary - 1)) != 0) 1526 panic("contigmalloc1: boundary must be a power of 2"); 1527#endif 1528 1529 start = 0; 1530 for (pass = 0; pass <= 1; pass++) { 1531 s = splvm(); 1532again: 1533 /* 1534 * Find first page in array that is free, within range, aligned, and 1535 * such that the boundary won't be crossed. 1536 */ 1537 for (i = start; i < cnt.v_page_count; i++) { 1538 int pqtype; 1539 phys = VM_PAGE_TO_PHYS(&pga[i]); 1540 pqtype = pga[i].queue - pga[i].pc; 1541 if (((pqtype == PQ_ZERO) || (pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) && 1542 (phys >= low) && (phys < high) && 1543 ((phys & (alignment - 1)) == 0) && 1544 (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)) 1545 break; 1546 } 1547 1548 /* 1549 * If the above failed or we will exceed the upper bound, fail. 1550 */ 1551 if ((i == cnt.v_page_count) || 1552 ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { 1553 vm_page_t m, next; 1554 1555again1: 1556 for (m = TAILQ_FIRST(&vm_page_queue_inactive); 1557 m != NULL; 1558 m = next) { 1559 1560 if (m->queue != PQ_INACTIVE) { 1561 break; 1562 } 1563 1564 next = TAILQ_NEXT(m, pageq); 1565 if (vm_page_sleep(m, "vpctw0", &m->busy)) 1566 goto again1; 1567 vm_page_test_dirty(m); 1568 if (m->dirty) { 1569 if (m->object->type == OBJT_VNODE) { 1570 vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc); 1571 vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC); 1572 VOP_UNLOCK(m->object->handle, 0, curproc); 1573 goto again1; 1574 } else if (m->object->type == OBJT_SWAP || 1575 m->object->type == OBJT_DEFAULT) { 1576 vm_pageout_flush(&m, 1, 0); 1577 goto again1; 1578 } 1579 } 1580 if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0)) 1581 vm_page_cache(m); 1582 } 1583 1584 for (m = TAILQ_FIRST(&vm_page_queue_active); 1585 m != NULL; 1586 m = next) { 1587 1588 if (m->queue != PQ_ACTIVE) { 1589 break; 1590 } 1591 1592 next = TAILQ_NEXT(m, pageq); 1593 if (vm_page_sleep(m, "vpctw1", &m->busy)) 1594 goto again1; 1595 vm_page_test_dirty(m); 1596 if (m->dirty) { 1597 if (m->object->type == OBJT_VNODE) { 1598 vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc); 1599 vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC); 1600 VOP_UNLOCK(m->object->handle, 0, curproc); 1601 goto again1; 1602 } else if (m->object->type == OBJT_SWAP || 1603 m->object->type == OBJT_DEFAULT) { 1604 vm_pageout_flush(&m, 1, 0); 1605 goto again1; 1606 } 1607 } 1608 if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0)) 1609 vm_page_cache(m); 1610 } 1611 1612 splx(s); 1613 continue; 1614 } 1615 start = i; 1616 1617 /* 1618 * Check successive pages for contiguous and free. 1619 */ 1620 for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { 1621 int pqtype; 1622 pqtype = pga[i].queue - pga[i].pc; 1623 if ((VM_PAGE_TO_PHYS(&pga[i]) != 1624 (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || 1625 ((pqtype != PQ_ZERO) && (pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) { 1626 start++; 1627 goto again; 1628 } 1629 } 1630 1631 for (i = start; i < (start + size / PAGE_SIZE); i++) { 1632 int pqtype; 1633 vm_page_t m = &pga[i]; 1634 1635 pqtype = m->queue - m->pc; 1636 if (pqtype == PQ_CACHE) { 1637 vm_page_busy(m); 1638 vm_page_free(m); 1639 } 1640 1641 TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq); 1642 (*vm_page_queues[m->queue].lcnt)--; 1643 cnt.v_free_count--; 1644 m->valid = VM_PAGE_BITS_ALL; 1645 m->flags = 0; 1646 m->dirty = 0; 1647 m->wire_count = 0; 1648 m->busy = 0; 1649 m->queue = PQ_NONE; 1650 m->object = NULL; 1651 vm_page_wire(m); 1652 } 1653 1654 /* 1655 * We've found a contiguous chunk that meets are requirements. 1656 * Allocate kernel VM, unfree and assign the physical pages to it and 1657 * return kernel VM pointer. 1658 */ 1659 tmp_addr = addr = kmem_alloc_pageable(map, size); 1660 if (addr == 0) { 1661 /* 1662 * XXX We almost never run out of kernel virtual 1663 * space, so we don't make the allocated memory 1664 * above available. 1665 */ 1666 splx(s); 1667 return (NULL); 1668 } 1669 1670 for (i = start; i < (start + size / PAGE_SIZE); i++) { 1671 vm_page_t m = &pga[i]; 1672 vm_page_insert(m, kernel_object, 1673 OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS)); 1674 pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m)); 1675 tmp_addr += PAGE_SIZE; 1676 } 1677 1678 splx(s); 1679 return ((void *)addr); 1680 } 1681 return NULL; 1682} 1683 1684void * 1685contigmalloc(size, type, flags, low, high, alignment, boundary) 1686 unsigned long size; /* should be size_t here and for malloc() */ 1687 struct malloc_type *type; 1688 int flags; 1689 unsigned long low; 1690 unsigned long high; 1691 unsigned long alignment; 1692 unsigned long boundary; 1693{ 1694 return contigmalloc1(size, type, flags, low, high, alignment, boundary, 1695 kernel_map); 1696} 1697 1698vm_offset_t 1699vm_page_alloc_contig(size, low, high, alignment) 1700 vm_offset_t size; 1701 vm_offset_t low; 1702 vm_offset_t high; 1703 vm_offset_t alignment; 1704{ 1705 return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high, 1706 alignment, 0ul, kernel_map)); 1707} 1708 1709#include "opt_ddb.h" 1710#ifdef DDB 1711#include <sys/kernel.h> 1712 1713#include <ddb/ddb.h> 1714 1715DB_SHOW_COMMAND(page, vm_page_print_page_info) 1716{ 1717 db_printf("cnt.v_free_count: %d\n", cnt.v_free_count); 1718 db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count); 1719 db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count); 1720 db_printf("cnt.v_active_count: %d\n", cnt.v_active_count); 1721 db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count); 1722 db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved); 1723 db_printf("cnt.v_free_min: %d\n", cnt.v_free_min); 1724 db_printf("cnt.v_free_target: %d\n", cnt.v_free_target); 1725 db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min); 1726 db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target); 1727} 1728 1729DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) 1730{ 1731 int i; 1732 db_printf("PQ_FREE:"); 1733 for(i=0;i<PQ_L2_SIZE;i++) { 1734 db_printf(" %d", *vm_page_queues[PQ_FREE + i].lcnt); 1735 } 1736 db_printf("\n"); 1737 1738 db_printf("PQ_CACHE:"); 1739 for(i=0;i<PQ_L2_SIZE;i++) { 1740 db_printf(" %d", *vm_page_queues[PQ_CACHE + i].lcnt); 1741 } 1742 db_printf("\n"); 1743 1744 db_printf("PQ_ZERO:"); 1745 for(i=0;i<PQ_L2_SIZE;i++) { 1746 db_printf(" %d", *vm_page_queues[PQ_ZERO + i].lcnt); 1747 } 1748 db_printf("\n"); 1749 1750 db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n", 1751 *vm_page_queues[PQ_ACTIVE].lcnt, 1752 *vm_page_queues[PQ_INACTIVE].lcnt); 1753} 1754#endif /* DDB */ 1755