/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 */

/*-
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	GENERAL RULES ON VM_PAGE MANIPULATION
 *
 *	- a pageq mutex is required when adding or removing a page from a
 *	  page queue (vm_page_queues[]), regardless of other mutexes or the
 *	  busy state of a page.
 *
 *	- The object mutex is held when inserting or removing
 *	  pages from an object (vm_page_insert() or vm_page_remove()).
 *
 */

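/*
 * Editor's illustrative sketch (not part of the original source): the
 * rules above as they appear in a typical caller that makes a resident
 * page active.  "object", "pindex", and "m" are hypothetical;
 * vm_page_activate() acquires the page queue mutex internally before
 * touching vm_page_queues[].
 *
 *	VM_OBJECT_LOCK(object);
 *	m = vm_page_lookup(object, pindex);
 *	vm_page_lock(m);
 *	vm_page_activate(m);
 *	vm_page_unlock(m);
 *	VM_OBJECT_UNLOCK(object);
 */
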
/*
 *	Resident memory management module.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/9/sys/vm/vm_page.c 254087 2013-08-08 06:03:34Z kib $");

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

#include <machine/md_var.h>

/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

struct vpgqueues vm_page_queues[PQ_COUNT];
struct vpglocks vm_page_queue_lock;
struct vpglocks vm_page_queue_free_lock;

struct vpglocks	pa_lock[PA_LOCK_COUNT];

vm_page_t vm_page_array;
long vm_page_array_size;
long first_page;
int vm_page_zero_count;

static int boot_pages = UMA_BOOT_PAGES;
TUNABLE_INT("vm.boot_pages", &boot_pages);
SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RD, &boot_pages, 0,
	"number of pages allocated for bootstrapping the VM system");

int pa_tryrelock_restart;
SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
    &pa_tryrelock_restart, 0, "Number of tryrelock restarts");

static uma_zone_t fakepg_zone;

static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
static void vm_page_queue_remove(int queue, vm_page_t m);
static void vm_page_enqueue(int queue, vm_page_t m);
static void vm_page_init_fakepg(void *dummy);

SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL);

static void
vm_page_init_fakepg(void *dummy)
{

	fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM);
}

/* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */
#if PAGE_SIZE == 32768
#ifdef CTASSERT
CTASSERT(sizeof(u_long) >= 8);
#endif
#endif

/*
 * Try to acquire a physical address lock while a pmap is locked.  If we
 * fail to trylock we unlock and lock the pmap directly and cache the
 * locked pa in *locked.  The caller should then restart their loop in case
 * the virtual to physical mapping has changed.
 */
int
vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked)
{
	vm_paddr_t lockpa;

	lockpa = *locked;
	*locked = pa;
	if (lockpa) {
		PA_LOCK_ASSERT(lockpa, MA_OWNED);
		if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa))
			return (0);
		PA_UNLOCK(lockpa);
	}
	if (PA_TRYLOCK(pa))
		return (0);
	PMAP_UNLOCK(pmap);
	atomic_add_int(&pa_tryrelock_restart, 1);
	PA_LOCK(pa);
	PMAP_LOCK(pmap);
	return (EAGAIN);
}

/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 */
void
vm_set_page_size(void)
{
	if (cnt.v_page_size == 0)
		cnt.v_page_size = PAGE_SIZE;
	if (((cnt.v_page_size - 1) & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
}

/*
 *	vm_page_blacklist_lookup:
 *
 *	See if a physical address in this page has been listed
 *	in the blacklist tunable.  Entries in the tunable are
 *	separated by spaces or commas.  If an invalid integer is
 *	encountered then the rest of the string is skipped.
 */
static int
vm_page_blacklist_lookup(char *list, vm_paddr_t pa)
{
	vm_paddr_t bad;
	char *cp, *pos;

	for (pos = list; *pos != '\0'; pos = cp) {
		bad = strtoq(pos, &cp, 0);
		if (*cp != '\0') {
			if (*cp == ' ' || *cp == ',') {
				cp++;
				if (cp == pos)
					continue;
			} else
				break;
		}
		if (pa == trunc_page(bad))
			return (1);
	}
	return (0);
}

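/*
 * Editor's illustrative note (not part of the original source): the
 * blacklist is supplied through the loader tunable "vm.blacklist",
 * read via getenv() in vm_page_startup() below, e.g. in
 * /boot/loader.conf:
 *
 *	vm.blacklist="0x1E34000,0x2A07000"
 *
 * The addresses shown are hypothetical; any page whose physical
 * address truncates to a listed value is withheld from the free lists.
 */
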
/*
 *	vm_page_startup:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 */
vm_offset_t
vm_page_startup(vm_offset_t vaddr)
{
	vm_offset_t mapped;
	vm_paddr_t page_range;
	vm_paddr_t new_end;
	int i;
	vm_paddr_t pa;
	vm_paddr_t last_pa;
	char *list;

	/* the biggest memory array is the second group of pages */
	vm_paddr_t end;
	vm_paddr_t biggestsize;
	vm_paddr_t low_water, high_water;
	int biggestone;

	biggestsize = 0;
	biggestone = 0;
	vaddr = round_page(vaddr);

	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	low_water = phys_avail[0];
	high_water = phys_avail[1];

	for (i = 0; phys_avail[i + 1]; i += 2) {
		vm_paddr_t size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		if (phys_avail[i] < low_water)
			low_water = phys_avail[i];
		if (phys_avail[i + 1] > high_water)
			high_water = phys_avail[i + 1];
	}

#ifdef XEN
	low_water = 0;
#endif

	end = phys_avail[biggestone+1];

	/*
	 * Initialize the page and queue locks.
	 */
	mtx_init(&vm_page_queue_mtx, "vm page queue", NULL, MTX_DEF |
	    MTX_RECURSE);
	mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF);
	for (i = 0; i < PA_LOCK_COUNT; i++)
		mtx_init(&pa_lock[i].data, "vm page", NULL, MTX_DEF);

	/*
	 * Initialize the queue headers for the hold queue, the active queue,
	 * and the inactive queue.
	 */
	for (i = 0; i < PQ_COUNT; i++)
		TAILQ_INIT(&vm_page_queues[i].pl);
	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
	vm_page_queues[PQ_HOLD].cnt = &cnt.v_active_count;

	/*
	 * Allocate memory for use when boot strapping the kernel memory
	 * allocator.
	 */
	new_end = end - (boot_pages * UMA_SLAB_SIZE);
	new_end = trunc_page(new_end);
	mapped = pmap_map(&vaddr, new_end, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	bzero((void *)mapped, end - new_end);
	uma_startup((void *)mapped, boot_pages);

#if defined(__amd64__) || defined(__i386__) || defined(__arm__) || \
    defined(__mips__)
	/*
	 * Allocate a bitmap to indicate that a random physical page
	 * needs to be included in a minidump.
	 *
	 * The amd64 port needs this to indicate which direct map pages
	 * need to be dumped, via calls to dump_add_page()/dump_drop_page().
	 *
	 * However, i386 still needs this workspace internally within the
	 * minidump code.  In theory, they are not needed on i386, but are
	 * included should the sf_buf code decide to use them.
	 */
	last_pa = 0;
	for (i = 0; dump_avail[i + 1] != 0; i += 2)
		if (dump_avail[i + 1] > last_pa)
			last_pa = dump_avail[i + 1];
	page_range = last_pa / PAGE_SIZE;
	vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY);
	new_end -= vm_page_dump_size;
	vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
	    new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
	bzero((void *)vm_page_dump, vm_page_dump_size);
#endif
#ifdef __amd64__
	/*
	 * Request that the physical pages underlying the message buffer be
	 * included in a crash dump.  Since the message buffer is accessed
	 * through the direct map, they are not automatically included.
	 */
	pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr);
	last_pa = pa + round_page(msgbufsize);
	while (pa < last_pa) {
		dump_add_page(pa);
		pa += PAGE_SIZE;
	}
#endif
	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */
	first_page = low_water / PAGE_SIZE;
#ifdef VM_PHYSSEG_SPARSE
	page_range = 0;
	for (i = 0; phys_avail[i + 1] != 0; i += 2)
		page_range += atop(phys_avail[i + 1] - phys_avail[i]);
#elif defined(VM_PHYSSEG_DENSE)
	page_range = high_water / PAGE_SIZE - first_page;
#else
#error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
#endif
	end = new_end;

	/*
	 * Reserve an unmapped guard page to trap access to vm_page_array[-1].
	 */
	vaddr += PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */
	new_end = trunc_page(end - page_range * sizeof(struct vm_page));
	mapped = pmap_map(&vaddr, new_end, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	vm_page_array = (vm_page_t) mapped;
#if VM_NRESERVLEVEL > 0
	/*
	 * Allocate memory for the reservation management system's data
	 * structures.
	 */
	new_end = vm_reserv_startup(&vaddr, new_end, high_water);
#endif
#if defined(__amd64__) || defined(__mips__)
	/*
	 * pmap_map on amd64 and mips can come out of the direct-map, not kvm
	 * like i386, so the pages must be tracked for a crashdump to include
	 * this data.  This includes the vm_page_array and the early UMA
	 * bootstrap pages.
	 */
	for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE)
		dump_add_page(pa);
#endif
	phys_avail[biggestone + 1] = new_end;

	/*
	 * Clear all of the page structures
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	for (i = 0; i < page_range; i++)
		vm_page_array[i].order = VM_NFREEORDER;
	vm_page_array_size = page_range;

	/*
	 * Initialize the physical memory allocator.
	 */
	vm_phys_init();

	/*
	 * Add every available physical page that is not blacklisted to
	 * the free lists.
	 */
	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	list = getenv("vm.blacklist");
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		pa = phys_avail[i];
		last_pa = phys_avail[i + 1];
		while (pa < last_pa) {
			if (list != NULL &&
			    vm_page_blacklist_lookup(list, pa))
				printf("Skipping page with pa 0x%jx\n",
				    (uintmax_t)pa);
			else
				vm_phys_add_page(pa);
			pa += PAGE_SIZE;
		}
	}
	freeenv(list);
#if VM_NRESERVLEVEL > 0
	/*
	 * Initialize the reservation management system.
	 */
	vm_reserv_init();
#endif
	return (vaddr);
}

CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0);

void
vm_page_aflag_set(vm_page_t m, uint8_t bits)
{
	uint32_t *addr, val;

	/*
	 * The PGA_WRITEABLE flag can only be set if the page is managed and
	 * VPO_BUSY.  Currently, this flag is only set by pmap_enter().
	 */
	KASSERT((bits & PGA_WRITEABLE) == 0 ||
	    (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY,
	    ("PGA_WRITEABLE and !VPO_BUSY"));

	/*
	 * We want to use atomic updates for m->aflags, which is a
	 * byte wide.  Not all architectures provide atomic operations
	 * on the single-byte destination.  Punt and access the whole
	 * 4-byte word with an atomic update.  Parallel non-atomic
	 * updates to the fields included in the update by proximity
	 * are handled properly by atomics.
	 */
	addr = (void *)&m->aflags;
	MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0);
	val = bits;
#if BYTE_ORDER == BIG_ENDIAN
	val <<= 24;
#endif
	atomic_set_32(addr, val);
}

void
vm_page_aflag_clear(vm_page_t m, uint8_t bits)
{
	uint32_t *addr, val;

	/*
	 * The PGA_REFERENCED flag can only be cleared if the object
	 * containing the page is locked.
	 */
	KASSERT((bits & PGA_REFERENCED) == 0 || VM_OBJECT_LOCKED(m->object),
	    ("PGA_REFERENCED and !VM_OBJECT_LOCKED"));

	/*
	 * See the comment in vm_page_aflag_set().
	 */
	addr = (void *)&m->aflags;
	MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0);
	val = bits;
#if BYTE_ORDER == BIG_ENDIAN
	val <<= 24;
#endif
	atomic_clear_32(addr, val);
}

void
vm_page_reference(vm_page_t m)
{

	vm_page_aflag_set(m, PGA_REFERENCED);
}

void
vm_page_busy(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT((m->oflags & VPO_BUSY) == 0,
	    ("vm_page_busy: page already busy!!!"));
	m->oflags |= VPO_BUSY;
}

/*
 *	vm_page_flash:
 *
 *	wakeup anyone waiting for the page.
 */
void
vm_page_flash(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (m->oflags & VPO_WANTED) {
		m->oflags &= ~VPO_WANTED;
		wakeup(m);
	}
}

/*
 *	vm_page_wakeup:
 *
 *	clear the VPO_BUSY flag and wakeup anyone waiting for the
 *	page.
 *
 */
void
vm_page_wakeup(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT(m->oflags & VPO_BUSY, ("vm_page_wakeup: page not busy!!!"));
	m->oflags &= ~VPO_BUSY;
	vm_page_flash(m);
}

void
vm_page_io_start(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	m->busy++;
}

void
vm_page_io_finish(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT(m->busy > 0, ("vm_page_io_finish: page %p is not busy", m));
	m->busy--;
	if (m->busy == 0)
		vm_page_flash(m);
}

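/*
 * Editor's illustrative sketch (not part of the original source): the
 * two busy mechanisms above in a typical pager I/O sequence.  VPO_BUSY
 * excludes other threads entirely, while the "busy" count tracks
 * in-flight I/O.  "m" is hypothetical, and the object lock is assumed
 * held around each call, as asserted above.
 *
 *	vm_page_io_start(m);		increment m->busy before the read
 *	...start the I/O, possibly dropping the object lock...
 *	vm_page_io_finish(m);		wakes any VPO_WANTED sleeper at zero
 */
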
/*
 * Keep the page from being freed by the page daemon.  This has much the
 * same effect as wiring, except much lower overhead, and should be used
 * only for *very* temporary holding ("wiring").
 */
void
vm_page_hold(vm_page_t mem)
{

	vm_page_lock_assert(mem, MA_OWNED);
	mem->hold_count++;
}

void
vm_page_unhold(vm_page_t mem)
{

	vm_page_lock_assert(mem, MA_OWNED);
	--mem->hold_count;
	KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!"));
	if (mem->hold_count == 0 && mem->queue == PQ_HOLD)
		vm_page_free_toq(mem);
}

/*
 *	vm_page_unhold_pages:
 *
 *	Unhold each of the pages that is referenced by the given array.
 */
void
vm_page_unhold_pages(vm_page_t *ma, int count)
{
	struct mtx *mtx, *new_mtx;

	mtx = NULL;
	for (; count != 0; count--) {
		/*
		 * Avoid releasing and reacquiring the same page lock.
		 */
		new_mtx = vm_page_lockptr(*ma);
		if (mtx != new_mtx) {
			if (mtx != NULL)
				mtx_unlock(mtx);
			mtx = new_mtx;
			mtx_lock(mtx);
		}
		vm_page_unhold(*ma);
		ma++;
	}
	if (mtx != NULL)
		mtx_unlock(mtx);
}

vm_page_t
PHYS_TO_VM_PAGE(vm_paddr_t pa)
{
	vm_page_t m;

#ifdef VM_PHYSSEG_SPARSE
	m = vm_phys_paddr_to_vm_page(pa);
	if (m == NULL)
		m = vm_phys_fictitious_to_vm_page(pa);
	return (m);
#elif defined(VM_PHYSSEG_DENSE)
	long pi;

	pi = atop(pa);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		m = &vm_page_array[pi - first_page];
		return (m);
	}
	return (vm_phys_fictitious_to_vm_page(pa));
#else
#error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
#endif
}

/*
 *	vm_page_getfake:
 *
 *	Create a fictitious page with the specified physical address and
 *	memory attribute.  The memory attribute is the only machine-
 *	dependent aspect of a fictitious page that must be initialized.
 */
vm_page_t
vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr)
{
	vm_page_t m;

	m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO);
	vm_page_initfake(m, paddr, memattr);
	return (m);
}

void
vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
{

	if ((m->flags & PG_FICTITIOUS) != 0) {
		/*
		 * The page's memattr might have changed since the
		 * previous initialization.  Update the pmap to the
		 * new memattr.
		 */
		goto memattr;
	}
	m->phys_addr = paddr;
	m->queue = PQ_NONE;
	/* Fictitious pages don't use "segind". */
	m->flags = PG_FICTITIOUS;
	/* Fictitious pages don't use "order" or "pool". */
	m->oflags = VPO_BUSY | VPO_UNMANAGED;
	m->wire_count = 1;
memattr:
	pmap_page_set_memattr(m, memattr);
}

/*
 *	vm_page_putfake:
 *
 *	Release a fictitious page.
 */
void
vm_page_putfake(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m));
	KASSERT((m->flags & PG_FICTITIOUS) != 0,
	    ("vm_page_putfake: bad page %p", m));
	uma_zfree(fakepg_zone, m);
}

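/*
 * Editor's illustrative sketch (not part of the original source): how a
 * device pager might wrap a range of device memory in a fictitious
 * page.  The physical address and memory attribute are hypothetical.
 *
 *	vm_page_t m;
 *
 *	m = vm_page_getfake(0xd0000000, VM_MEMATTR_UNCACHEABLE);
 *	...hand m to the fault handler; when done, reclaim it...
 *	vm_page_putfake(m);
 */
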
/*
 *	vm_page_updatefake:
 *
 *	Update the given fictitious page to the specified physical address and
 *	memory attribute.
 */
void
vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
{

	KASSERT((m->flags & PG_FICTITIOUS) != 0,
	    ("vm_page_updatefake: bad page %p", m));
	m->phys_addr = paddr;
	pmap_page_set_memattr(m, memattr);
}

/*
 *	vm_page_free:
 *
 *	Free a page.
 */
void
vm_page_free(vm_page_t m)
{

	m->flags &= ~PG_ZERO;
	vm_page_free_toq(m);
}

/*
 *	vm_page_free_zero:
 *
 *	Free a page to the zeroed-pages queue.
 */
void
vm_page_free_zero(vm_page_t m)
{

	m->flags |= PG_ZERO;
	vm_page_free_toq(m);
}

/*
 * Unbusy and handle the page queueing for a page from the VOP_GETPAGES()
 * array which is not the requested page.
 */
void
vm_page_readahead_finish(vm_page_t m)
{

	if (m->valid != 0) {
		/*
		 * Since the page is not the requested page, whether
		 * it should be activated or deactivated is not
		 * obvious.  Empirical results have shown that
		 * deactivating the page is usually the best choice,
		 * unless the page is wanted by another thread.
		 */
		if (m->oflags & VPO_WANTED) {
			vm_page_lock(m);
			vm_page_activate(m);
			vm_page_unlock(m);
		} else {
			vm_page_lock(m);
			vm_page_deactivate(m);
			vm_page_unlock(m);
		}
		vm_page_wakeup(m);
	} else {
		/*
		 * Free the completely invalid page.  Such a page state
		 * occurs due to a short read operation that did not
		 * cover our page at all, or when a read error happens.
		 */
		vm_page_lock(m);
		vm_page_free(m);
		vm_page_unlock(m);
	}
}

/*
 *	vm_page_sleep:
 *
 *	Sleep and release the page and page queues locks.
 *
 *	The object containing the given page must be locked.
 */
void
vm_page_sleep(vm_page_t m, const char *msg)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (mtx_owned(&vm_page_queue_mtx))
		vm_page_unlock_queues();
	if (mtx_owned(vm_page_lockptr(m)))
		vm_page_unlock(m);

	/*
	 * It's possible that while we sleep, the page will get
	 * unbusied and freed.  If we are holding the object
	 * lock, we will assume we hold a reference to the object
	 * such that even if m->object changes, we can re-lock
	 * it.
	 */
	m->oflags |= VPO_WANTED;
	msleep(m, VM_OBJECT_MTX(m->object), PVM, msg, 0);
}

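/*
 * Editor's illustrative sketch (not part of the original source): the
 * usual retry loop around vm_page_sleep().  Because msleep() is called
 * without PDROP, the object lock is reacquired before the sleep
 * returns, so the caller only needs to repeat the lookup; "object" and
 * "pindex" are hypothetical.
 *
 *	VM_OBJECT_LOCK(object);
 *	while ((m = vm_page_lookup(object, pindex)) != NULL &&
 *	    (m->oflags & VPO_BUSY) != 0)
 *		vm_page_sleep(m, "pgwait");
 */
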
/*
 *	vm_page_dirty:
 *
 *	Set all bits in the page's dirty field.
 *
 *	The object containing the specified page must be locked if the
 *	call is made from the machine-independent layer.
 *
 *	See vm_page_clear_dirty_mask().
 */
void
vm_page_dirty(vm_page_t m)
{

	KASSERT((m->flags & PG_CACHED) == 0,
	    ("vm_page_dirty: page in cache!"));
	KASSERT(!VM_PAGE_IS_FREE(m),
	    ("vm_page_dirty: page is free!"));
	KASSERT(m->valid == VM_PAGE_BITS_ALL,
	    ("vm_page_dirty: page is invalid!"));
	m->dirty = VM_PAGE_BITS_ALL;
}

/*
 *	vm_page_splay:
 *
 *	Implements Sleator and Tarjan's top-down splay algorithm.  Returns
 *	the vm_page containing the given pindex.  If, however, that
 *	pindex is not found in the vm_object, returns a vm_page that is
 *	adjacent to the pindex, coming before or after it.
 */
vm_page_t
vm_page_splay(vm_pindex_t pindex, vm_page_t root)
{
	struct vm_page dummy;
	vm_page_t lefttreemax, righttreemin, y;

	if (root == NULL)
		return (root);
	lefttreemax = righttreemin = &dummy;
	for (;; root = y) {
		if (pindex < root->pindex) {
			if ((y = root->left) == NULL)
				break;
			if (pindex < y->pindex) {
				/* Rotate right. */
				root->left = y->right;
				y->right = root;
				root = y;
				if ((y = root->left) == NULL)
					break;
			}
			/* Link into the new root's right tree. */
			righttreemin->left = root;
			righttreemin = root;
		} else if (pindex > root->pindex) {
			if ((y = root->right) == NULL)
				break;
			if (pindex > y->pindex) {
				/* Rotate left. */
				root->right = y->left;
				y->left = root;
				root = y;
				if ((y = root->right) == NULL)
					break;
			}
			/* Link into the new root's left tree. */
			lefttreemax->right = root;
			lefttreemax = root;
		} else
			break;
	}
	/* Assemble the new root. */
	lefttreemax->right = root->left;
	righttreemin->left = root->right;
	root->left = dummy.right;
	root->right = dummy.left;
	return (root);
}

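/*
 * Editor's illustrative sketch (not part of the original source): every
 * caller of vm_page_splay() follows the same pattern, splaying the
 * pindex of interest to the root and storing the new root back, which
 * keeps repeated lookups near one another cheap (amortized O(log n)).
 * Compare vm_page_lookup() below:
 *
 *	root = vm_page_splay(pindex, object->root);
 *	object->root = root;
 *	if (root->pindex == pindex)
 *		...found; otherwise root is an adjacent page...
 */
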
/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object and object list.
 *
 *	The object must be locked.
 */
void
vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t root;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if (m->object != NULL)
		panic("vm_page_insert: page already inserted");

	/*
	 * Record the object/offset pair in this page
	 */
	m->object = object;
	m->pindex = pindex;

	/*
	 * Now link into the object's ordered list of backed pages.
	 */
	root = object->root;
	if (root == NULL) {
		m->left = NULL;
		m->right = NULL;
		TAILQ_INSERT_TAIL(&object->memq, m, listq);
	} else {
		root = vm_page_splay(pindex, root);
		if (pindex < root->pindex) {
			m->left = root->left;
			m->right = root;
			root->left = NULL;
			TAILQ_INSERT_BEFORE(root, m, listq);
		} else if (pindex == root->pindex)
			panic("vm_page_insert: offset already allocated");
		else {
			m->right = root->right;
			m->left = root;
			root->right = NULL;
			TAILQ_INSERT_AFTER(&object->memq, root, m, listq);
		}
	}
	object->root = m;

	/*
	 * Show that the object has one more resident page.
	 */
	object->resident_page_count++;

	/*
	 * Hold the vnode until the last page is released.
	 */
	if (object->resident_page_count == 1 && object->type == OBJT_VNODE)
		vhold(object->handle);

	/*
	 * Since we are inserting a new and possibly dirty page,
	 * update the object's OBJ_MIGHTBEDIRTY flag.
	 */
	if (pmap_page_is_write_mapped(m))
		vm_object_set_writeable_dirty(object);
}

/*
 *	vm_page_remove:
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list, but does not invalidate/terminate
 *	the backing store.
 *
 *	The object must be locked.  The page must be locked if it is managed.
 */
void
vm_page_remove(vm_page_t m)
{
	vm_object_t object;
	vm_page_t next, prev, root;

	if ((m->oflags & VPO_UNMANAGED) == 0)
		vm_page_lock_assert(m, MA_OWNED);
	if ((object = m->object) == NULL)
		return;
	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if (m->oflags & VPO_BUSY) {
		m->oflags &= ~VPO_BUSY;
		vm_page_flash(m);
	}

	/*
	 * Now remove from the object's list of backed pages.
	 */
	if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) {
		/*
		 * Since the page's successor in the list is also its parent
		 * in the tree, its right subtree must be empty.
		 */
		next->left = m->left;
		KASSERT(m->right == NULL,
		    ("vm_page_remove: page %p has right child", m));
	} else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL &&
	    prev->right == m) {
		/*
		 * Since the page's predecessor in the list is also its parent
		 * in the tree, its left subtree must be empty.
		 */
		KASSERT(m->left == NULL,
		    ("vm_page_remove: page %p has left child", m));
		prev->right = m->right;
	} else {
		if (m != object->root)
			vm_page_splay(m->pindex, object->root);
		if (m->left == NULL)
			root = m->right;
		else if (m->right == NULL)
			root = m->left;
		else {
			/*
			 * Move the page's successor to the root, because
			 * pages are usually removed in ascending order.
			 */
			if (m->right != next)
				vm_page_splay(m->pindex, m->right);
			next->left = m->left;
			root = next;
		}
		object->root = root;
	}
	TAILQ_REMOVE(&object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */
	object->resident_page_count--;

	/*
	 * The vnode may now be recycled.
	 */
	if (object->resident_page_count == 0 && object->type == OBJT_VNODE)
		vdrop(object->handle);

	m->object = NULL;
}

/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.
 */
vm_page_t
vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if ((m = object->root) != NULL && m->pindex != pindex) {
		m = vm_page_splay(pindex, m);
		if ((object->root = m)->pindex != pindex)
			m = NULL;
	}
	return (m);
}

/*
 *	vm_page_find_least:
 *
 *	Returns the page associated with the object with least pindex
 *	greater than or equal to the parameter pindex, or NULL.
 *
 *	The object must be locked.
 */
vm_page_t
vm_page_find_least(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if ((m = TAILQ_FIRST(&object->memq)) != NULL) {
		if (m->pindex < pindex) {
			m = vm_page_splay(pindex, object->root);
			if ((object->root = m)->pindex < pindex)
				m = TAILQ_NEXT(m, listq);
		}
	}
	return (m);
}

/*
 * Returns the given page's successor (by pindex) within the object if it is
 * resident; if none is found, NULL is returned.
 *
 * The object must be locked.
 */
vm_page_t
vm_page_next(vm_page_t m)
{
	vm_page_t next;

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((next = TAILQ_NEXT(m, listq)) != NULL &&
	    next->pindex != m->pindex + 1)
		next = NULL;
	return (next);
}

/*
 * Returns the given page's predecessor (by pindex) within the object if it is
 * resident; if none is found, NULL is returned.
 *
 * The object must be locked.
 */
vm_page_t
vm_page_prev(vm_page_t m)
{
	vm_page_t prev;

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL &&
	    prev->pindex != m->pindex - 1)
		prev = NULL;
	return (prev);
}

/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	Note: swap associated with the page must be invalidated by the move.
 *	We have to do this for several reasons: (1) we aren't freeing the
 *	page, (2) we are dirtying the page, (3) the VM system is probably
 *	moving the page from object A to B, and will then later move
 *	the backing store from A to B and we can't have a conflict.
 *
 *	Note: we *always* dirty the page.  It is necessary both for the
 *	fact that we moved it, and because we may be invalidating
 *	swap.  If the page is on the cache, we have to deactivate it
 *	or vm_page_dirty() will panic.  Dirty pages are not allowed
 *	on the cache.
 *
 *	The objects must be locked.  The page must be locked if it is managed.
 */
void
vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
{

	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	vm_page_dirty(m);
}

/*
 * Convert all of the given object's cached pages that have a
 * pindex within the given range into free pages.  If the value
 * zero is given for "end", then the range's upper bound is
 * infinity.  If the given object is backed by a vnode and it
 * transitions from having one or more cached pages to none, the
 * vnode's hold count is reduced.
 */
void
vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	vm_page_t m, m_next;
	boolean_t empty;

	mtx_lock(&vm_page_queue_free_mtx);
	if (__predict_false(object->cache == NULL)) {
		mtx_unlock(&vm_page_queue_free_mtx);
		return;
	}
	m = object->cache = vm_page_splay(start, object->cache);
	if (m->pindex < start) {
		if (m->right == NULL)
			m = NULL;
		else {
			m_next = vm_page_splay(start, m->right);
			m_next->left = m;
			m->right = NULL;
			m = object->cache = m_next;
		}
	}

	/*
	 * At this point, "m" is either (1) a reference to the page
	 * with the least pindex that is greater than or equal to
	 * "start" or (2) NULL.
	 */
	for (; m != NULL && (m->pindex < end || end == 0); m = m_next) {
		/*
		 * Find "m"'s successor and remove "m" from the
		 * object's cache.
		 */
		if (m->right == NULL) {
			object->cache = m->left;
			m_next = NULL;
		} else {
			m_next = vm_page_splay(start, m->right);
			m_next->left = m->left;
			object->cache = m_next;
		}
		/* Convert "m" to a free page. */
		m->object = NULL;
		m->valid = 0;
		/* Clear PG_CACHED and set PG_FREE. */
		m->flags ^= PG_CACHED | PG_FREE;
		KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE,
		    ("vm_page_cache_free: page %p has inconsistent flags", m));
		cnt.v_cache_count--;
		cnt.v_free_count++;
	}
	empty = object->cache == NULL;
	mtx_unlock(&vm_page_queue_free_mtx);
	if (object->type == OBJT_VNODE && empty)
		vdrop(object->handle);
}

/*
 * Returns the cached page that is associated with the given
 * object and offset.  If, however, none exists, returns NULL.
 *
 * The free page queue must be locked.
 */
static inline vm_page_t
vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	if ((m = object->cache) != NULL && m->pindex != pindex) {
		m = vm_page_splay(pindex, m);
		if ((object->cache = m)->pindex != pindex)
			m = NULL;
	}
	return (m);
}

/*
 * Remove the given cached page from its containing object's
 * collection of cached pages.
 *
 * The free page queue must be locked.
 */
void
vm_page_cache_remove(vm_page_t m)
{
	vm_object_t object;
	vm_page_t root;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	KASSERT((m->flags & PG_CACHED) != 0,
	    ("vm_page_cache_remove: page %p is not cached", m));
	object = m->object;
	if (m != object->cache) {
		root = vm_page_splay(m->pindex, object->cache);
		KASSERT(root == m,
		    ("vm_page_cache_remove: page %p is not cached in object %p",
		    m, object));
	}
	if (m->left == NULL)
		root = m->right;
	else if (m->right == NULL)
		root = m->left;
	else {
		root = vm_page_splay(m->pindex, m->left);
		root->right = m->right;
	}
	object->cache = root;
	m->object = NULL;
	cnt.v_cache_count--;
}

/*
 * Transfer all of the cached pages with offset greater than or
 * equal to 'offidxstart' from the original object's cache to the
 * new object's cache.  However, any cached pages with offset
 * greater than or equal to the new object's size are kept in the
 * original object.  Initially, the new object's cache must be
 * empty.  Offset 'offidxstart' in the original object must
 * correspond to offset zero in the new object.
 *
 * The new object must be locked.
 */
void
vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
    vm_object_t new_object)
{
	vm_page_t m, m_next;

	/*
	 * Insertion into an object's collection of cached pages
	 * requires the object to be locked.  In contrast, removal does
	 * not.
	 */
	VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
	KASSERT(new_object->cache == NULL,
	    ("vm_page_cache_transfer: object %p has cached pages",
	    new_object));
	mtx_lock(&vm_page_queue_free_mtx);
	if ((m = orig_object->cache) != NULL) {
		/*
		 * Transfer all of the pages with offset greater than or
		 * equal to 'offidxstart' from the original object's
		 * cache to the new object's cache.
		 */
		m = vm_page_splay(offidxstart, m);
		if (m->pindex < offidxstart) {
			orig_object->cache = m;
			new_object->cache = m->right;
			m->right = NULL;
		} else {
			orig_object->cache = m->left;
			new_object->cache = m;
			m->left = NULL;
		}
		while ((m = new_object->cache) != NULL) {
			if ((m->pindex - offidxstart) >= new_object->size) {
				/*
				 * Return all of the cached pages with
				 * offset greater than or equal to the
				 * new object's size to the original
				 * object's cache.
				 */
				new_object->cache = m->left;
				m->left = orig_object->cache;
				orig_object->cache = m;
				break;
			}
			m_next = vm_page_splay(m->pindex, m->right);
			/* Update the page's object and offset. */
			m->object = new_object;
			m->pindex -= offidxstart;
			if (m_next == NULL)
				break;
			m->right = NULL;
			m_next->left = m;
			new_object->cache = m_next;
		}
		KASSERT(new_object->cache == NULL ||
		    new_object->type == OBJT_SWAP,
		    ("vm_page_cache_transfer: object %p's type is incompatible"
		    " with cached pages", new_object));
	}
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Returns TRUE if a cached page is associated with the given object and
 * offset, and FALSE otherwise.
 *
 * The object must be locked.
 */
boolean_t
vm_page_is_cached(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	/*
	 * Insertion into an object's collection of cached pages requires the
	 * object to be locked.  Therefore, if the object is locked and the
	 * object's collection is empty, there is no need to acquire the free
	 * page queues lock in order to prove that the specified page doesn't
	 * exist.
	 */
	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if (object->cache == NULL)
		return (FALSE);
	mtx_lock(&vm_page_queue_free_mtx);
	m = vm_page_cache_lookup(object, pindex);
	mtx_unlock(&vm_page_queue_free_mtx);
	return (m != NULL);
}

/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	The caller must always specify an allocation class.
 *
 *	allocation classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *
 *	optional allocation flags:
 *	VM_ALLOC_ZERO		prefer a zeroed page
 *	VM_ALLOC_WIRED		wire the allocated page
 *	VM_ALLOC_NOOBJ		page is not associated with a vm object
 *	VM_ALLOC_NOBUSY		do not set the page busy
 *	VM_ALLOC_IFCACHED	return page only if it is cached
 *	VM_ALLOC_IFNOTCACHED	return NULL, do not reactivate if the page
 *				is cached
 *
 *	This routine may not sleep.
 */
vm_page_t
vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
{
	struct vnode *vp = NULL;
	vm_object_t m_object;
	vm_page_t m;
	int flags, page_req;

	if ((req & VM_ALLOC_NOOBJ) == 0) {
		KASSERT(object != NULL,
		    ("vm_page_alloc: NULL object."));
		VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	}

	page_req = req & VM_ALLOC_CLASS_MASK;

	/*
	 * The pager is allowed to eat deeper into the free page list.
	 */
	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT))
		page_req = VM_ALLOC_SYSTEM;

	mtx_lock(&vm_page_queue_free_mtx);
	if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
	    (page_req == VM_ALLOC_SYSTEM &&
	    cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
	    (page_req == VM_ALLOC_INTERRUPT &&
	    cnt.v_free_count + cnt.v_cache_count > 0)) {
		/*
		 * Allocate from the free queue if the number of free pages
		 * exceeds the minimum for the request class.
		 */
		if (object != NULL &&
		    (m = vm_page_cache_lookup(object, pindex)) != NULL) {
			if ((req & VM_ALLOC_IFNOTCACHED) != 0) {
				mtx_unlock(&vm_page_queue_free_mtx);
				return (NULL);
			}
			if (vm_phys_unfree_page(m))
				vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, 0);
#if VM_NRESERVLEVEL > 0
			else if (!vm_reserv_reactivate_page(m))
#else
			else
#endif
				panic("vm_page_alloc: cache page %p is missing"
				    " from the free queue", m);
		} else if ((req & VM_ALLOC_IFCACHED) != 0) {
			mtx_unlock(&vm_page_queue_free_mtx);
			return (NULL);
#if VM_NRESERVLEVEL > 0
		} else if (object == NULL || object->type == OBJT_DEVICE ||
		    object->type == OBJT_SG ||
		    (object->flags & OBJ_COLORED) == 0 ||
		    (m = vm_reserv_alloc_page(object, pindex)) == NULL) {
#else
		} else {
#endif
			m = vm_phys_alloc_pages(object != NULL ?
			    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
#if VM_NRESERVLEVEL > 0
			if (m == NULL && vm_reserv_reclaim_inactive()) {
				m = vm_phys_alloc_pages(object != NULL ?
				    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT,
				    0);
			}
#endif
		}
	} else {
		/*
		 * Not allocatable, give up.
		 */
		mtx_unlock(&vm_page_queue_free_mtx);
		atomic_add_int(&vm_pageout_deficit,
		    MAX((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
		pagedaemon_wakeup();
		return (NULL);
	}

	/*
	 *  At this point we had better have found a good page.
	 */

	KASSERT(m != NULL, ("vm_page_alloc: missing page"));
	KASSERT(m->queue == PQ_NONE,
	    ("vm_page_alloc: page %p has unexpected queue %d", m, m->queue));
	KASSERT(m->wire_count == 0, ("vm_page_alloc: page %p is wired", m));
	KASSERT(m->hold_count == 0, ("vm_page_alloc: page %p is held", m));
	KASSERT(m->busy == 0, ("vm_page_alloc: page %p is busy", m));
	KASSERT(m->dirty == 0, ("vm_page_alloc: page %p is dirty", m));
	KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
	    ("vm_page_alloc: page %p has unexpected memattr %d", m,
	    pmap_page_get_memattr(m)));
	if ((m->flags & PG_CACHED) != 0) {
		KASSERT(m->valid != 0,
		    ("vm_page_alloc: cached page %p is invalid", m));
		if (m->object == object && m->pindex == pindex)
			cnt.v_reactivated++;
		else
			m->valid = 0;
		m_object = m->object;
		vm_page_cache_remove(m);
		if (m_object->type == OBJT_VNODE && m_object->cache == NULL)
			vp = m_object->handle;
	} else {
		KASSERT(VM_PAGE_IS_FREE(m),
		    ("vm_page_alloc: page %p is not free", m));
		KASSERT(m->valid == 0,
		    ("vm_page_alloc: free page %p is valid", m));
		cnt.v_free_count--;
	}

	/*
	 * Only the PG_ZERO flag is inherited.  The PG_CACHED or PG_FREE flag
	 * must be cleared before the free page queues lock is released.
	 */
	flags = 0;
	if (req & VM_ALLOC_NODUMP)
		flags |= PG_NODUMP;
	if (m->flags & PG_ZERO) {
		vm_page_zero_count--;
		if (req & VM_ALLOC_ZERO)
			flags |= PG_ZERO;
	}
	m->flags = flags;
	mtx_unlock(&vm_page_queue_free_mtx);
	m->aflags = 0;
	if (object == NULL || object->type == OBJT_PHYS)
		m->oflags = VPO_UNMANAGED;
	else
		m->oflags = 0;
	if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ)) == 0)
		m->oflags |= VPO_BUSY;
	if (req & VM_ALLOC_WIRED) {
		/*
		 * The page lock is not required for wiring a page until that
		 * page is inserted into the object.
		 */
		atomic_add_int(&cnt.v_wire_count, 1);
		m->wire_count = 1;
	}
	m->act_count = 0;

	if (object != NULL) {
		/* Ignore device objects; the pager sets "memattr" for them. */
		if (object->memattr != VM_MEMATTR_DEFAULT &&
		    object->type != OBJT_DEVICE && object->type != OBJT_SG)
			pmap_page_set_memattr(m, object->memattr);
		vm_page_insert(m, object, pindex);
	} else
		m->pindex = pindex;

	/*
	 * The following call to vdrop() must come after the above call
	 * to vm_page_insert() in case both affect the same object and
	 * vnode.  Otherwise, the affected vnode's hold count could
	 * temporarily become zero.
	 */
	if (vp != NULL)
		vdrop(vp);

	/*
	 * Don't wakeup too often - wakeup the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (vm_paging_needed())
		pagedaemon_wakeup();

	return (m);
}

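/*
 * Editor's illustrative sketch (not part of the original source):
 * because vm_page_alloc() may not sleep, callers typically loop with
 * the VM_WAIT macro (see vm_wait() below) when it returns NULL.
 * "object" and "pindex" are hypothetical; the object lock must be
 * dropped across VM_WAIT.
 *
 *	VM_OBJECT_LOCK(object);
 *	while ((m = vm_page_alloc(object, pindex,
 *	    VM_ALLOC_NORMAL | VM_ALLOC_ZERO)) == NULL) {
 *		VM_OBJECT_UNLOCK(object);
 *		VM_WAIT;
 *		VM_OBJECT_LOCK(object);
 *	}
 */
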
/*
 * Initialize a page that has been freshly dequeued from a freelist.
 * The caller has to drop the vnode returned, if it is not NULL.
 *
 * To be called with vm_page_queue_free_mtx held.
 */
struct vnode *
vm_page_alloc_init(vm_page_t m)
{
	struct vnode *drop;
	vm_object_t m_object;

	KASSERT(m->queue == PQ_NONE,
	    ("vm_page_alloc_init: page %p has unexpected queue %d",
	    m, m->queue));
	KASSERT(m->wire_count == 0,
	    ("vm_page_alloc_init: page %p is wired", m));
	KASSERT(m->hold_count == 0,
	    ("vm_page_alloc_init: page %p is held", m));
	KASSERT(m->busy == 0,
	    ("vm_page_alloc_init: page %p is busy", m));
	KASSERT(m->dirty == 0,
	    ("vm_page_alloc_init: page %p is dirty", m));
	KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
	    ("vm_page_alloc_init: page %p has unexpected memattr %d",
	    m, pmap_page_get_memattr(m)));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	drop = NULL;
	if ((m->flags & PG_CACHED) != 0) {
		m->valid = 0;
		m_object = m->object;
		vm_page_cache_remove(m);
		if (m_object->type == OBJT_VNODE &&
		    m_object->cache == NULL)
			drop = m_object->handle;
	} else {
		KASSERT(VM_PAGE_IS_FREE(m),
		    ("vm_page_alloc_init: page %p is not free", m));
		KASSERT(m->valid == 0,
		    ("vm_page_alloc_init: free page %p is valid", m));
		cnt.v_free_count--;
	}
	if (m->flags & PG_ZERO)
		vm_page_zero_count--;
	/* Don't clear the PG_ZERO flag; we'll need it later. */
	m->flags &= PG_ZERO;
	m->aflags = 0;
	m->oflags = VPO_UNMANAGED;
	/* Unmanaged pages don't use "act_count". */
	return (drop);
}

/*
 *	vm_page_alloc_freelist:
 *
 *	Allocate a page from the specified freelist.
 *	Only the ALLOC_CLASS values in req are honored; other request flags
 *	are ignored.
 */
vm_page_t
vm_page_alloc_freelist(int flind, int req)
{
	struct vnode *drop;
	vm_page_t m;
	int page_req;

	m = NULL;
	page_req = req & VM_ALLOC_CLASS_MASK;
	mtx_lock(&vm_page_queue_free_mtx);
	/*
	 * Do not allocate reserved pages unless the req has asked for it.
	 */
	if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
	    (page_req == VM_ALLOC_SYSTEM &&
	    cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
	    (page_req == VM_ALLOC_INTERRUPT &&
	    cnt.v_free_count + cnt.v_cache_count > 0)) {
		m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0);
	}
	if (m == NULL) {
		mtx_unlock(&vm_page_queue_free_mtx);
		return (NULL);
	}
	drop = vm_page_alloc_init(m);
	mtx_unlock(&vm_page_queue_free_mtx);
	if (drop)
		vdrop(drop);
	return (m);
}

/*
 *	vm_wait:	(also see VM_WAIT macro)
 *
 *	Sleep until free pages are available for allocation.
 *	- Called in various places before memory allocations.
 */
void
vm_wait(void)
{

	mtx_lock(&vm_page_queue_free_mtx);
	if (curproc == pageproc) {
		vm_pageout_pages_needed = 1;
		msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx,
		    PDROP | PSWP, "VMWait", 0);
	} else {
		if (!vm_pages_needed) {
			vm_pages_needed = 1;
			wakeup(&vm_pages_needed);
		}
		msleep(&cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PVM,
		    "vmwait", 0);
	}
}

/*
 *	vm_waitpfault:	(also see VM_WAITPFAULT macro)
 *
 *	Sleep until free pages are available for allocation.
 *	- Called only in vm_fault so that processes page faulting
 *	  can be easily tracked.
 *	- Sleeps at a lower priority than vm_wait() so that vm_wait()ing
 *	  processes will be able to grab memory first.  Do not change
 *	  this balance without careful testing first.
 */
void
vm_waitpfault(void)
{

	mtx_lock(&vm_page_queue_free_mtx);
	if (!vm_pages_needed) {
		vm_pages_needed = 1;
		wakeup(&vm_pages_needed);
	}
	msleep(&cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PUSER,
	    "pfault", 0);
}

/*
 *	vm_page_requeue:
 *
 *	Move the given page to the tail of its present page queue.
 *
 *	The page queues must be locked.
 */
void
vm_page_requeue(vm_page_t m)
{
	struct vpgqueues *vpq;
	int queue;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	queue = m->queue;
	KASSERT(queue != PQ_NONE,
	    ("vm_page_requeue: page %p is not queued", m));
	vpq = &vm_page_queues[queue];
	TAILQ_REMOVE(&vpq->pl, m, pageq);
	TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
}

/*
 *	vm_page_queue_remove:
 *
 *	Remove the given page from the specified queue.
 *
 *	The page and page queues must be locked.
 */
static __inline void
vm_page_queue_remove(int queue, vm_page_t m)
{
	struct vpgqueues *pq;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	vm_page_lock_assert(m, MA_OWNED);
	pq = &vm_page_queues[queue];
	TAILQ_REMOVE(&pq->pl, m, pageq);
	(*pq->cnt)--;
}

/*
 *	vm_pageq_remove:
 *
 *	Remove a page from its queue.
 *
 *	The given page must be locked.
 */
void
vm_pageq_remove(vm_page_t m)
{
	int queue;

	vm_page_lock_assert(m, MA_OWNED);
	if ((queue = m->queue) != PQ_NONE) {
		vm_page_lock_queues();
		m->queue = PQ_NONE;
		vm_page_queue_remove(queue, m);
		vm_page_unlock_queues();
	}
}

/*
 *	vm_page_enqueue:
 *
 *	Add the given page to the specified queue.
 *
 *	The page queues must be locked.
 */
static void
vm_page_enqueue(int queue, vm_page_t m)
{
	struct vpgqueues *vpq;

	vpq = &vm_page_queues[queue];
	m->queue = queue;
	TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
	++*vpq->cnt;
}

/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *	Ensure that act_count is at least ACT_INIT but do not otherwise
 *	mess with it.
 *
 *	The page must be locked.
 */
void
vm_page_activate(vm_page_t m)
{
	int queue;

	vm_page_lock_assert(m, MA_OWNED);
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((queue = m->queue) != PQ_ACTIVE) {
		if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
			if (m->act_count < ACT_INIT)
				m->act_count = ACT_INIT;
			vm_page_lock_queues();
			if (queue != PQ_NONE)
				vm_page_queue_remove(queue, m);
			vm_page_enqueue(PQ_ACTIVE, m);
			vm_page_unlock_queues();
		} else
			KASSERT(queue == PQ_NONE,
			    ("vm_page_activate: wired page %p is queued", m));
	} else {
		if (m->act_count < ACT_INIT)
			m->act_count = ACT_INIT;
	}
}

/*
 *	vm_page_free_wakeup:
 *
 *	Helper routine for vm_page_free_toq() and vm_page_cache().  This
 *	routine is called when a page has been added to the cache or free
 *	queues.
 *
 *	The page queues must be locked.
 */
static inline void
vm_page_free_wakeup(void)
{

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	/*
	 * If the pageout daemon needs pages, then tell it that there are
	 * some free.
	 */
	if (vm_pageout_pages_needed &&
	    cnt.v_cache_count + cnt.v_free_count >= cnt.v_pageout_free_min) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	/*
	 * Wakeup processes that are waiting on memory if we hit a
	 * high water mark.  And wakeup the scheduler process if we have
	 * lots of memory; this process will swapin processes.
	 */
	if (vm_pages_needed && !vm_page_count_min()) {
		vm_pages_needed = 0;
		wakeup(&cnt.v_free_count);
	}
}

/*
 *	vm_page_free_toq:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	The object must be locked.  The page must be locked if it is managed.
 */
void
vm_page_free_toq(vm_page_t m)
{

	if ((m->oflags & VPO_UNMANAGED) == 0) {
		vm_page_lock_assert(m, MA_OWNED);
		KASSERT(!pmap_page_is_mapped(m),
		    ("vm_page_free_toq: freeing mapped page %p", m));
	}
	PCPU_INC(cnt.v_tfree);

	if (VM_PAGE_IS_FREE(m))
		panic("vm_page_free: freeing free page %p", m);
	else if (m->busy != 0)
		panic("vm_page_free: freeing busy page %p", m);

	/*
	 * Unqueue, then remove page.  Note that we cannot destroy
	 * the page here because we do not want to call the pager's
	 * callback routine until after we've put the page on the
	 * appropriate free queue.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0)
		vm_pageq_remove(m);
	vm_page_remove(m);

	/*
	 * If fictitious, remove the object association and
	 * return; otherwise, delay the object association removal.
	 */
	if ((m->flags & PG_FICTITIOUS) != 0) {
		return;
	}

	m->valid = 0;
	vm_page_undirty(m);

	if (m->wire_count != 0)
		panic("vm_page_free: freeing wired page %p", m);
	if (m->hold_count != 0) {
		m->flags &= ~PG_ZERO;
		vm_page_lock_queues();
		vm_page_enqueue(PQ_HOLD, m);
		vm_page_unlock_queues();
	} else {
		/*
		 * Restore the default memory attribute to the page.
		 */
		if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
			pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);

		/*
		 * Insert the page into the physical memory allocator's
		 * cache/free page queues.
		 */
		mtx_lock(&vm_page_queue_free_mtx);
		m->flags |= PG_FREE;
		cnt.v_free_count++;
#if VM_NRESERVLEVEL > 0
		if (!vm_reserv_free_page(m))
#else
		if (TRUE)
#endif
			vm_phys_free_pages(m, 0);
		if ((m->flags & PG_ZERO) != 0)
			++vm_page_zero_count;
		else
			vm_page_zero_idle_wakeup();
		vm_page_free_wakeup();
		mtx_unlock(&vm_page_queue_free_mtx);
	}
}

/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	If the page is fictitious, then its wire count must remain one.
 *
 *	The page must be locked.
 */
void
vm_page_wire(vm_page_t m)
{

	/*
	 * Only bump the wire statistics if the page is not already wired,
	 * and only unqueue the page if it is on some queue (if it is unmanaged
	 * it is already off the queues).
	 */
	vm_page_lock_assert(m, MA_OWNED);
	if ((m->flags & PG_FICTITIOUS) != 0) {
		KASSERT(m->wire_count == 1,
		    ("vm_page_wire: fictitious page %p's wire count isn't one",
		    m));
		return;
	}
	if (m->wire_count == 0) {
		if ((m->oflags & VPO_UNMANAGED) == 0)
			vm_pageq_remove(m);
		atomic_add_int(&cnt.v_wire_count, 1);
	}
	m->wire_count++;
	KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m));
}

/*
 *	vm_page_unwire:
 *
 *	Release one wiring of the specified page, potentially enabling it to be
 *	paged again.  If paging is enabled, then the value of the parameter
 *	"activate" determines to which queue the page is added.  If "activate"
 *	is non-zero, then the page is added to the active queue.  Otherwise, it
 *	is added to the inactive queue.
 *
 *	However, unless the page belongs to an object, it is not enqueued
 *	because it cannot be paged out.
 *
 *	If a page is fictitious, then its wire count must always be one.
 *
 *	A managed page must be locked.
 */
void
vm_page_unwire(vm_page_t m, int activate)
{

	if ((m->oflags & VPO_UNMANAGED) == 0)
		vm_page_lock_assert(m, MA_OWNED);
	if ((m->flags & PG_FICTITIOUS) != 0) {
		KASSERT(m->wire_count == 1,
		    ("vm_page_unwire: fictitious page %p's wire count isn't one", m));
		return;
	}
	if (m->wire_count > 0) {
		m->wire_count--;
		if (m->wire_count == 0) {
			atomic_subtract_int(&cnt.v_wire_count, 1);
			if ((m->oflags & VPO_UNMANAGED) != 0 ||
			    m->object == NULL)
				return;
			if (!activate)
				m->flags &= ~PG_WINATCFLS;
			vm_page_lock_queues();
			vm_page_enqueue(activate ? PQ_ACTIVE : PQ_INACTIVE, m);
			vm_page_unlock_queues();
		}
	} else
		panic("vm_page_unwire: page %p's wire count is zero", m);
}

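/*
 * Editor's illustrative sketch (not part of the original source): the
 * usual wire/unwire pairing, pinning a page across an operation that
 * cannot tolerate pageout.  "m" is hypothetical, and the page lock is
 * taken around each call as required above.
 *
 *	vm_page_lock(m);
 *	vm_page_wire(m);
 *	vm_page_unlock(m);
 *	...operate on the page without fear of it being reclaimed...
 *	vm_page_lock(m);
 *	vm_page_unwire(m, 0);		0: release to the inactive queue
 *	vm_page_unlock(m);
 */
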
2108 */ 2109int 2110vm_page_try_to_free(vm_page_t m) 2111{ 2112 2113 vm_page_lock_assert(m, MA_OWNED); 2114 if (m->object != NULL) 2115 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2116 if (m->dirty || m->hold_count || m->busy || m->wire_count || 2117 (m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0) 2118 return (0); 2119 pmap_remove_all(m); 2120 if (m->dirty) 2121 return (0); 2122 vm_page_free(m); 2123 return (1); 2124} 2125 2126/* 2127 * vm_page_cache 2128 * 2129 * Put the specified page onto the page cache queue (if appropriate). 2130 * 2131 * The object and page must be locked. 2132 */ 2133void 2134vm_page_cache(vm_page_t m) 2135{ 2136 vm_object_t object; 2137 vm_page_t next, prev, root; 2138 2139 vm_page_lock_assert(m, MA_OWNED); 2140 object = m->object; 2141 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 2142 if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) || m->busy || 2143 m->hold_count || m->wire_count) 2144 panic("vm_page_cache: attempting to cache busy page"); 2145 pmap_remove_all(m); 2146 if (m->dirty != 0) 2147 panic("vm_page_cache: page %p is dirty", m); 2148 if (m->valid == 0 || object->type == OBJT_DEFAULT || 2149 (object->type == OBJT_SWAP && 2150 !vm_pager_has_page(object, m->pindex, NULL, NULL))) { 2151 /* 2152 * Hypothesis: A cache-eligible page belonging to a 2153 * default object or swap object but without a backing 2154 * store must be zero filled. 2155 */ 2156 vm_page_free(m); 2157 return; 2158 } 2159 KASSERT((m->flags & PG_CACHED) == 0, 2160 ("vm_page_cache: page %p is already cached", m)); 2161 PCPU_INC(cnt.v_tcached); 2162 2163 /* 2164 * Remove the page from the paging queues. 2165 */ 2166 vm_pageq_remove(m); 2167 2168 /* 2169 * Remove the page from the object's collection of resident 2170 * pages. 2171 */ 2172 if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) { 2173 /* 2174 * Since the page's successor in the list is also its parent 2175 * in the tree, its right subtree must be empty. 2176 */ 2177 next->left = m->left; 2178 KASSERT(m->right == NULL, 2179 ("vm_page_cache: page %p has right child", m)); 2180 } else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL && 2181 prev->right == m) { 2182 /* 2183 * Since the page's predecessor in the list is also its parent 2184 * in the tree, its left subtree must be empty. 2185 */ 2186 KASSERT(m->left == NULL, 2187 ("vm_page_cache: page %p has left child", m)); 2188 prev->right = m->right; 2189 } else { 2190 if (m != object->root) 2191 vm_page_splay(m->pindex, object->root); 2192 if (m->left == NULL) 2193 root = m->right; 2194 else if (m->right == NULL) 2195 root = m->left; 2196 else { 2197 /* 2198 * Move the page's successor to the root, because 2199 * pages are usually removed in ascending order. 2200 */ 2201 if (m->right != next) 2202 vm_page_splay(m->pindex, m->right); 2203 next->left = m->left; 2204 root = next; 2205 } 2206 object->root = root; 2207 } 2208 TAILQ_REMOVE(&object->memq, m, listq); 2209 object->resident_page_count--; 2210 2211 /* 2212 * Restore the default memory attribute to the page. 2213 */ 2214 if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) 2215 pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); 2216 2217 /* 2218 * Insert the page into the object's collection of cached pages 2219 * and the physical memory allocator's cache/free page queues.
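 *
 * A sketch of the splay-tree insertion below (descriptive only): if
 * the object's cache tree is empty, the page becomes the root with
 * no children; otherwise the tree is splayed about m->pindex and the
 * page is linked in as the new root, taking one of the old root's
 * subtrees on each side.  A duplicate pindex would be a bug and
 * panics.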
2220 */ 2221 m->flags &= ~PG_ZERO; 2222 mtx_lock(&vm_page_queue_free_mtx); 2223 m->flags |= PG_CACHED; 2224 cnt.v_cache_count++; 2225 root = object->cache; 2226 if (root == NULL) { 2227 m->left = NULL; 2228 m->right = NULL; 2229 } else { 2230 root = vm_page_splay(m->pindex, root); 2231 if (m->pindex < root->pindex) { 2232 m->left = root->left; 2233 m->right = root; 2234 root->left = NULL; 2235 } else if (__predict_false(m->pindex == root->pindex)) 2236 panic("vm_page_cache: offset already cached"); 2237 else { 2238 m->right = root->right; 2239 m->left = root; 2240 root->right = NULL; 2241 } 2242 } 2243 object->cache = m; 2244#if VM_NRESERVLEVEL > 0 2245 if (!vm_reserv_free_page(m)) { 2246#else 2247 if (TRUE) { 2248#endif 2249 vm_phys_set_pool(VM_FREEPOOL_CACHE, m, 0); 2250 vm_phys_free_pages(m, 0); 2251 } 2252 vm_page_free_wakeup(); 2253 mtx_unlock(&vm_page_queue_free_mtx); 2254 2255 /* 2256 * Increment the vnode's hold count if this is the object's only 2257 * cached page. Decrement the vnode's hold count if this was 2258 * the object's only resident page. 2259 */ 2260 if (object->type == OBJT_VNODE) { 2261 if (root == NULL && object->resident_page_count != 0) 2262 vhold(object->handle); 2263 else if (root != NULL && object->resident_page_count == 0) 2264 vdrop(object->handle); 2265 } 2266} 2267 2268/* 2269 * vm_page_dontneed 2270 * 2271 * Cache, deactivate, or do nothing as appropriate. This routine 2272 * is typically used by madvise() MADV_DONTNEED. 2273 * 2274 * Generally speaking we want to move the page into the cache so 2275 * it gets reused quickly. However, this can result in a silly syndrome 2276 * due to the page recycling too quickly. Small objects will not be 2277 * fully cached. On the other hand, if we move the page to the inactive 2278 * queue we wind up with a problem whereby very large objects 2279 * unnecessarily blow away our inactive and cache queues. 2280 * 2281 * The solution is to move the pages based on a fixed weighting. We 2282 * either leave them alone, deactivate them, or move them to the cache, 2283 * where moving them to the cache has the highest weighting (with the weights used below, a page is left alone about 1 time in 32, deactivated 3 times in 32, and cached the remaining 28 times in 32). 2284 * By forcing some pages into other queues we eventually force the 2285 * system to balance the queues, potentially recovering other unrelated 2286 * space from active. The idea is not to force this to happen too 2287 * often. 2288 * 2289 * The object and page must be locked. 2290 */ 2291void 2292vm_page_dontneed(vm_page_t m) 2293{ 2294 int dnw; 2295 int head; 2296 2297 vm_page_lock_assert(m, MA_OWNED); 2298 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2299 dnw = PCPU_GET(dnweight); 2300 PCPU_INC(dnweight); 2301 2302 /* 2303 * Occasionally leave the page alone. 2304 */ 2305 if ((dnw & 0x01F0) == 0 || m->queue == PQ_INACTIVE) { 2306 if (m->act_count >= ACT_INIT) 2307 --m->act_count; 2308 return; 2309 } 2310 2311 /* 2312 * Clear any references to the page. Otherwise, the page daemon will 2313 * immediately reactivate the page. 2314 * 2315 * Perform the pmap_clear_reference() first. Otherwise, a concurrent 2316 * pmap operation, such as pmap_remove(), could clear a reference in 2317 * the pmap and set PGA_REFERENCED on the page before the 2318 * pmap_clear_reference() had completed. Consequently, the page would 2319 * appear referenced based upon an old reference that occurred before 2320 * this function ran.
2321 */ 2322 pmap_clear_reference(m); 2323 vm_page_aflag_clear(m, PGA_REFERENCED); 2324 2325 if (m->dirty == 0 && pmap_is_modified(m)) 2326 vm_page_dirty(m); 2327 2328 if (m->dirty || (dnw & 0x0070) == 0) { 2329 /* 2330 * Deactivate the page 3 times out of 32. 2331 */ 2332 head = 0; 2333 } else { 2334 /* 2335 * Cache the page 28 times out of every 32. Note that 2336 * the page is deactivated instead of cached, but placed 2337 * at the head of the queue instead of the tail. 2338 */ 2339 head = 1; 2340 } 2341 _vm_page_deactivate(m, head); 2342} 2343 2344/* 2345 * Grab a page, waiting until we are woken up due to the page 2346 * changing state. We keep on waiting if the page continues 2347 * to be in the object. If the page doesn't exist, first allocate it 2348 * and then conditionally zero it. 2349 * 2350 * The caller must always specify the VM_ALLOC_RETRY flag. This is intended 2351 * to facilitate its eventual removal. 2352 * 2353 * This routine may sleep. 2354 * 2355 * The object must be locked on entry. The lock will, however, be released 2356 * and reacquired if the routine sleeps. 2357 */ 2358vm_page_t 2359vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) 2360{ 2361 vm_page_t m; 2362 2363 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 2364 KASSERT((allocflags & VM_ALLOC_RETRY) != 0, 2365 ("vm_page_grab: VM_ALLOC_RETRY is required")); 2366retrylookup: 2367 if ((m = vm_page_lookup(object, pindex)) != NULL) { 2368 if ((m->oflags & VPO_BUSY) != 0 || 2369 ((allocflags & VM_ALLOC_IGN_SBUSY) == 0 && m->busy != 0)) { 2370 /* 2371 * Reference the page before unlocking and 2372 * sleeping so that the page daemon is less 2373 * likely to reclaim it. 2374 */ 2375 vm_page_aflag_set(m, PGA_REFERENCED); 2376 vm_page_sleep(m, "pgrbwt"); 2377 goto retrylookup; 2378 } else { 2379 if ((allocflags & VM_ALLOC_WIRED) != 0) { 2380 vm_page_lock(m); 2381 vm_page_wire(m); 2382 vm_page_unlock(m); 2383 } 2384 if ((allocflags & VM_ALLOC_NOBUSY) == 0) 2385 vm_page_busy(m); 2386 return (m); 2387 } 2388 } 2389 m = vm_page_alloc(object, pindex, allocflags & ~(VM_ALLOC_RETRY | 2390 VM_ALLOC_IGN_SBUSY)); 2391 if (m == NULL) { 2392 VM_OBJECT_UNLOCK(object); 2393 VM_WAIT; 2394 VM_OBJECT_LOCK(object); 2395 goto retrylookup; 2396 } else if (m->valid != 0) 2397 return (m); 2398 if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0) 2399 pmap_zero_page(m); 2400 return (m); 2401} 2402 2403/* 2404 * Mapping function for valid or dirty bits in a page. 2405 * 2406 * Inputs are required to range within a page. 2407 */ 2408vm_page_bits_t 2409vm_page_bits(int base, int size) 2410{ 2411 int first_bit; 2412 int last_bit; 2413 2414 KASSERT( 2415 base + size <= PAGE_SIZE, 2416 ("vm_page_bits: illegal base/size %d/%d", base, size) 2417 ); 2418 2419 if (size == 0) /* handle degenerate case */ 2420 return (0); 2421 2422 first_bit = base >> DEV_BSHIFT; 2423 last_bit = (base + size - 1) >> DEV_BSHIFT; 2424 2425 return (((vm_page_bits_t)2 << last_bit) - 2426 ((vm_page_bits_t)1 << first_bit)); 2427} 2428 2429/* 2430 * vm_page_set_valid: 2431 * 2432 * Sets portions of a page valid. The arguments are expected 2433 * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive 2434 * of any partial chunks touched by the range. The invalid portion of 2435 * such chunks will be zeroed. 2436 * 2437 * (base + size) must be less than or equal to PAGE_SIZE.
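 *
 * Worked example (illustrative only, assuming DEV_BSIZE == 512, so
 * DEV_BSHIFT == 9, and PAGE_SIZE == 4096): vm_page_set_valid(m, 0,
 * 1024) computes vm_page_bits(0, 1024) with first_bit == 0 and
 * last_bit == 1023 >> 9 == 1, yielding ((2 << 1) - (1 << 0)) == 0x3,
 * and so marks the page's first two 512-byte blocks valid.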
2438 */ 2439void 2440vm_page_set_valid(vm_page_t m, int base, int size) 2441{ 2442 int endoff, frag; 2443 2444 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2445 if (size == 0) /* handle degenerate case */ 2446 return; 2447 2448 /* 2449 * If the base is not DEV_BSIZE aligned and the valid 2450 * bit is clear, we have to zero out a portion of the 2451 * first block. 2452 */ 2453 if ((frag = base & ~(DEV_BSIZE - 1)) != base && 2454 (m->valid & (1 << (base >> DEV_BSHIFT))) == 0) 2455 pmap_zero_page_area(m, frag, base - frag); 2456 2457 /* 2458 * If the ending offset is not DEV_BSIZE aligned and the 2459 * valid bit is clear, we have to zero out a portion of 2460 * the last block. 2461 */ 2462 endoff = base + size; 2463 if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff && 2464 (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0) 2465 pmap_zero_page_area(m, endoff, 2466 DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); 2467 2468 /* 2469 * Assert that no previously invalid block that is now being validated 2470 * is already dirty. 2471 */ 2472 KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0, 2473 ("vm_page_set_valid: page %p is dirty", m)); 2474 2475 /* 2476 * Set valid bits inclusive of any overlap. 2477 */ 2478 m->valid |= vm_page_bits(base, size); 2479} 2480 2481/* 2482 * Clear the given bits from the specified page's dirty field. 2483 */ 2484static __inline void 2485vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits) 2486{ 2487 uintptr_t addr; 2488#if PAGE_SIZE < 16384 2489 int shift; 2490#endif 2491 2492 /* 2493 * If the object is locked and the page is neither VPO_BUSY nor 2494 * write mapped, then the page's dirty field cannot possibly be 2495 * set by a concurrent pmap operation. 2496 */ 2497 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2498 if ((m->oflags & VPO_BUSY) == 0 && !pmap_page_is_write_mapped(m)) 2499 m->dirty &= ~pagebits; 2500 else { 2501 /* 2502 * The pmap layer can call vm_page_dirty() without 2503 * holding a distinguished lock. The combination of 2504 * the object's lock and an atomic operation suffices 2505 * to guarantee consistency of the page dirty field. 2506 * 2507 * For the PAGE_SIZE == 32768 case, the compiler already 2508 * properly aligns the dirty field, so no forcible 2509 * alignment is needed. Only require existence of 2510 * atomic_clear_64 when page size is 32768. 2511 */ 2512 addr = (uintptr_t)&m->dirty; 2513#if PAGE_SIZE == 32768 2514 atomic_clear_64((uint64_t *)addr, pagebits); 2515#elif PAGE_SIZE == 16384 2516 atomic_clear_32((uint32_t *)addr, pagebits); 2517#else /* PAGE_SIZE <= 8192 */ 2518 /* 2519 * Use a trick to perform a 32-bit atomic on the 2520 * containing aligned word, to not depend on the existence 2521 * of atomic_clear_{8, 16}. 2522 */ 2523 shift = addr & (sizeof(uint32_t) - 1); 2524#if BYTE_ORDER == BIG_ENDIAN 2525 shift = (sizeof(uint32_t) - sizeof(m->dirty) - shift) * NBBY; 2526#else 2527 shift *= NBBY; 2528#endif 2529 addr &= ~(sizeof(uint32_t) - 1); 2530 atomic_clear_32((uint32_t *)addr, pagebits << shift); 2531#endif /* PAGE_SIZE */ 2532 } 2533} 2534 2535/* 2536 * vm_page_set_validclean: 2537 * 2538 * Sets portions of a page valid and clean. The arguments are expected 2539 * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive 2540 * of any partial chunks touched by the range. The invalid portion of 2541 * such chunks will be zeroed. 2542 * 2543 * (base + size) must be less than or equal to PAGE_SIZE.
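 *
 * For instance (an illustrative sketch, again assuming DEV_BSIZE ==
 * 512): a call with base == 100 while the first block is still
 * invalid zeroes bytes 0-99 of the page, so the whole first block
 * can safely be marked valid and clean even though the caller only
 * supplied bytes 100 and up.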
2544 */ 2545void 2546vm_page_set_validclean(vm_page_t m, int base, int size) 2547{ 2548 vm_page_bits_t oldvalid, pagebits; 2549 int endoff, frag; 2550 2551 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2552 if (size == 0) /* handle degenerate case */ 2553 return; 2554 2555 /* 2556 * If the base is not DEV_BSIZE aligned and the valid 2557 * bit is clear, we have to zero out a portion of the 2558 * first block. 2559 */ 2560 if ((frag = base & ~(DEV_BSIZE - 1)) != base && 2561 (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0) 2562 pmap_zero_page_area(m, frag, base - frag); 2563 2564 /* 2565 * If the ending offset is not DEV_BSIZE aligned and the 2566 * valid bit is clear, we have to zero out a portion of 2567 * the last block. 2568 */ 2569 endoff = base + size; 2570 if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff && 2571 (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0) 2572 pmap_zero_page_area(m, endoff, 2573 DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); 2574 2575 /* 2576 * Set valid, clear dirty bits. If validating the entire 2577 * page we can safely clear the pmap modify bit. We also 2578 * use this opportunity to clear the VPO_NOSYNC flag. If a process 2579 * takes a write fault on a MAP_NOSYNC memory area the flag will 2580 * be set again. 2581 * 2582 * We set valid bits inclusive of any overlap, but we can only 2583 * clear dirty bits for DEV_BSIZE chunks that are fully within 2584 * the range. 2585 */ 2586 oldvalid = m->valid; 2587 pagebits = vm_page_bits(base, size); 2588 m->valid |= pagebits; 2589#if 0 /* NOT YET */ 2590 if ((frag = base & (DEV_BSIZE - 1)) != 0) { 2591 frag = DEV_BSIZE - frag; 2592 base += frag; 2593 size -= frag; 2594 if (size < 0) 2595 size = 0; 2596 } 2597 pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1)); 2598#endif 2599 if (base == 0 && size == PAGE_SIZE) { 2600 /* 2601 * The page can only be modified within the pmap if it is 2602 * mapped, and it can only be mapped if it was previously 2603 * fully valid. 2604 */ 2605 if (oldvalid == VM_PAGE_BITS_ALL) 2606 /* 2607 * Perform the pmap_clear_modify() first. Otherwise, 2608 * a concurrent pmap operation, such as 2609 * pmap_protect(), could clear a modification in the 2610 * pmap and set the dirty field on the page before 2611 * pmap_clear_modify() had begun and after the dirty 2612 * field was cleared here. 2613 */ 2614 pmap_clear_modify(m); 2615 m->dirty = 0; 2616 m->oflags &= ~VPO_NOSYNC; 2617 } else if (oldvalid != VM_PAGE_BITS_ALL) 2618 m->dirty &= ~pagebits; 2619 else 2620 vm_page_clear_dirty_mask(m, pagebits); 2621} 2622 2623void 2624vm_page_clear_dirty(vm_page_t m, int base, int size) 2625{ 2626 2627 vm_page_clear_dirty_mask(m, vm_page_bits(base, size)); 2628} 2629 2630/* 2631 * vm_page_set_invalid: 2632 * 2633 * Invalidates DEV_BSIZE'd chunks within a page. Both the 2634 * valid and dirty bits for the affected areas are cleared. 2635 */ 2636void 2637vm_page_set_invalid(vm_page_t m, int base, int size) 2638{ 2639 vm_page_bits_t bits; 2640 2641 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2642 bits = vm_page_bits(base, size); 2643 if (m->valid == VM_PAGE_BITS_ALL && bits != 0) 2644 pmap_remove_all(m); 2645 KASSERT(!pmap_page_is_mapped(m), 2646 ("vm_page_set_invalid: page %p is mapped", m)); 2647 m->valid &= ~bits; 2648 m->dirty &= ~bits; 2649} 2650 2651/* 2652 * vm_page_zero_invalid() 2653 * 2654 * The kernel assumes that the invalid portions of a page contain 2655 * garbage, but such pages can be mapped into memory by user code.
2656 * When this occurs, we must zero out the non-valid portions of the 2657 * page so user code sees what it expects. 2658 * 2659 * Pages are most often semi-valid when the end of a file is mapped 2660 * into memory and the file's size is not page aligned. 2661 */ 2662void 2663vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) 2664{ 2665 int b; 2666 int i; 2667 2668 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2669 /* 2670 * Scan the valid bits looking for invalid sections that 2671 * must be zeroed. Invalid sub-DEV_BSIZE'd areas (where the 2672 * valid bit may be set) have already been zeroed by 2673 * vm_page_set_validclean(). 2674 */ 2675 for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { 2676 if (i == (PAGE_SIZE / DEV_BSIZE) || 2677 (m->valid & ((vm_page_bits_t)1 << i))) { 2678 if (i > b) { 2679 pmap_zero_page_area(m, 2680 b << DEV_BSHIFT, (i - b) << DEV_BSHIFT); 2681 } 2682 b = i + 1; 2683 } 2684 } 2685 2686 /* 2687 * setvalid is TRUE when we can safely set the zeroed areas 2688 * as being valid. We can do this if there are no cache consistency 2689 * issues, e.g., it is ok to do with UFS, but not ok to do with NFS. 2690 */ 2691 if (setvalid) 2692 m->valid = VM_PAGE_BITS_ALL; 2693} 2694 2695/* 2696 * vm_page_is_valid: 2697 * 2698 * Is (partial) page valid? Note that in the degenerate case 2699 * where size == 0, this returns FALSE if the page is 2700 * entirely invalid, and TRUE otherwise. 2701 */ 2702int 2703vm_page_is_valid(vm_page_t m, int base, int size) 2704{ 2705 vm_page_bits_t bits; 2706 2707 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2708 bits = vm_page_bits(base, size); 2709 if (m->valid && ((m->valid & bits) == bits)) 2710 return (1); 2711 else 2712 return (0); 2713} 2714 2715/* 2716 * Set the page's dirty bits if the page is modified. 2717 */ 2718void 2719vm_page_test_dirty(vm_page_t m) 2720{ 2721 2722 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2723 if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m)) 2724 vm_page_dirty(m); 2725} 2726 2727void 2728vm_page_lock_KBI(vm_page_t m, const char *file, int line) 2729{ 2730 2731 mtx_lock_flags_(vm_page_lockptr(m), 0, file, line); 2732} 2733 2734void 2735vm_page_unlock_KBI(vm_page_t m, const char *file, int line) 2736{ 2737 2738 mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line); 2739} 2740 2741int 2742vm_page_trylock_KBI(vm_page_t m, const char *file, int line) 2743{ 2744 2745 return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line)); 2746} 2747 2748#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) 2749void 2750vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line) 2751{ 2752 2753 mtx_assert_(vm_page_lockptr(m), a, file, line); 2754} 2755#endif 2756 2757int so_zerocp_fullpage = 0; 2758 2759/* 2760 * Replace the given page with a copy. The copied page assumes 2761 * the portion of the given page's "wire_count" that is not the 2762 * responsibility of this copy-on-write mechanism. 2763 * 2764 * The object containing the given page must have a non-zero 2765 * paging-in-progress count and be locked.
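 *
 * Rough lifecycle of this zero-copy COW scheme (a descriptive
 * sketch): vm_page_cowsetup() increments m->cow and write-protects
 * the page's mappings; a later write fault reaches
 * vm_page_cowfault(), which installs a private copy of the page in
 * the object; vm_page_cowclear() releases one COW reference once the
 * transmit that referenced the page completes.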
2766 */ 2767void 2768vm_page_cowfault(vm_page_t m) 2769{ 2770 vm_page_t mnew; 2771 vm_object_t object; 2772 vm_pindex_t pindex; 2773 2774 mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED); 2775 vm_page_lock_assert(m, MA_OWNED); 2776 object = m->object; 2777 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 2778 KASSERT(object->paging_in_progress != 0, 2779 ("vm_page_cowfault: object %p's paging-in-progress count is zero.", 2780 object)); 2781 pindex = m->pindex; 2782 2783 retry_alloc: 2784 pmap_remove_all(m); 2785 vm_page_remove(m); 2786 mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); 2787 if (mnew == NULL) { 2788 vm_page_insert(m, object, pindex); 2789 vm_page_unlock(m); 2790 VM_OBJECT_UNLOCK(object); 2791 VM_WAIT; 2792 VM_OBJECT_LOCK(object); 2793 if (m == vm_page_lookup(object, pindex)) { 2794 vm_page_lock(m); 2795 goto retry_alloc; 2796 } else { 2797 /* 2798 * Page disappeared during the wait. 2799 */ 2800 return; 2801 } 2802 } 2803 2804 if (m->cow == 0) { 2805 /* 2806 * check to see if we raced with an xmit complete when 2807 * waiting to allocate a page. If so, put things back 2808 * the way they were 2809 */ 2810 vm_page_unlock(m); 2811 vm_page_lock(mnew); 2812 vm_page_free(mnew); 2813 vm_page_unlock(mnew); 2814 vm_page_insert(m, object, pindex); 2815 } else { /* clear COW & copy page */ 2816 if (!so_zerocp_fullpage) 2817 pmap_copy_page(m, mnew); 2818 mnew->valid = VM_PAGE_BITS_ALL; 2819 vm_page_dirty(mnew); 2820 mnew->wire_count = m->wire_count - m->cow; 2821 m->wire_count = m->cow; 2822 vm_page_unlock(m); 2823 } 2824} 2825 2826void 2827vm_page_cowclear(vm_page_t m) 2828{ 2829 2830 vm_page_lock_assert(m, MA_OWNED); 2831 if (m->cow) { 2832 m->cow--; 2833 /* 2834 * let vm_fault add back write permission lazily 2835 */ 2836 } 2837 /* 2838 * sf_buf_free() will free the page, so we needn't do it here 2839 */ 2840} 2841 2842int 2843vm_page_cowsetup(vm_page_t m) 2844{ 2845 2846 vm_page_lock_assert(m, MA_OWNED); 2847 if ((m->flags & PG_FICTITIOUS) != 0 || 2848 (m->oflags & VPO_UNMANAGED) != 0 || 2849 m->cow == USHRT_MAX - 1 || !VM_OBJECT_TRYLOCK(m->object)) 2850 return (EBUSY); 2851 m->cow++; 2852 pmap_remove_write(m); 2853 VM_OBJECT_UNLOCK(m->object); 2854 return (0); 2855} 2856 2857#ifdef INVARIANTS 2858void 2859vm_page_object_lock_assert(vm_page_t m) 2860{ 2861 2862 /* 2863 * Certain of the page's fields may only be modified by the 2864 * holder of the containing object's lock or the setter of the 2865 * page's VPO_BUSY flag. Unfortunately, the setter of the 2866 * VPO_BUSY flag is not recorded, and thus cannot be checked 2867 * here. 
2868 */ 2869 if (m->object != NULL && (m->oflags & VPO_BUSY) == 0) 2870 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2871} 2872#endif 2873 2874#include "opt_ddb.h" 2875#ifdef DDB 2876#include <sys/kernel.h> 2877 2878#include <ddb/ddb.h> 2879 2880DB_SHOW_COMMAND(page, vm_page_print_page_info) 2881{ 2882 db_printf("cnt.v_free_count: %d\n", cnt.v_free_count); 2883 db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count); 2884 db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count); 2885 db_printf("cnt.v_active_count: %d\n", cnt.v_active_count); 2886 db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count); 2887 db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved); 2888 db_printf("cnt.v_free_min: %d\n", cnt.v_free_min); 2889 db_printf("cnt.v_free_target: %d\n", cnt.v_free_target); 2890 db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min); 2891 db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target); 2892} 2893 2894DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) 2895{ 2896 2897 db_printf("PQ_FREE:"); 2898 db_printf(" %d", cnt.v_free_count); 2899 db_printf("\n"); 2900 2901 db_printf("PQ_CACHE:"); 2902 db_printf(" %d", cnt.v_cache_count); 2903 db_printf("\n"); 2904 2905 db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n", 2906 *vm_page_queues[PQ_ACTIVE].cnt, 2907 *vm_page_queues[PQ_INACTIVE].cnt); 2908} 2909 2910DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) 2911{ 2912 vm_page_t m; 2913 boolean_t phys; 2914 2915 if (!have_addr) { 2916 db_printf("show pginfo addr\n"); 2917 return; 2918 } 2919 2920 phys = strchr(modif, 'p') != NULL; 2921 if (phys) 2922 m = PHYS_TO_VM_PAGE(addr); 2923 else 2924 m = (vm_page_t)addr; 2925 db_printf( 2926 "page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n" 2927 " af 0x%x of 0x%x f 0x%x act %d busy %d valid 0x%x dirty 0x%x\n", 2928 m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, 2929 m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags, 2930 m->flags, m->act_count, m->busy, m->valid, m->dirty); 2931} 2932#endif /* DDB */ 2933
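/*
 * Example ddb(4) session using the commands above (illustrative
 * only; exact addresses will vary):
 *
 *	db> show page
 *	db> show pageq
 *	db> show pginfo <vm_page address>
 *	db> show pginfo/p <physical address>
 */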