vm_page.c revision 232066
/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 */

/*-
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *			GENERAL RULES ON VM_PAGE MANIPULATION
 *
 *	- a pageq mutex is required when adding or removing a page from a
 *	  page queue (vm_page_queue[]), regardless of other mutexes or the
 *	  busy state of a page.
 *
 *	- The object mutex is held when inserting or removing
 *	  pages from an object (vm_page_insert() or vm_page_remove()).
 *
 */

/*
 *	Resident memory management module.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/9/sys/vm/vm_page.c 232066 2012-02-23 19:20:36Z kmacy $");

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

#include <machine/md_var.h>

/*
 * Associated with each page of user-allocatable memory is a
 * page structure.
 */
struct vpgqueues vm_page_queues[PQ_COUNT];
struct vpglocks vm_page_queue_lock;
struct vpglocks vm_page_queue_free_lock;

struct vpglocks	pa_lock[PA_LOCK_COUNT];

vm_page_t vm_page_array = 0;
int vm_page_array_size = 0;
long first_page = 0;
int vm_page_zero_count = 0;

static int boot_pages = UMA_BOOT_PAGES;
TUNABLE_INT("vm.boot_pages", &boot_pages);
SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RD, &boot_pages, 0,
	"number of pages allocated for bootstrapping the VM system");

static int pa_tryrelock_restart;
SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
    &pa_tryrelock_restart, 0, "Number of tryrelock restarts");

static uma_zone_t fakepg_zone;

static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
static void vm_page_queue_remove(int queue, vm_page_t m);
static void vm_page_enqueue(int queue, vm_page_t m);
static void vm_page_init_fakepg(void *dummy);

SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL);

static void
vm_page_init_fakepg(void *dummy)
{

	fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM);
}

/* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */
#if PAGE_SIZE == 32768
#ifdef CTASSERT
CTASSERT(sizeof(u_long) >= 8);
#endif
#endif

/*
 * Try to acquire a physical address lock while a pmap is locked.  If we
 * fail to trylock we unlock and lock the pmap directly and cache the
 * locked pa in *locked.  The caller should then restart their loop in case
 * the virtual to physical mapping has changed.
 */
int
vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked)
{
	vm_paddr_t lockpa;

	lockpa = *locked;
	*locked = pa;
	if (lockpa) {
		PA_LOCK_ASSERT(lockpa, MA_OWNED);
		if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa))
			return (0);
		PA_UNLOCK(lockpa);
	}
	if (PA_TRYLOCK(pa))
		return (0);
	PMAP_UNLOCK(pmap);
	atomic_add_int(&pa_tryrelock_restart, 1);
	PA_LOCK(pa);
	PMAP_LOCK(pmap);
	return (EAGAIN);
}
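/*
 * Illustrative sketch (not part of the original file): a typical caller,
 * such as a pmap_extract_and_hold() implementation, restarts its lookup
 * whenever vm_page_pa_tryrelock() returns EAGAIN, because the pmap lock
 * was dropped and the virtual-to-physical mapping may have changed:
 *
 *	vm_paddr_t locked_pa = 0;
 *
 *	PMAP_LOCK(pmap);
 * retry:
 *	pa = ...;			// translate va under the pmap lock
 *	if (vm_page_pa_tryrelock(pmap, pa, &locked_pa))
 *		goto retry;		// EAGAIN: translation may be stale
 *	...				// both the pa lock and pmap lock held
 *	PA_UNLOCK_COND(locked_pa);
 *	PMAP_UNLOCK(pmap);
 */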
/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 */
void
vm_set_page_size(void)
{
	if (cnt.v_page_size == 0)
		cnt.v_page_size = PAGE_SIZE;
	if (((cnt.v_page_size - 1) & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
}

/*
 *	vm_page_blacklist_lookup:
 *
 *	See if a physical address in this page has been listed
 *	in the blacklist tunable.  Entries in the tunable are
 *	separated by spaces or commas.  If an invalid integer is
 *	encountered then the rest of the string is skipped.
 */
static int
vm_page_blacklist_lookup(char *list, vm_paddr_t pa)
{
	vm_paddr_t bad;
	char *cp, *pos;

	for (pos = list; *pos != '\0'; pos = cp) {
		bad = strtoq(pos, &cp, 0);
		if (*cp != '\0') {
			if (*cp == ' ' || *cp == ',') {
				cp++;
				if (cp == pos)
					continue;
			} else
				break;
		}
		if (pa == trunc_page(bad))
			return (1);
	}
	return (0);
}
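/*
 * Illustrative note (not part of the original file): the list parsed above
 * comes from the loader environment variable "vm.blacklist".  strtoq()
 * accepts any base-prefixed integer, so a loader.conf entry such as
 *
 *	vm.blacklist="0x7f654000,0x7f655000"
 *
 * (addresses are examples only) keeps the pages containing those physical
 * addresses off the free lists at boot; vm_page_startup() below consults
 * this function for every page it would otherwise add.
 */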
/*
 *	vm_page_startup:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 */
vm_offset_t
vm_page_startup(vm_offset_t vaddr)
{
	vm_offset_t mapped;
	vm_paddr_t page_range;
	vm_paddr_t new_end;
	int i;
	vm_paddr_t pa;
	vm_paddr_t last_pa;
	char *list;

	/* the biggest memory array is the second group of pages */
	vm_paddr_t end;
	vm_paddr_t biggestsize;
	vm_paddr_t low_water, high_water;
	int biggestone;

	biggestsize = 0;
	biggestone = 0;
	vaddr = round_page(vaddr);

	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	low_water = phys_avail[0];
	high_water = phys_avail[1];

	for (i = 0; phys_avail[i + 1]; i += 2) {
		vm_paddr_t size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		if (phys_avail[i] < low_water)
			low_water = phys_avail[i];
		if (phys_avail[i + 1] > high_water)
			high_water = phys_avail[i + 1];
	}

#ifdef XEN
	low_water = 0;
#endif

	end = phys_avail[biggestone+1];

	/*
	 * Initialize the locks.
	 */
	mtx_init(&vm_page_queue_mtx, "vm page queue mutex", NULL, MTX_DEF |
	    MTX_RECURSE);
	mtx_init(&vm_page_queue_free_mtx, "vm page queue free mutex", NULL,
	    MTX_DEF);

	/* Setup page locks. */
	for (i = 0; i < PA_LOCK_COUNT; i++)
		mtx_init(&pa_lock[i].data, "page lock", NULL, MTX_DEF);

	/*
	 * Initialize the queue headers for the hold queue, the active queue,
	 * and the inactive queue.
	 */
	for (i = 0; i < PQ_COUNT; i++)
		TAILQ_INIT(&vm_page_queues[i].pl);
	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
	vm_page_queues[PQ_HOLD].cnt = &cnt.v_active_count;

	/*
	 * Allocate memory for use when boot strapping the kernel memory
	 * allocator.
	 */
	new_end = end - (boot_pages * UMA_SLAB_SIZE);
	new_end = trunc_page(new_end);
	mapped = pmap_map(&vaddr, new_end, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	bzero((void *)mapped, end - new_end);
	uma_startup((void *)mapped, boot_pages);

#if defined(__amd64__) || defined(__i386__) || defined(__arm__) || \
    defined(__mips__)
	/*
	 * Allocate a bitmap to indicate that a random physical page
	 * needs to be included in a minidump.
	 *
	 * The amd64 port needs this to indicate which direct map pages
	 * need to be dumped, via calls to dump_add_page()/dump_drop_page().
	 *
	 * However, i386 still needs this workspace internally within the
	 * minidump code.  In theory, they are not needed on i386, but are
	 * included should the sf_buf code decide to use them.
	 */
	last_pa = 0;
	for (i = 0; dump_avail[i + 1] != 0; i += 2)
		if (dump_avail[i + 1] > last_pa)
			last_pa = dump_avail[i + 1];
	page_range = last_pa / PAGE_SIZE;
	vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY);
	new_end -= vm_page_dump_size;
	vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
	    new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
	bzero((void *)vm_page_dump, vm_page_dump_size);
#endif
#ifdef __amd64__
	/*
	 * Request that the physical pages underlying the message buffer be
	 * included in a crash dump.  Since the message buffer is accessed
	 * through the direct map, they are not automatically included.
	 */
	pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr);
	last_pa = pa + round_page(msgbufsize);
	while (pa < last_pa) {
		dump_add_page(pa);
		pa += PAGE_SIZE;
	}
#endif
	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */
	first_page = low_water / PAGE_SIZE;
#ifdef VM_PHYSSEG_SPARSE
	page_range = 0;
	for (i = 0; phys_avail[i + 1] != 0; i += 2)
		page_range += atop(phys_avail[i + 1] - phys_avail[i]);
#elif defined(VM_PHYSSEG_DENSE)
	page_range = high_water / PAGE_SIZE - first_page;
#else
#error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
#endif
	end = new_end;

	/*
	 * Reserve an unmapped guard page to trap access to vm_page_array[-1].
	 */
	vaddr += PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */
	new_end = trunc_page(end - page_range * sizeof(struct vm_page));
	mapped = pmap_map(&vaddr, new_end, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	vm_page_array = (vm_page_t) mapped;
#if VM_NRESERVLEVEL > 0
	/*
	 * Allocate memory for the reservation management system's data
	 * structures.
	 */
	new_end = vm_reserv_startup(&vaddr, new_end, high_water);
#endif
#if defined(__amd64__) || defined(__mips__)
	/*
	 * pmap_map on amd64 and mips can come out of the direct-map, not kvm
	 * like i386, so the pages must be tracked for a crashdump to include
	 * this data.  This includes the vm_page_array and the early UMA
	 * bootstrap pages.
	 */
	for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE)
		dump_add_page(pa);
#endif
	phys_avail[biggestone + 1] = new_end;

	/*
	 * Clear all of the page structures
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	for (i = 0; i < page_range; i++)
		vm_page_array[i].order = VM_NFREEORDER;
	vm_page_array_size = page_range;

	/*
	 * Initialize the physical memory allocator.
	 */
	vm_phys_init();

	/*
	 * Add every available physical page that is not blacklisted to
	 * the free lists.
	 */
	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	list = getenv("vm.blacklist");
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		pa = phys_avail[i];
		last_pa = phys_avail[i + 1];
		while (pa < last_pa) {
			if (list != NULL &&
			    vm_page_blacklist_lookup(list, pa))
				printf("Skipping page with pa 0x%jx\n",
				    (uintmax_t)pa);
			else
				vm_phys_add_page(pa);
			pa += PAGE_SIZE;
		}
	}
	freeenv(list);
#if VM_NRESERVLEVEL > 0
	/*
	 * Initialize the reservation management system.
	 */
	vm_reserv_init();
#endif
	return (vaddr);
}
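/*
 * Illustrative note (not part of the original file): phys_avail[] is a
 * zero-terminated array of (start, end) physical address pairs describing
 * the usable RAM ranges, for example
 *
 *	phys_avail[] = { 0x0001000, 0x009f000,	// range 0
 *			 0x0100000, 0x7fe0000,	// range 1 (the "biggest")
 *			 0, 0 };		// terminator
 *
 * (values are examples only).  vm_page_startup() carves its bootstrap
 * allocations (UMA slabs, the dump bitmap, vm_page_array) off the top of
 * the biggest range, shrinking that range's end, and then hands every
 * remaining, non-blacklisted page to vm_phys_add_page().
 */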

CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0);

void
vm_page_aflag_set(vm_page_t m, uint8_t bits)
{
	uint32_t *addr, val;

	/*
	 * The PGA_WRITEABLE flag can only be set if the page is managed and
	 * VPO_BUSY.  Currently, this flag is only set by pmap_enter().
	 */
	KASSERT((bits & PGA_WRITEABLE) == 0 ||
	    (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY,
	    ("PGA_WRITEABLE and !VPO_BUSY"));

	/*
	 * We want to use atomic updates for m->aflags, which is a
	 * byte wide.  Not all architectures provide atomic operations
	 * on the single-byte destination.  Punt and access the whole
	 * 4-byte word with an atomic update.  Parallel non-atomic
	 * updates to the fields included in the update by proximity
	 * are handled properly by atomics.
	 */
	addr = (void *)&m->aflags;
	MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0);
	val = bits;
#if BYTE_ORDER == BIG_ENDIAN
	val <<= 24;
#endif
	atomic_set_32(addr, val);
}

void
vm_page_aflag_clear(vm_page_t m, uint8_t bits)
{
	uint32_t *addr, val;

	/*
	 * The PGA_REFERENCED flag can only be cleared if the object
	 * containing the page is locked.
	 */
	KASSERT((bits & PGA_REFERENCED) == 0 || VM_OBJECT_LOCKED(m->object),
	    ("PGA_REFERENCED and !VM_OBJECT_LOCKED"));

	/*
	 * See the comment in vm_page_aflag_set().
	 */
	addr = (void *)&m->aflags;
	MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0);
	val = bits;
#if BYTE_ORDER == BIG_ENDIAN
	val <<= 24;
#endif
	atomic_clear_32(addr, val);
}

void
vm_page_reference(vm_page_t m)
{

	vm_page_aflag_set(m, PGA_REFERENCED);
}

void
vm_page_busy(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT((m->oflags & VPO_BUSY) == 0,
	    ("vm_page_busy: page already busy!!!"));
	m->oflags |= VPO_BUSY;
}

/*
 *	vm_page_flash:
 *
 *	wakeup anyone waiting for the page.
 */
void
vm_page_flash(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (m->oflags & VPO_WANTED) {
		m->oflags &= ~VPO_WANTED;
		wakeup(m);
	}
}
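/*
 * Illustrative note (not part of the original file): because aflags is a
 * uint8_t updated through a 4-byte atomic, the bit value must land in the
 * byte of the word that actually holds aflags.  On a little-endian machine
 * that is the low byte, so no shift is needed; on a big-endian machine it
 * is the most significant byte, hence the "val <<= 24" above.  For example,
 * setting PGA_REFERENCED (0x02) on a big-endian machine ORs 0x02000000
 * into the word, which touches only the aflags byte; the CTASSERT above
 * guarantees the word is naturally aligned.
 */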

/*
 *	vm_page_wakeup:
 *
 *	clear the VPO_BUSY flag and wakeup anyone waiting for the
 *	page.
 *
 */
void
vm_page_wakeup(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT(m->oflags & VPO_BUSY, ("vm_page_wakeup: page not busy!!!"));
	m->oflags &= ~VPO_BUSY;
	vm_page_flash(m);
}

void
vm_page_io_start(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	m->busy++;
}

void
vm_page_io_finish(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT(m->busy > 0, ("vm_page_io_finish: page %p is not busy", m));
	m->busy--;
	if (m->busy == 0)
		vm_page_flash(m);
}

/*
 * Keep page from being freed by the page daemon; this has
 * much of the same effect as wiring, except much lower
 * overhead and should be used only for *very* temporary
 * holding ("wiring").
 */
void
vm_page_hold(vm_page_t mem)
{

	vm_page_lock_assert(mem, MA_OWNED);
	mem->hold_count++;
}

void
vm_page_unhold(vm_page_t mem)
{

	vm_page_lock_assert(mem, MA_OWNED);
	--mem->hold_count;
	KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!"));
	if (mem->hold_count == 0 && mem->queue == PQ_HOLD)
		vm_page_free_toq(mem);
}

/*
 *	vm_page_unhold_pages:
 *
 *	Unhold each of the pages that is referenced by the given array.
 */
void
vm_page_unhold_pages(vm_page_t *ma, int count)
{
	struct mtx *mtx, *new_mtx;

	mtx = NULL;
	for (; count != 0; count--) {
		/*
		 * Avoid releasing and reacquiring the same page lock.
		 */
		new_mtx = vm_page_lockptr(*ma);
		if (mtx != new_mtx) {
			if (mtx != NULL)
				mtx_unlock(mtx);
			mtx = new_mtx;
			mtx_lock(mtx);
		}
		vm_page_unhold(*ma);
		ma++;
	}
	if (mtx != NULL)
		mtx_unlock(mtx);
}
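/*
 * Illustrative sketch (not part of the original file) of how the two busy
 * mechanisms above are typically paired by callers, both under the object
 * lock:
 *
 *	vm_page_busy(m);		// exclusive: block other lookups
 *	...				// mutate the page's identity/validity
 *	vm_page_wakeup(m);		// clear VPO_BUSY, wake sleepers
 *
 *	vm_page_io_start(m);		// shared: count one I/O in flight
 *	...				// issue asynchronous I/O on the page
 *	vm_page_io_finish(m);		// last finisher flashes the page
 */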

/*
 *	vm_page_getfake:
 *
 *	Create a fictitious page with the specified physical address and
 *	memory attribute.  The memory attribute is the only machine-
 *	dependent aspect of a fictitious page that must be initialized.
 */
vm_page_t
vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr)
{
	vm_page_t m;

	m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO);
	m->phys_addr = paddr;
	m->queue = PQ_NONE;
	/* Fictitious pages don't use "segind". */
	m->flags = PG_FICTITIOUS;
	/* Fictitious pages don't use "order" or "pool". */
	m->oflags = VPO_BUSY | VPO_UNMANAGED;
	m->wire_count = 1;
	pmap_page_set_memattr(m, memattr);
	return (m);
}

/*
 *	vm_page_putfake:
 *
 *	Release a fictitious page.
 */
void
vm_page_putfake(vm_page_t m)
{

	KASSERT((m->flags & PG_FICTITIOUS) != 0,
	    ("vm_page_putfake: bad page %p", m));
	uma_zfree(fakepg_zone, m);
}

/*
 *	vm_page_updatefake:
 *
 *	Update the given fictitious page to the specified physical address and
 *	memory attribute.
 */
void
vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
{

	KASSERT((m->flags & PG_FICTITIOUS) != 0,
	    ("vm_page_updatefake: bad page %p", m));
	m->phys_addr = paddr;
	pmap_page_set_memattr(m, memattr);
}

/*
 *	vm_page_free:
 *
 *	Free a page.
 */
void
vm_page_free(vm_page_t m)
{

	m->flags &= ~PG_ZERO;
	vm_page_free_toq(m);
}

/*
 *	vm_page_free_zero:
 *
 *	Free a page to the zeroed-pages queue
 */
void
vm_page_free_zero(vm_page_t m)
{

	m->flags |= PG_ZERO;
	vm_page_free_toq(m);
}

/*
 *	vm_page_sleep:
 *
 *	Sleep and release the page and page queues locks.
 *
 *	The object containing the given page must be locked.
 */
void
vm_page_sleep(vm_page_t m, const char *msg)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (mtx_owned(&vm_page_queue_mtx))
		vm_page_unlock_queues();
	if (mtx_owned(vm_page_lockptr(m)))
		vm_page_unlock(m);

	/*
	 * It's possible that while we sleep, the page will get
	 * unbusied and freed.  If we are holding the object
	 * lock, we will assume we hold a reference to the object
	 * such that even if m->object changes, we can re-lock
	 * it.
	 */
	m->oflags |= VPO_WANTED;
	msleep(m, VM_OBJECT_MTX(m->object), PVM, msg, 0);
}
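/*
 * Illustrative sketch (not part of the original file): since msleep()
 * drops and retakes the object lock, callers sleep on a busy page and
 * then re-validate, as the page may have been freed or re-used while
 * unlocked (object lock held throughout, "pgwait" is an arbitrary wmesg):
 *
 *	while ((m = vm_page_lookup(object, pindex)) != NULL &&
 *	    ((m->oflags & VPO_BUSY) != 0 || m->busy != 0))
 *		vm_page_sleep(m, "pgwait");	// drops/retakes obj lock
 */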

/*
 *	vm_page_dirty:
 *
 *	Set all bits in the page's dirty field.
 *
 *	The object containing the specified page must be locked if the
 *	call is made from the machine-independent layer.
 *
 *	See vm_page_clear_dirty_mask().
 */
void
vm_page_dirty(vm_page_t m)
{

	KASSERT((m->flags & PG_CACHED) == 0,
	    ("vm_page_dirty: page in cache!"));
	KASSERT(!VM_PAGE_IS_FREE(m),
	    ("vm_page_dirty: page is free!"));
	KASSERT(m->valid == VM_PAGE_BITS_ALL,
	    ("vm_page_dirty: page is invalid!"));
	m->dirty = VM_PAGE_BITS_ALL;
}

/*
 *	vm_page_splay:
 *
 *	Implements Sleator and Tarjan's top-down splay algorithm.  Returns
 *	the vm_page containing the given pindex.  If, however, that
 *	pindex is not found in the vm_object, returns a vm_page that is
 *	adjacent to the pindex, coming before or after it.
 */
vm_page_t
vm_page_splay(vm_pindex_t pindex, vm_page_t root)
{
	struct vm_page dummy;
	vm_page_t lefttreemax, righttreemin, y;

	if (root == NULL)
		return (root);
	lefttreemax = righttreemin = &dummy;
	for (;; root = y) {
		if (pindex < root->pindex) {
			if ((y = root->left) == NULL)
				break;
			if (pindex < y->pindex) {
				/* Rotate right. */
				root->left = y->right;
				y->right = root;
				root = y;
				if ((y = root->left) == NULL)
					break;
			}
			/* Link into the new root's right tree. */
			righttreemin->left = root;
			righttreemin = root;
		} else if (pindex > root->pindex) {
			if ((y = root->right) == NULL)
				break;
			if (pindex > y->pindex) {
				/* Rotate left. */
				root->right = y->left;
				y->left = root;
				root = y;
				if ((y = root->right) == NULL)
					break;
			}
			/* Link into the new root's left tree. */
			lefttreemax->right = root;
			lefttreemax = root;
		} else
			break;
	}
	/* Assemble the new root. */
	lefttreemax->right = root->left;
	righttreemin->left = root->right;
	root->left = dummy.right;
	root->right = dummy.left;
	return (root);
}
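/*
 * Illustrative note (not part of the original file): vm_page_splay()
 * restructures the tree, so its return value is the tree's new root and
 * must be stored back.  The canonical caller pattern (see vm_page_lookup()
 * below) is:
 *
 *	m = vm_page_splay(pindex, object->root);
 *	if ((object->root = m)->pindex != pindex)
 *		m = NULL;	// pindex not resident; m was only a neighbor
 */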

/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object and object list.
 *
 *	The pagetables are not updated but will presumably fault the page
 *	in if necessary, or if a kernel page the caller will at some point
 *	enter the page into the kernel's pmap.  We are not allowed to block
 *	here so we *can't* do this anyway.
 *
 *	The object and page must be locked.
 *	This routine may not block.
 */
void
vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t root;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if (m->object != NULL)
		panic("vm_page_insert: page already inserted");

	/*
	 * Record the object/offset pair in this page
	 */
	m->object = object;
	m->pindex = pindex;

	/*
	 * Now link into the object's ordered list of backed pages.
	 */
	root = object->root;
	if (root == NULL) {
		m->left = NULL;
		m->right = NULL;
		TAILQ_INSERT_TAIL(&object->memq, m, listq);
	} else {
		root = vm_page_splay(pindex, root);
		if (pindex < root->pindex) {
			m->left = root->left;
			m->right = root;
			root->left = NULL;
			TAILQ_INSERT_BEFORE(root, m, listq);
		} else if (pindex == root->pindex)
			panic("vm_page_insert: offset already allocated");
		else {
			m->right = root->right;
			m->left = root;
			root->right = NULL;
			TAILQ_INSERT_AFTER(&object->memq, root, m, listq);
		}
	}
	object->root = m;

	/*
	 * show that the object has one more resident page.
	 */
	object->resident_page_count++;
	/*
	 * Hold the vnode until the last page is released.
	 */
	if (object->resident_page_count == 1 && object->type == OBJT_VNODE)
		vhold((struct vnode *)object->handle);

	/*
	 * Since we are inserting a new and possibly dirty page,
	 * update the object's OBJ_MIGHTBEDIRTY flag.
	 */
	if (m->aflags & PGA_WRITEABLE)
		vm_object_set_writeable_dirty(object);
}

/*
 *	vm_page_remove:
 *				NOTE: used by device pager as well -wfj
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list, but does not invalidate/terminate
 *	the backing store.
 *
 *	The object and page must be locked.
 *	The underlying pmap entry (if any) is NOT removed here.
 *	This routine may not block.
 */
void
vm_page_remove(vm_page_t m)
{
	vm_object_t object;
	vm_page_t next, prev, root;

	if ((m->oflags & VPO_UNMANAGED) == 0)
		vm_page_lock_assert(m, MA_OWNED);
	if ((object = m->object) == NULL)
		return;
	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if (m->oflags & VPO_BUSY) {
		m->oflags &= ~VPO_BUSY;
		vm_page_flash(m);
	}

	/*
	 * Now remove from the object's list of backed pages.
	 */
	if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) {
		/*
		 * Since the page's successor in the list is also its parent
		 * in the tree, its right subtree must be empty.
		 */
		next->left = m->left;
		KASSERT(m->right == NULL,
		    ("vm_page_remove: page %p has right child", m));
	} else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL &&
	    prev->right == m) {
		/*
		 * Since the page's predecessor in the list is also its parent
		 * in the tree, its left subtree must be empty.
		 */
		KASSERT(m->left == NULL,
		    ("vm_page_remove: page %p has left child", m));
		prev->right = m->right;
	} else {
		if (m != object->root)
			vm_page_splay(m->pindex, object->root);
		if (m->left == NULL)
			root = m->right;
		else if (m->right == NULL)
			root = m->left;
		else {
			/*
			 * Move the page's successor to the root, because
			 * pages are usually removed in ascending order.
			 */
			if (m->right != next)
				vm_page_splay(m->pindex, m->right);
			next->left = m->left;
			root = next;
		}
		object->root = root;
	}
	TAILQ_REMOVE(&object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */
	object->resident_page_count--;
	/*
	 * The vnode may now be recycled.
	 */
	if (object->resident_page_count == 0 && object->type == OBJT_VNODE)
		vdrop((struct vnode *)object->handle);

	m->object = NULL;
}

/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.
 *	This routine may not block.
 *	This is a critical path routine
 */
vm_page_t
vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if ((m = object->root) != NULL && m->pindex != pindex) {
		m = vm_page_splay(pindex, m);
		if ((object->root = m)->pindex != pindex)
			m = NULL;
	}
	return (m);
}
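/*
 * Illustrative sketch (not part of the original file): a typical
 * residency check done by callers, with the object locked:
 *
 *	VM_OBJECT_LOCK(object);
 *	m = vm_page_lookup(object, OFF_TO_IDX(offset));
 *	if (m != NULL && m->valid == VM_PAGE_BITS_ALL)
 *		...			// page is resident and fully valid
 *	VM_OBJECT_UNLOCK(object);
 */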

/*
 *	vm_page_find_least:
 *
 *	Returns the page associated with the object with least pindex
 *	greater than or equal to the parameter pindex, or NULL.
 *
 *	The object must be locked.
 *	The routine may not block.
 */
vm_page_t
vm_page_find_least(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if ((m = TAILQ_FIRST(&object->memq)) != NULL) {
		if (m->pindex < pindex) {
			m = vm_page_splay(pindex, object->root);
			if ((object->root = m)->pindex < pindex)
				m = TAILQ_NEXT(m, listq);
		}
	}
	return (m);
}

/*
 * Returns the given page's successor (by pindex) within the object if it is
 * resident; if none is found, NULL is returned.
 *
 * The object must be locked.
 */
vm_page_t
vm_page_next(vm_page_t m)
{
	vm_page_t next;

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((next = TAILQ_NEXT(m, listq)) != NULL &&
	    next->pindex != m->pindex + 1)
		next = NULL;
	return (next);
}

/*
 * Returns the given page's predecessor (by pindex) within the object if it is
 * resident; if none is found, NULL is returned.
 *
 * The object must be locked.
 */
vm_page_t
vm_page_prev(vm_page_t m)
{
	vm_page_t prev;

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL &&
	    prev->pindex != m->pindex - 1)
		prev = NULL;
	return (prev);
}

/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 *	This routine may not block.
 *
 *	Note: swap associated with the page must be invalidated by the move.  We
 *	      have to do this for several reasons:  (1) we aren't freeing the
 *	      page, (2) we are dirtying the page, (3) the VM system is probably
 *	      moving the page from object A to B, and will then later move
 *	      the backing store from A to B and we can't have a conflict.
 *
 *	Note: we *always* dirty the page.  It is necessary both for the
 *	      fact that we moved it, and because we may be invalidating
 *	      swap.  If the page is on the cache, we have to deactivate it
 *	      or vm_page_dirty() will panic.  Dirty pages are not allowed
 *	      on the cache.
 */
void
vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
{

	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	vm_page_dirty(m);
}
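/*
 * Illustrative sketch (not part of the original file): walking the resident
 * pages of an object over a pindex range [start, end) combines
 * vm_page_find_least() with the object's pindex-ordered memq:
 *
 *	for (m = vm_page_find_least(object, start);
 *	    m != NULL && m->pindex < end;
 *	    m = TAILQ_NEXT(m, listq)) {
 *		...			// process resident page m
 *	}
 */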

/*
 *	Convert all of the given object's cached pages that have a
 *	pindex within the given range into free pages.  If the value
 *	zero is given for "end", then the range's upper bound is
 *	infinity.  If the given object is backed by a vnode and it
 *	transitions from having one or more cached pages to none, the
 *	vnode's hold count is reduced.
 */
void
vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	vm_page_t m, m_next;
	boolean_t empty;

	mtx_lock(&vm_page_queue_free_mtx);
	if (__predict_false(object->cache == NULL)) {
		mtx_unlock(&vm_page_queue_free_mtx);
		return;
	}
	m = object->cache = vm_page_splay(start, object->cache);
	if (m->pindex < start) {
		if (m->right == NULL)
			m = NULL;
		else {
			m_next = vm_page_splay(start, m->right);
			m_next->left = m;
			m->right = NULL;
			m = object->cache = m_next;
		}
	}

	/*
	 * At this point, "m" is either (1) a reference to the page
	 * with the least pindex that is greater than or equal to
	 * "start" or (2) NULL.
	 */
	for (; m != NULL && (m->pindex < end || end == 0); m = m_next) {
		/*
		 * Find "m"'s successor and remove "m" from the
		 * object's cache.
		 */
		if (m->right == NULL) {
			object->cache = m->left;
			m_next = NULL;
		} else {
			m_next = vm_page_splay(start, m->right);
			m_next->left = m->left;
			object->cache = m_next;
		}
		/* Convert "m" to a free page. */
		m->object = NULL;
		m->valid = 0;
		/* Clear PG_CACHED and set PG_FREE. */
		m->flags ^= PG_CACHED | PG_FREE;
		KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE,
		    ("vm_page_cache_free: page %p has inconsistent flags", m));
		cnt.v_cache_count--;
		cnt.v_free_count++;
	}
	empty = object->cache == NULL;
	mtx_unlock(&vm_page_queue_free_mtx);
	if (object->type == OBJT_VNODE && empty)
		vdrop(object->handle);
}

/*
 *	Returns the cached page that is associated with the given
 *	object and offset.  If, however, none exists, returns NULL.
 *
 *	The free page queue must be locked.
 */
static inline vm_page_t
vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	if ((m = object->cache) != NULL && m->pindex != pindex) {
		m = vm_page_splay(pindex, m);
		if ((object->cache = m)->pindex != pindex)
			m = NULL;
	}
	return (m);
}

/*
 *	Remove the given cached page from its containing object's
 *	collection of cached pages.
 *
 *	The free page queue must be locked.
 */
void
vm_page_cache_remove(vm_page_t m)
{
	vm_object_t object;
	vm_page_t root;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	KASSERT((m->flags & PG_CACHED) != 0,
	    ("vm_page_cache_remove: page %p is not cached", m));
	object = m->object;
	if (m != object->cache) {
		root = vm_page_splay(m->pindex, object->cache);
		KASSERT(root == m,
		    ("vm_page_cache_remove: page %p is not cached in object %p",
		    m, object));
	}
	if (m->left == NULL)
		root = m->right;
	else if (m->right == NULL)
		root = m->left;
	else {
		root = vm_page_splay(m->pindex, m->left);
		root->right = m->right;
	}
	object->cache = root;
	m->object = NULL;
	cnt.v_cache_count--;
}

/*
 *	Transfer all of the cached pages with offset greater than or
 *	equal to 'offidxstart' from the original object's cache to the
 *	new object's cache.  However, any cached pages with offset
 *	greater than or equal to the new object's size are kept in the
 *	original object.  Initially, the new object's cache must be
 *	empty.  Offset 'offidxstart' in the original object must
 *	correspond to offset zero in the new object.
 *
 *	The new object must be locked.
 */
void
vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
    vm_object_t new_object)
{
	vm_page_t m, m_next;

	/*
	 * Insertion into an object's collection of cached pages
	 * requires the object to be locked.  In contrast, removal does
	 * not.
	 */
	VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
	KASSERT(new_object->cache == NULL,
	    ("vm_page_cache_transfer: object %p has cached pages",
	    new_object));
	mtx_lock(&vm_page_queue_free_mtx);
	if ((m = orig_object->cache) != NULL) {
		/*
		 * Transfer all of the pages with offset greater than or
		 * equal to 'offidxstart' from the original object's
		 * cache to the new object's cache.
		 */
		m = vm_page_splay(offidxstart, m);
		if (m->pindex < offidxstart) {
			orig_object->cache = m;
			new_object->cache = m->right;
			m->right = NULL;
		} else {
			orig_object->cache = m->left;
			new_object->cache = m;
			m->left = NULL;
		}
		while ((m = new_object->cache) != NULL) {
			if ((m->pindex - offidxstart) >= new_object->size) {
				/*
				 * Return all of the cached pages with
				 * offset greater than or equal to the
				 * new object's size to the original
				 * object's cache.
				 */
				new_object->cache = m->left;
				m->left = orig_object->cache;
				orig_object->cache = m;
				break;
			}
			m_next = vm_page_splay(m->pindex, m->right);
			/* Update the page's object and offset. */
			m->object = new_object;
			m->pindex -= offidxstart;
			if (m_next == NULL)
				break;
			m->right = NULL;
			m_next->left = m;
			new_object->cache = m_next;
		}
		KASSERT(new_object->cache == NULL ||
		    new_object->type == OBJT_SWAP,
		    ("vm_page_cache_transfer: object %p's type is incompatible"
		    " with cached pages", new_object));
	}
	mtx_unlock(&vm_page_queue_free_mtx);
}
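/*
 * Illustrative sketch (not part of the original file): a common caller
 * pattern for vm_page_alloc() below, waiting for memory and retrying when
 * the allocation fails, and honoring the fact that VM_ALLOC_ZERO only
 * *prefers* a zeroed page:
 *
 * retry:
 *	m = vm_page_alloc(object, pindex,
 *	    VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_WIRED);
 *	if (m == NULL) {
 *		VM_OBJECT_UNLOCK(object);
 *		VM_WAIT;		// sleep until pages are freed
 *		VM_OBJECT_LOCK(object);
 *		goto retry;
 *	}
 *	if ((m->flags & PG_ZERO) == 0)
 *		pmap_zero_page(m);	// zero it ourselves if needed
 */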

/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	The caller must always specify an allocation class.
 *
 *	allocation classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *
 *	optional allocation flags:
 *	VM_ALLOC_ZERO		prefer a zeroed page
 *	VM_ALLOC_WIRED		wire the allocated page
 *	VM_ALLOC_NOOBJ		page is not associated with a vm object
 *	VM_ALLOC_NOBUSY		do not set the page busy
 *	VM_ALLOC_NODUMP		do not include the page in a kernel core dump
 *	VM_ALLOC_IFCACHED	return page only if it is cached
 *	VM_ALLOC_IFNOTCACHED	return NULL, do not reactivate if the page
 *				is cached
 *
 *	This routine may not sleep.
 */
vm_page_t
vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
{
	struct vnode *vp = NULL;
	vm_object_t m_object;
	vm_page_t m;
	int flags, page_req;

	if ((req & VM_ALLOC_NOOBJ) == 0) {
		KASSERT(object != NULL,
		    ("vm_page_alloc: NULL object."));
		VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	}

	page_req = req & VM_ALLOC_CLASS_MASK;

	/*
	 * The pager is allowed to eat deeper into the free page list.
	 */
	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT))
		page_req = VM_ALLOC_SYSTEM;

	mtx_lock(&vm_page_queue_free_mtx);
	if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
	    (page_req == VM_ALLOC_SYSTEM &&
	    cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
	    (page_req == VM_ALLOC_INTERRUPT &&
	    cnt.v_free_count + cnt.v_cache_count > 0)) {
		/*
		 * Allocate from the free queue if the number of free pages
		 * exceeds the minimum for the request class.
		 */
		if (object != NULL &&
		    (m = vm_page_cache_lookup(object, pindex)) != NULL) {
			if ((req & VM_ALLOC_IFNOTCACHED) != 0) {
				mtx_unlock(&vm_page_queue_free_mtx);
				return (NULL);
			}
			if (vm_phys_unfree_page(m))
				vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, 0);
#if VM_NRESERVLEVEL > 0
			else if (!vm_reserv_reactivate_page(m))
#else
			else
#endif
				panic("vm_page_alloc: cache page %p is missing"
				    " from the free queue", m);
		} else if ((req & VM_ALLOC_IFCACHED) != 0) {
			mtx_unlock(&vm_page_queue_free_mtx);
			return (NULL);
#if VM_NRESERVLEVEL > 0
		} else if (object == NULL || object->type == OBJT_DEVICE ||
		    object->type == OBJT_SG ||
		    (object->flags & OBJ_COLORED) == 0 ||
		    (m = vm_reserv_alloc_page(object, pindex)) == NULL) {
#else
		} else {
#endif
			m = vm_phys_alloc_pages(object != NULL ?
			    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
#if VM_NRESERVLEVEL > 0
			if (m == NULL && vm_reserv_reclaim_inactive()) {
				m = vm_phys_alloc_pages(object != NULL ?
				    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT,
				    0);
			}
#endif
		}
	} else {
		/*
		 * Not allocatable, give up.
		 */
		mtx_unlock(&vm_page_queue_free_mtx);
		atomic_add_int(&vm_pageout_deficit,
		    MAX((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1));
		pagedaemon_wakeup();
		return (NULL);
	}

	/*
	 *  At this point we had better have found a good page.
	 */

	KASSERT(m != NULL, ("vm_page_alloc: missing page"));
	KASSERT(m->queue == PQ_NONE,
	    ("vm_page_alloc: page %p has unexpected queue %d", m, m->queue));
	KASSERT(m->wire_count == 0, ("vm_page_alloc: page %p is wired", m));
	KASSERT(m->hold_count == 0, ("vm_page_alloc: page %p is held", m));
	KASSERT(m->busy == 0, ("vm_page_alloc: page %p is busy", m));
	KASSERT(m->dirty == 0, ("vm_page_alloc: page %p is dirty", m));
	KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
	    ("vm_page_alloc: page %p has unexpected memattr %d", m,
	    pmap_page_get_memattr(m)));
	if ((m->flags & PG_CACHED) != 0) {
		KASSERT(m->valid != 0,
		    ("vm_page_alloc: cached page %p is invalid", m));
		if (m->object == object && m->pindex == pindex)
			cnt.v_reactivated++;
		else
			m->valid = 0;
		m_object = m->object;
		vm_page_cache_remove(m);
		if (m_object->type == OBJT_VNODE && m_object->cache == NULL)
			vp = m_object->handle;
	} else {
		KASSERT(VM_PAGE_IS_FREE(m),
		    ("vm_page_alloc: page %p is not free", m));
		KASSERT(m->valid == 0,
		    ("vm_page_alloc: free page %p is valid", m));
		cnt.v_free_count--;
	}

	/*
	 * Only the PG_ZERO flag is inherited.  The PG_CACHED or PG_FREE flag
	 * must be cleared before the free page queues lock is released.
	 */
	flags = 0;
	if (req & VM_ALLOC_NODUMP)
		flags |= PG_NODUMP;
	if (m->flags & PG_ZERO) {
		vm_page_zero_count--;
		if (req & VM_ALLOC_ZERO)
			flags = PG_ZERO;
	}
	m->flags = flags;
	mtx_unlock(&vm_page_queue_free_mtx);
	m->aflags = 0;
	if (object == NULL || object->type == OBJT_PHYS)
		m->oflags = VPO_UNMANAGED;
	else
		m->oflags = 0;
	if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ)) == 0)
		m->oflags |= VPO_BUSY;
	if (req & VM_ALLOC_WIRED) {
		/*
		 * The page lock is not required for wiring a page until that
		 * page is inserted into the object.
		 */
		atomic_add_int(&cnt.v_wire_count, 1);
		m->wire_count = 1;
	}
	m->act_count = 0;

	if (object != NULL) {
		/* Ignore device objects; the pager sets "memattr" for them. */
		if (object->memattr != VM_MEMATTR_DEFAULT &&
		    object->type != OBJT_DEVICE && object->type != OBJT_SG)
			pmap_page_set_memattr(m, object->memattr);
		vm_page_insert(m, object, pindex);
	} else
		m->pindex = pindex;

	/*
	 * The following call to vdrop() must come after the above call
	 * to vm_page_insert() in case both affect the same object and
	 * vnode.  Otherwise, the affected vnode's hold count could
	 * temporarily become zero.
	 */
	if (vp != NULL)
		vdrop(vp);

	/*
	 * Don't wakeup too often - wakeup the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (vm_paging_needed())
		pagedaemon_wakeup();

	return (m);
}
1467193326Sed */ 1468193326Sed if (vp != NULL) 1469193326Sed vdrop(vp); 1470193326Sed 1471193326Sed /* 1472193326Sed * Don't wakeup too often - wakeup the pageout daemon when 1473193326Sed * we would be nearly out of memory. 1474263508Sdim */ 1475234353Sdim if (vm_paging_needed()) 1476193326Sed pagedaemon_wakeup(); 1477193326Sed 1478193326Sed return (m); 1479193326Sed} 1480249423Sdim 1481249423Sdim/* 1482249423Sdim * Initialize a page that has been freshly dequeued from a freelist. 1483249423Sdim * The caller has to drop the vnode returned, if it is not NULL. 1484249423Sdim * 1485249423Sdim * To be called with vm_page_queue_free_mtx held. 1486249423Sdim */ 1487249423Sdimstruct vnode * 1488249423Sdimvm_page_alloc_init(vm_page_t m) 1489249423Sdim{ 1490249423Sdim struct vnode *drop; 1491249423Sdim vm_object_t m_object; 1492249423Sdim 1493249423Sdim KASSERT(m->queue == PQ_NONE, 1494249423Sdim ("vm_page_alloc_init: page %p has unexpected queue %d", 1495249423Sdim m, m->queue)); 1496249423Sdim KASSERT(m->wire_count == 0, 1497249423Sdim ("vm_page_alloc_init: page %p is wired", m)); 1498193326Sed KASSERT(m->hold_count == 0, 1499193326Sed ("vm_page_alloc_init: page %p is held", m)); 1500193326Sed KASSERT(m->busy == 0, 1501193326Sed ("vm_page_alloc_init: page %p is busy", m)); 1502193326Sed KASSERT(m->dirty == 0, 1503193326Sed ("vm_page_alloc_init: page %p is dirty", m)); 1504193326Sed KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, 1505249423Sdim ("vm_page_alloc_init: page %p has unexpected memattr %d", 1506249423Sdim m, pmap_page_get_memattr(m))); 1507193326Sed mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 1508193326Sed drop = NULL; 1509194179Sed if ((m->flags & PG_CACHED) != 0) { 1510198092Srdivacky m->valid = 0; 1511193326Sed m_object = m->object; 1512194179Sed vm_page_cache_remove(m); 1513193326Sed if (m_object->type == OBJT_VNODE && 1514193326Sed m_object->cache == NULL) 1515193326Sed drop = m_object->handle; 1516198092Srdivacky } else { 1517193326Sed KASSERT(VM_PAGE_IS_FREE(m), 1518218893Sdim ("vm_page_alloc_init: page %p is not free", m)); 1519193326Sed KASSERT(m->valid == 0, 1520193326Sed ("vm_page_alloc_init: free page %p is valid", m)); 1521198092Srdivacky cnt.v_free_count--; 1522198092Srdivacky } 1523198092Srdivacky if (m->flags & PG_ZERO) 1524218893Sdim vm_page_zero_count--; 1525198092Srdivacky /* Don't clear the PG_ZERO flag; we'll need it later. */ 1526198092Srdivacky m->flags &= PG_ZERO; 1527227737Sdim m->aflags = 0; 1528227737Sdim m->oflags = VPO_UNMANAGED; 1529227737Sdim /* Unmanaged pages don't use "act_count". */ 1530227737Sdim return (drop); 1531227737Sdim} 1532227737Sdim 1533193326Sed/* 1534193326Sed * vm_page_alloc_freelist: 1535193326Sed * 1536193326Sed * Allocate a page from the specified freelist. 1537193326Sed * Only the ALLOC_CLASS values in req are honored, other request flags 1538193326Sed * are ignored. 1539193326Sed */ 1540218893Sdimvm_page_t 1541226633Sdimvm_page_alloc_freelist(int flind, int req) 1542193326Sed{ 1543193326Sed struct vnode *drop; 1544193326Sed vm_page_t m; 1545193326Sed int page_req; 1546193326Sed 1547263508Sdim m = NULL; 1548263508Sdim page_req = req & VM_ALLOC_CLASS_MASK; 1549263508Sdim mtx_lock(&vm_page_queue_free_mtx); 1550263508Sdim /* 1551263508Sdim * Do not allocate reserved pages unless the req has asked for it. 
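
/*
 * Illustrative sketch (not part of the original source): a typical caller
 * of vm_page_alloc() holds the object lock and must finish any zeroing
 * itself, because VM_ALLOC_ZERO only preserves PG_ZERO when the page
 * happens to come from the pre-zeroed pool.  The object "obj" and the
 * index "idx" below are hypothetical.
 *
 *	VM_OBJECT_LOCK(obj);
 *	m = vm_page_alloc(obj, idx, VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
 *	if (m != NULL && (m->flags & PG_ZERO) == 0)
 *		pmap_zero_page(m);
 *	VM_OBJECT_UNLOCK(obj);
 */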

/*
 *	vm_page_alloc_freelist:
 *
 *	Allocate a page from the specified freelist.
 *	Only the ALLOC_CLASS values in req are honored, other request flags
 *	are ignored.
 */
vm_page_t
vm_page_alloc_freelist(int flind, int req)
{
	struct vnode *drop;
	vm_page_t m;
	int page_req;

	m = NULL;
	page_req = req & VM_ALLOC_CLASS_MASK;
	mtx_lock(&vm_page_queue_free_mtx);
	/*
	 * Do not allocate reserved pages unless the req has asked for it.
	 */
	if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
	    (page_req == VM_ALLOC_SYSTEM &&
	    cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
	    (page_req == VM_ALLOC_INTERRUPT &&
	    cnt.v_free_count + cnt.v_cache_count > 0)) {
		m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0);
	}
	if (m == NULL) {
		mtx_unlock(&vm_page_queue_free_mtx);
		return (NULL);
	}
	drop = vm_page_alloc_init(m);
	mtx_unlock(&vm_page_queue_free_mtx);
	if (drop)
		vdrop(drop);
	return (m);
}

/*
 *	vm_wait:	(also see VM_WAIT macro)
 *
 *	Block until free pages are available for allocation
 *	- Called in various places before memory allocations.
 */
void
vm_wait(void)
{

	mtx_lock(&vm_page_queue_free_mtx);
	if (curproc == pageproc) {
		vm_pageout_pages_needed = 1;
		msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx,
		    PDROP | PSWP, "VMWait", 0);
	} else {
		if (!vm_pages_needed) {
			vm_pages_needed = 1;
			wakeup(&vm_pages_needed);
		}
		msleep(&cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PVM,
		    "vmwait", 0);
	}
}

/*
 *	vm_waitpfault:	(also see VM_WAITPFAULT macro)
 *
 *	Block until free pages are available for allocation
 *	- Called only in vm_fault so that processes page faulting
 *	  can be easily tracked.
 *	- Sleeps at a lower priority than vm_wait() so that vm_wait()ing
 *	  processes will be able to grab memory first.  Do not change
 *	  this balance without careful testing first.
 */
void
vm_waitpfault(void)
{

	mtx_lock(&vm_page_queue_free_mtx);
	if (!vm_pages_needed) {
		vm_pages_needed = 1;
		wakeup(&vm_pages_needed);
	}
	msleep(&cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PUSER,
	    "pfault", 0);
}
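
/*
 * Illustrative sketch (not part of the original source): the canonical
 * retry loop around a failed allocation, as used by vm_page_grab()
 * below.  The object lock must be dropped around VM_WAIT because the
 * macro sleeps; "obj" and "idx" are hypothetical.
 *
 *	while ((m = vm_page_alloc(obj, idx, VM_ALLOC_NORMAL)) == NULL) {
 *		VM_OBJECT_UNLOCK(obj);
 *		VM_WAIT;
 *		VM_OBJECT_LOCK(obj);
 *	}
 */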

/*
 *	vm_page_requeue:
 *
 *	Move the given page to the tail of its present page queue.
 *
 *	The page queues must be locked.
 */
void
vm_page_requeue(vm_page_t m)
{
	struct vpgqueues *vpq;
	int queue;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	queue = m->queue;
	KASSERT(queue != PQ_NONE,
	    ("vm_page_requeue: page %p is not queued", m));
	vpq = &vm_page_queues[queue];
	TAILQ_REMOVE(&vpq->pl, m, pageq);
	TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
}

/*
 *	vm_page_queue_remove:
 *
 *	Remove the given page from the specified queue.
 *
 *	The page and page queues must be locked.
 */
static __inline void
vm_page_queue_remove(int queue, vm_page_t m)
{
	struct vpgqueues *pq;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	vm_page_lock_assert(m, MA_OWNED);
	pq = &vm_page_queues[queue];
	TAILQ_REMOVE(&pq->pl, m, pageq);
	(*pq->cnt)--;
}

/*
 *	vm_pageq_remove:
 *
 *	Remove a page from its queue.
 *
 *	The given page must be locked.
 *	This routine may not block.
 */
void
vm_pageq_remove(vm_page_t m)
{
	int queue;

	vm_page_lock_assert(m, MA_OWNED);
	if ((queue = m->queue) != PQ_NONE) {
		vm_page_lock_queues();
		m->queue = PQ_NONE;
		vm_page_queue_remove(queue, m);
		vm_page_unlock_queues();
	}
}

/*
 *	vm_page_enqueue:
 *
 *	Add the given page to the specified queue.
 *
 *	The page queues must be locked.
 */
static void
vm_page_enqueue(int queue, vm_page_t m)
{
	struct vpgqueues *vpq;

	vpq = &vm_page_queues[queue];
	m->queue = queue;
	TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
	++*vpq->cnt;
}

/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *	Ensure that act_count is at least ACT_INIT but do not otherwise
 *	mess with it.
 *
 *	The page must be locked.
 *	This routine may not block.
 */
void
vm_page_activate(vm_page_t m)
{
	int queue;

	vm_page_lock_assert(m, MA_OWNED);
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((queue = m->queue) != PQ_ACTIVE) {
		if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
			if (m->act_count < ACT_INIT)
				m->act_count = ACT_INIT;
			vm_page_lock_queues();
			if (queue != PQ_NONE)
				vm_page_queue_remove(queue, m);
			vm_page_enqueue(PQ_ACTIVE, m);
			vm_page_unlock_queues();
		} else
			KASSERT(queue == PQ_NONE,
			    ("vm_page_activate: wired page %p is queued", m));
	} else {
		if (m->act_count < ACT_INIT)
			m->act_count = ACT_INIT;
	}
}
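
/*
 * Illustrative sketch (not part of the original source): a queue
 * transition always follows the same locking pattern.  The caller holds
 * the page lock (and, for vm_page_activate(), the object lock); the
 * page queues lock is taken internally.
 *
 *	vm_page_lock(m);
 *	vm_page_activate(m);
 *	vm_page_unlock(m);
 */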

/*
 *	vm_page_free_wakeup:
 *
 *	Helper routine for vm_page_free_toq() and vm_page_cache().  This
 *	routine is called when a page has been added to the cache or free
 *	queues.
 *
 *	The page queues must be locked.
 *	This routine may not block.
 */
static inline void
vm_page_free_wakeup(void)
{

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	/*
	 * If the pageout daemon needs pages, then tell it that there
	 * are some free.
	 */
	if (vm_pageout_pages_needed &&
	    cnt.v_cache_count + cnt.v_free_count >= cnt.v_pageout_free_min) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	/*
	 * Wake up processes that are waiting on memory if we hit a
	 * high water mark, and wake up the scheduler process if we
	 * have lots of memory; that process will swap in processes.
	 */
	if (vm_pages_needed && !vm_page_count_min()) {
		vm_pages_needed = 0;
		wakeup(&cnt.v_free_count);
	}
}

/*
 *	vm_page_free_toq:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page must be locked prior to entry.
 *	This routine may not block.
 */
void
vm_page_free_toq(vm_page_t m)
{

	if ((m->oflags & VPO_UNMANAGED) == 0) {
		vm_page_lock_assert(m, MA_OWNED);
		KASSERT(!pmap_page_is_mapped(m),
		    ("vm_page_free_toq: freeing mapped page %p", m));
	}
	PCPU_INC(cnt.v_tfree);

	if (VM_PAGE_IS_FREE(m))
		panic("vm_page_free: freeing free page %p", m);
	else if (m->busy != 0)
		panic("vm_page_free: freeing busy page %p", m);

	/*
	 * Unqueue, then remove the page.  Note that we cannot destroy
	 * the page here because we do not want to call the pager's
	 * callback routine until after we've put the page on the
	 * appropriate free queue.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0)
		vm_pageq_remove(m);
	vm_page_remove(m);

	/*
	 * If the page is fictitious, we are done; fictitious pages are
	 * never placed on the free queues.
	 */
	if ((m->flags & PG_FICTITIOUS) != 0)
		return;

	m->valid = 0;
	vm_page_undirty(m);

	if (m->wire_count != 0)
		panic("vm_page_free: freeing wired page %p", m);
	if (m->hold_count != 0) {
		m->flags &= ~PG_ZERO;
		vm_page_lock_queues();
		vm_page_enqueue(PQ_HOLD, m);
		vm_page_unlock_queues();
	} else {
		/*
		 * Restore the default memory attribute to the page.
		 */
		if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
			pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);

		/*
		 * Insert the page into the physical memory allocator's
		 * cache/free page queues.
		 */
		mtx_lock(&vm_page_queue_free_mtx);
		m->flags |= PG_FREE;
		cnt.v_free_count++;
#if VM_NRESERVLEVEL > 0
		if (!vm_reserv_free_page(m))
#else
		if (TRUE)
#endif
			vm_phys_free_pages(m, 0);
		if ((m->flags & PG_ZERO) != 0)
			++vm_page_zero_count;
		else
			vm_page_zero_idle_wakeup();
		vm_page_free_wakeup();
		mtx_unlock(&vm_page_queue_free_mtx);
	}
}

/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	If the page is fictitious, then its wire count must remain one.
 *
 *	The page must be locked.
 *	This routine may not block.
 */
void
vm_page_wire(vm_page_t m)
{

	/*
	 * Only bump the wire statistics if the page is not already wired,
	 * and only unqueue the page if it is on some queue (if it is unmanaged
	 * it is already off the queues).
	 */
	vm_page_lock_assert(m, MA_OWNED);
	if ((m->flags & PG_FICTITIOUS) != 0) {
		KASSERT(m->wire_count == 1,
		    ("vm_page_wire: fictitious page %p's wire count isn't one",
		    m));
		return;
	}
	if (m->wire_count == 0) {
		if ((m->oflags & VPO_UNMANAGED) == 0)
			vm_pageq_remove(m);
		atomic_add_int(&cnt.v_wire_count, 1);
	}
	m->wire_count++;
	KASSERT(m->wire_count != 0,
	    ("vm_page_wire: wire_count overflow m=%p", m));
}
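
/*
 * Illustrative sketch (not part of the original source): wiring pins a
 * page so that the pageout daemon cannot reclaim it.  Every
 * vm_page_wire() must eventually be balanced by a vm_page_unwire()
 * (below); passing 0 for "activate" sends the page to the inactive
 * queue.
 *
 *	vm_page_lock(m);
 *	vm_page_wire(m);
 *	vm_page_unlock(m);
 *	(... the page cannot be paged out while wired ...)
 *	vm_page_lock(m);
 *	vm_page_unwire(m, 0);
 *	vm_page_unlock(m);
 */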

/*
 * vm_page_unwire:
 *
 * Release one wiring of the specified page, potentially enabling it to be
 * paged again.  If paging is enabled, then the value of the parameter
 * "activate" determines to which queue the page is added.  If "activate" is
 * non-zero, then the page is added to the active queue.  Otherwise, it is
 * added to the inactive queue.
 *
 * However, unless the page belongs to an object, it is not enqueued because
 * it cannot be paged out.
 *
 * If a page is fictitious, then its wire count must always be one.
 *
 * A managed page must be locked.
 */
void
vm_page_unwire(vm_page_t m, int activate)
{

	if ((m->oflags & VPO_UNMANAGED) == 0)
		vm_page_lock_assert(m, MA_OWNED);
	if ((m->flags & PG_FICTITIOUS) != 0) {
		KASSERT(m->wire_count == 1,
		    ("vm_page_unwire: fictitious page %p's wire count isn't one",
		    m));
		return;
	}
	if (m->wire_count > 0) {
		m->wire_count--;
		if (m->wire_count == 0) {
			atomic_subtract_int(&cnt.v_wire_count, 1);
			if ((m->oflags & VPO_UNMANAGED) != 0 ||
			    m->object == NULL)
				return;
			vm_page_lock_queues();
			if (activate)
				vm_page_enqueue(PQ_ACTIVE, m);
			else {
				m->flags &= ~PG_WINATCFLS;
				vm_page_enqueue(PQ_INACTIVE, m);
			}
			vm_page_unlock_queues();
		}
	} else
		panic("vm_page_unwire: page %p's wire count is zero", m);
}

/*
 * Move the specified page to the inactive queue.
 *
 * Many pages placed on the inactive queue should actually go
 * into the cache, but it is difficult to figure out which.  What
 * we do instead, if the inactive target is well met, is to put
 * clean pages at the head of the inactive queue instead of the tail.
 * This will cause them to be moved to the cache more quickly and
 * if not actively re-referenced, reclaimed more quickly.  If we just
 * stick these pages at the end of the inactive queue, heavy filesystem
 * meta-data accesses can cause an unnecessary paging load on memory-bound
 * processes.  This optimization causes one-time-use metadata to be
 * reused more quickly.
 *
 * Normally athead is 0, resulting in LRU operation.  athead is set
 * to 1 if we want this page to be 'as if it were placed in the cache',
 * except without unmapping it from the process address space.
 *
 * This routine may not block.
 */
static inline void
_vm_page_deactivate(vm_page_t m, int athead)
{
	int queue;

	vm_page_lock_assert(m, MA_OWNED);

	/*
	 * Ignore if the page is already inactive.
	 */
	if ((queue = m->queue) == PQ_INACTIVE)
		return;
	if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
		vm_page_lock_queues();
		m->flags &= ~PG_WINATCFLS;
		if (queue != PQ_NONE)
			vm_page_queue_remove(queue, m);
		if (athead)
			TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m,
			    pageq);
		else
			TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m,
			    pageq);
		m->queue = PQ_INACTIVE;
		cnt.v_inactive_count++;
		vm_page_unlock_queues();
	}
}
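
/*
 * Illustrative note (not part of the original source): the two flavors
 * of deactivation differ only in queue position, which controls how
 * soon the pageout scan reaches the page.
 *
 *	_vm_page_deactivate(m, 0);	(LRU: tail of the inactive queue)
 *	_vm_page_deactivate(m, 1);	(head: reclaimed much sooner)
 */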

/*
 * Move the specified page to the inactive queue.
 *
 * The page must be locked.
 */
void
vm_page_deactivate(vm_page_t m)
{

	_vm_page_deactivate(m, 0);
}

/*
 * vm_page_try_to_cache:
 *
 * Returns 0 on failure, 1 on success
 */
int
vm_page_try_to_cache(vm_page_t m)
{

	vm_page_lock_assert(m, MA_OWNED);
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (m->dirty || m->hold_count || m->busy || m->wire_count ||
	    (m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0)
		return (0);
	pmap_remove_all(m);
	if (m->dirty)
		return (0);
	vm_page_cache(m);
	return (1);
}

/*
 * vm_page_try_to_free()
 *
 *	Attempt to free the page.  If we cannot free it, we do nothing.
 *	1 is returned on success, 0 on failure.
 */
int
vm_page_try_to_free(vm_page_t m)
{

	vm_page_lock_assert(m, MA_OWNED);
	if (m->object != NULL)
		VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (m->dirty || m->hold_count || m->busy || m->wire_count ||
	    (m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0)
		return (0);
	pmap_remove_all(m);
	if (m->dirty)
		return (0);
	vm_page_free(m);
	return (1);
}
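
/*
 * Illustrative sketch (not part of the original source): a reclaiming
 * scan, such as a buffer shrinker, can use these opportunistic helpers
 * with the page and object locked.  A zero return simply means the page
 * was busy, held, wired, or still dirty after its mappings were removed.
 *
 *	vm_page_lock(m);
 *	if (vm_page_try_to_cache(m) == 0)
 *		(... leave the page alone ...)
 *	vm_page_unlock(m);
 */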

/*
 * vm_page_cache
 *
 * Put the specified page onto the page cache queue (if appropriate).
 *
 * This routine may not block.
 */
void
vm_page_cache(vm_page_t m)
{
	vm_object_t object;
	vm_page_t next, prev, root;

	vm_page_lock_assert(m, MA_OWNED);
	object = m->object;
	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) || m->busy ||
	    m->hold_count || m->wire_count)
		panic("vm_page_cache: attempting to cache busy page");
	pmap_remove_all(m);
	if (m->dirty != 0)
		panic("vm_page_cache: page %p is dirty", m);
	if (m->valid == 0 || object->type == OBJT_DEFAULT ||
	    (object->type == OBJT_SWAP &&
	    !vm_pager_has_page(object, m->pindex, NULL, NULL))) {
		/*
		 * Hypothesis: A cache-eligible page belonging to a
		 * default object or swap object but without a backing
		 * store must be zero filled.
		 */
		vm_page_free(m);
		return;
	}
	KASSERT((m->flags & PG_CACHED) == 0,
	    ("vm_page_cache: page %p is already cached", m));
	PCPU_INC(cnt.v_tcached);

	/*
	 * Remove the page from the paging queues.
	 */
	vm_pageq_remove(m);

	/*
	 * Remove the page from the object's collection of resident
	 * pages.
	 */
	if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) {
		/*
		 * Since the page's successor in the list is also its parent
		 * in the tree, its right subtree must be empty.
		 */
		next->left = m->left;
		KASSERT(m->right == NULL,
		    ("vm_page_cache: page %p has right child", m));
	} else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL &&
	    prev->right == m) {
		/*
		 * Since the page's predecessor in the list is also its parent
		 * in the tree, its left subtree must be empty.
		 */
		KASSERT(m->left == NULL,
		    ("vm_page_cache: page %p has left child", m));
		prev->right = m->right;
	} else {
		if (m != object->root)
			vm_page_splay(m->pindex, object->root);
		if (m->left == NULL)
			root = m->right;
		else if (m->right == NULL)
			root = m->left;
		else {
			/*
			 * Move the page's successor to the root, because
			 * pages are usually removed in ascending order.
			 */
			if (m->right != next)
				vm_page_splay(m->pindex, m->right);
			next->left = m->left;
			root = next;
		}
		object->root = root;
	}
	TAILQ_REMOVE(&object->memq, m, listq);
	object->resident_page_count--;

	/*
	 * Restore the default memory attribute to the page.
	 */
	if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
		pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);

	/*
	 * Insert the page into the object's collection of cached pages
	 * and the physical memory allocator's cache/free page queues.
	 */
	m->flags &= ~PG_ZERO;
	mtx_lock(&vm_page_queue_free_mtx);
	m->flags |= PG_CACHED;
	cnt.v_cache_count++;
	root = object->cache;
	if (root == NULL) {
		m->left = NULL;
		m->right = NULL;
	} else {
		root = vm_page_splay(m->pindex, root);
		if (m->pindex < root->pindex) {
			m->left = root->left;
			m->right = root;
			root->left = NULL;
		} else if (__predict_false(m->pindex == root->pindex))
			panic("vm_page_cache: offset already cached");
		else {
			m->right = root->right;
			m->left = root;
			root->right = NULL;
		}
	}
	object->cache = m;
#if VM_NRESERVLEVEL > 0
	if (!vm_reserv_free_page(m)) {
#else
	if (TRUE) {
#endif
		vm_phys_set_pool(VM_FREEPOOL_CACHE, m, 0);
		vm_phys_free_pages(m, 0);
	}
	vm_page_free_wakeup();
	mtx_unlock(&vm_page_queue_free_mtx);

	/*
	 * Increment the vnode's hold count if this is the object's only
	 * cached page.  Decrement the vnode's hold count if this was
	 * the object's only resident page.
	 */
	if (object->type == OBJT_VNODE) {
		if (root == NULL && object->resident_page_count != 0)
			vhold(object->handle);
		else if (root != NULL && object->resident_page_count == 0)
			vdrop(object->handle);
	}
}
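
/*
 * Illustrative note (not part of the original source): the cached pages
 * of an object form a splay tree rooted at object->cache and keyed by
 * pindex, so a lookup splays the sought index to the root and compares,
 * roughly (assuming a non-empty tree):
 *
 *	root = vm_page_splay(pindex, object->cache);
 *	if (root->pindex == pindex)
 *		(... "root" is the cached page ...)
 */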

/*
 * vm_page_dontneed
 *
 *	Cache, deactivate, or do nothing as appropriate.  This routine
 *	is typically used by madvise() MADV_DONTNEED.
 *
 *	Generally speaking we want to move the page into the cache so
 *	it gets reused quickly.  However, this can result in a silly syndrome
 *	due to the page recycling too quickly.  Small objects will not be
 *	fully cached.  On the other hand, if we move the page to the inactive
 *	queue we wind up with a problem whereby very large objects
 *	unnecessarily blow away our inactive and cache queues.
 *
 *	The solution is to move the pages based on a fixed weighting.  We
 *	either leave them alone, deactivate them, or move them to the cache,
 *	where moving them to the cache has the highest weighting.
 *	By forcing some pages into other queues we eventually force the
 *	system to balance the queues, potentially recovering other unrelated
 *	space from active.  The idea is to not force this to happen too
 *	often.
 */
void
vm_page_dontneed(vm_page_t m)
{
	int dnw;
	int head;

	vm_page_lock_assert(m, MA_OWNED);
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	dnw = PCPU_GET(dnweight);
	PCPU_INC(dnweight);

	/*
	 * Occasionally leave the page alone.
	 */
	if ((dnw & 0x01F0) == 0 || m->queue == PQ_INACTIVE) {
		if (m->act_count >= ACT_INIT)
			--m->act_count;
		return;
	}

	/*
	 * Clear any references to the page.  Otherwise, the page daemon will
	 * immediately reactivate the page.
	 *
	 * Perform the pmap_clear_reference() first.  Otherwise, a concurrent
	 * pmap operation, such as pmap_remove(), could clear a reference in
	 * the pmap and set PGA_REFERENCED on the page before the
	 * pmap_clear_reference() had completed.  Consequently, the page would
	 * appear referenced based upon an old reference that occurred before
	 * this function ran.
	 */
	pmap_clear_reference(m);
	vm_page_aflag_clear(m, PGA_REFERENCED);

	if (m->dirty == 0 && pmap_is_modified(m))
		vm_page_dirty(m);

	if (m->dirty || (dnw & 0x0070) == 0) {
		/*
		 * Deactivate the page 3 times out of 32.
		 */
		head = 0;
	} else {
		/*
		 * Cache the page 28 times out of every 32.  Note that
		 * the page is deactivated instead of cached, but placed
		 * at the head of the queue instead of the tail.
		 */
		head = 1;
	}
	_vm_page_deactivate(m, head);
}
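
/*
 * Illustrative arithmetic (not part of the original source): "dnweight"
 * is a per-CPU counter, so its low-order bits cycle evenly.  Out of
 * every 32 trips through the masks above, (dnw & 0x01F0) == 0 leaves
 * the page alone roughly 1 time, (dnw & 0x0070) == 0 or a dirty page
 * selects plain deactivation 3 times, and the remaining 28 take the
 * cache-like head insertion, matching the ratios in the comments.
 */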

/*
 * Grab a page, waiting until we are woken up due to the page
 * changing state.  We keep on waiting as long as the page exists
 * in the object but remains busy.  If the page doesn't exist, first
 * allocate it and then conditionally zero it.
 *
 * The caller must always specify the VM_ALLOC_RETRY flag.  This is intended
 * to facilitate its eventual removal.
 *
 * This routine may block.
 */
vm_page_t
vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
{
	vm_page_t m;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	KASSERT((allocflags & VM_ALLOC_RETRY) != 0,
	    ("vm_page_grab: VM_ALLOC_RETRY is required"));
retrylookup:
	if ((m = vm_page_lookup(object, pindex)) != NULL) {
		if ((m->oflags & VPO_BUSY) != 0 ||
		    ((allocflags & VM_ALLOC_IGN_SBUSY) == 0 && m->busy != 0)) {
			/*
			 * Reference the page before unlocking and
			 * sleeping so that the page daemon is less
			 * likely to reclaim it.
			 */
			vm_page_aflag_set(m, PGA_REFERENCED);
			vm_page_sleep(m, "pgrbwt");
			goto retrylookup;
		} else {
			if ((allocflags & VM_ALLOC_WIRED) != 0) {
				vm_page_lock(m);
				vm_page_wire(m);
				vm_page_unlock(m);
			}
			if ((allocflags & VM_ALLOC_NOBUSY) == 0)
				vm_page_busy(m);
			return (m);
		}
	}
	m = vm_page_alloc(object, pindex, allocflags & ~(VM_ALLOC_RETRY |
	    VM_ALLOC_IGN_SBUSY));
	if (m == NULL) {
		VM_OBJECT_UNLOCK(object);
		VM_WAIT;
		VM_OBJECT_LOCK(object);
		goto retrylookup;
	} else if (m->valid != 0)
		return (m);
	if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);
	return (m);
}

/*
 * Mapping function for valid bits or for dirty bits in
 * a page.  May not block.
 *
 * Inputs are required to range within a page.
 */
vm_page_bits_t
vm_page_bits(int base, int size)
{
	int first_bit;
	int last_bit;

	KASSERT(base + size <= PAGE_SIZE,
	    ("vm_page_bits: illegal base/size %d/%d", base, size));

	if (size == 0)		/* handle degenerate case */
		return (0);

	first_bit = base >> DEV_BSHIFT;
	last_bit = (base + size - 1) >> DEV_BSHIFT;

	return (((vm_page_bits_t)2 << last_bit) -
	    ((vm_page_bits_t)1 << first_bit));
}
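
/*
 * Illustrative arithmetic (not part of the original source): with
 * DEV_BSIZE == 512 (DEV_BSHIFT == 9), vm_page_bits(512, 1024) yields
 * first_bit = 1 and last_bit = (512 + 1024 - 1) >> 9 = 2, so the result
 * is (2 << 2) - (1 << 1) = 8 - 2 = 6, i.e. bits 1 and 2 set: exactly
 * the two disk-block-sized chunks covered by the range.
 */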

/*
 *	vm_page_set_valid:
 *
 *	Sets portions of a page valid.  The arguments are expected
 *	to be DEV_BSIZE aligned, but if they aren't, the bitmap is inclusive
 *	of any partial chunks touched by the range.  The invalid portion of
 *	such chunks will be zeroed.
 *
 *	(base + size) must be less than or equal to PAGE_SIZE.
 */
void
vm_page_set_valid(vm_page_t m, int base, int size)
{
	int endoff, frag;

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (size == 0)	/* handle degenerate case */
		return;

	/*
	 * If the base is not DEV_BSIZE aligned and the valid
	 * bit is clear, we have to zero out a portion of the
	 * first block.
	 */
	if ((frag = base & ~(DEV_BSIZE - 1)) != base &&
	    (m->valid & (1 << (base >> DEV_BSHIFT))) == 0)
		pmap_zero_page_area(m, frag, base - frag);

	/*
	 * If the ending offset is not DEV_BSIZE aligned and the
	 * valid bit is clear, we have to zero out a portion of
	 * the last block.
	 */
	endoff = base + size;
	if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff &&
	    (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0)
		pmap_zero_page_area(m, endoff,
		    DEV_BSIZE - (endoff & (DEV_BSIZE - 1)));

	/*
	 * Assert that no previously invalid block that is now being validated
	 * is already dirty.
	 */
	KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0,
	    ("vm_page_set_valid: page %p is dirty", m));

	/*
	 * Set valid bits inclusive of any overlap.
	 */
	m->valid |= vm_page_bits(base, size);
}
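
/*
 * Illustrative arithmetic (not part of the original source): a call
 * such as vm_page_set_valid(m, 100, 200) with DEV_BSIZE == 512 falls
 * entirely within block 0.  If that block was previously invalid,
 * bytes 0-99 and 300-511 are zeroed so that the whole block can be
 * trusted, and m->valid gains only bit 0.
 */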

/*
 * Clear the given bits from the specified page's dirty field.
 */
static __inline void
vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits)
{
	uintptr_t addr;
#if PAGE_SIZE < 16384
	int shift;
#endif

	/*
	 * If the object is locked and the page is neither VPO_BUSY nor
	 * PGA_WRITEABLE, then the page's dirty field cannot possibly be
	 * set by a concurrent pmap operation.
	 */
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0)
		m->dirty &= ~pagebits;
	else {
		/*
		 * The pmap layer can call vm_page_dirty() without
		 * holding a distinguished lock.  The combination of
		 * the object's lock and an atomic operation suffice
		 * to guarantee consistency of the page dirty field.
		 *
		 * For PAGE_SIZE == 32768 case, compiler already
		 * properly aligns the dirty field, so no forcible
		 * alignment is needed.  Only require existence of
		 * atomic_clear_64 when page size is 32768.
		 */
		addr = (uintptr_t)&m->dirty;
#if PAGE_SIZE == 32768
		atomic_clear_64((uint64_t *)addr, pagebits);
#elif PAGE_SIZE == 16384
		atomic_clear_32((uint32_t *)addr, pagebits);
#else		/* PAGE_SIZE <= 8192 */
		/*
		 * Use a trick to perform a 32-bit atomic on the
		 * containing aligned word, to not depend on the existence
		 * of atomic_clear_{8, 16}.
		 */
		shift = addr & (sizeof(uint32_t) - 1);
#if BYTE_ORDER == BIG_ENDIAN
		shift = (sizeof(uint32_t) - sizeof(m->dirty) - shift) * NBBY;
#else
		shift *= NBBY;
#endif
		addr &= ~(sizeof(uint32_t) - 1);
		atomic_clear_32((uint32_t *)addr, pagebits << shift);
#endif		/* PAGE_SIZE */
	}
}
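
/*
 * Illustrative arithmetic (not part of the original source): on a
 * 4K-page system m->dirty is 8 bits wide.  If its address ends in 2,
 * the word-aligned base is (addr - 2), and on a little-endian machine
 * shift becomes 2 * NBBY == 16, so "pagebits << 16" positions the mask
 * over the correct byte of the containing 32-bit word handed to
 * atomic_clear_32().
 */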

/*
 *	vm_page_set_validclean:
 *
 *	Sets portions of a page valid and clean.  The arguments are expected
 *	to be DEV_BSIZE aligned, but if they aren't, the bitmap is inclusive
 *	of any partial chunks touched by the range.  The invalid portion of
 *	such chunks will be zeroed.
 *
 *	This routine may not block.
 *
 *	(base + size) must be less than or equal to PAGE_SIZE.
 */
void
vm_page_set_validclean(vm_page_t m, int base, int size)
{
	vm_page_bits_t oldvalid, pagebits;
	int endoff, frag;

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (size == 0)	/* handle degenerate case */
		return;

	/*
	 * If the base is not DEV_BSIZE aligned and the valid
	 * bit is clear, we have to zero out a portion of the
	 * first block.
	 */
	if ((frag = base & ~(DEV_BSIZE - 1)) != base &&
	    (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0)
		pmap_zero_page_area(m, frag, base - frag);

	/*
	 * If the ending offset is not DEV_BSIZE aligned and the
	 * valid bit is clear, we have to zero out a portion of
	 * the last block.
	 */
	endoff = base + size;
	if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff &&
	    (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0)
		pmap_zero_page_area(m, endoff,
		    DEV_BSIZE - (endoff & (DEV_BSIZE - 1)));

	/*
	 * Set valid, clear dirty bits.  If validating the entire
	 * page we can safely clear the pmap modify bit.  We also
	 * use this opportunity to clear the VPO_NOSYNC flag.  If a process
	 * takes a write fault on a MAP_NOSYNC memory area the flag will
	 * be set again.
	 *
	 * We set valid bits inclusive of any overlap, but we can only
	 * clear dirty bits for DEV_BSIZE chunks that are fully within
	 * the range.
	 */
	oldvalid = m->valid;
	pagebits = vm_page_bits(base, size);
	m->valid |= pagebits;
#if 0	/* NOT YET */
	if ((frag = base & (DEV_BSIZE - 1)) != 0) {
		frag = DEV_BSIZE - frag;
		base += frag;
		size -= frag;
		if (size < 0)
			size = 0;
	}
	pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1));
#endif
	if (base == 0 && size == PAGE_SIZE) {
		/*
		 * The page can only be modified within the pmap if it is
		 * mapped, and it can only be mapped if it was previously
		 * fully valid.
		 */
		if (oldvalid == VM_PAGE_BITS_ALL)
			/*
			 * Perform the pmap_clear_modify() first.  Otherwise,
			 * a concurrent pmap operation, such as
			 * pmap_protect(), could clear a modification in the
			 * pmap and set the dirty field on the page before
			 * pmap_clear_modify() had begun and after the dirty
			 * field was cleared here.
			 */
			pmap_clear_modify(m);
		m->dirty = 0;
		m->oflags &= ~VPO_NOSYNC;
	} else if (oldvalid != VM_PAGE_BITS_ALL)
		m->dirty &= ~pagebits;
	else
		vm_page_clear_dirty_mask(m, pagebits);
}

void
vm_page_clear_dirty(vm_page_t m, int base, int size)
{

	vm_page_clear_dirty_mask(m, vm_page_bits(base, size));
}

/*
 *	vm_page_set_invalid:
 *
 *	Invalidates DEV_BSIZE'd chunks within a page.  Both the
 *	valid and dirty bits for the affected areas are cleared.
 *
 *	May not block.
 */
void
vm_page_set_invalid(vm_page_t m, int base, int size)
{
	vm_page_bits_t bits;

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT((m->oflags & VPO_BUSY) == 0,
	    ("vm_page_set_invalid: page %p is busy", m));
	bits = vm_page_bits(base, size);
	if (m->valid == VM_PAGE_BITS_ALL && bits != 0)
		pmap_remove_all(m);
	KASSERT(!pmap_page_is_mapped(m),
	    ("vm_page_set_invalid: page %p is mapped", m));
	m->valid &= ~bits;
	m->dirty &= ~bits;
}

/*
 * vm_page_zero_invalid()
 *
 *	The kernel assumes that the invalid portions of a page contain
 *	garbage, but such pages can be mapped into memory by user code.
 *	When this occurs, we must zero out the non-valid portions of the
 *	page so user code sees what it expects.
 *
 *	Pages are most often semi-valid when the end of a file is mapped
 *	into memory and the file's size is not page aligned.
 */
void
vm_page_zero_invalid(vm_page_t m, boolean_t setvalid)
{
	int b;
	int i;

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	/*
	 * Scan the valid bits looking for invalid sections that
	 * must be zeroed.  Invalid sub-DEV_BSIZE'd areas (where the
	 * valid bit may be set) have already been zeroed by
	 * vm_page_set_validclean().
	 */
	for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) {
		if (i == (PAGE_SIZE / DEV_BSIZE) ||
		    (m->valid & ((vm_page_bits_t)1 << i))) {
			if (i > b) {
				pmap_zero_page_area(m,
				    b << DEV_BSHIFT, (i - b) << DEV_BSHIFT);
			}
			b = i + 1;
		}
	}

	/*
	 * setvalid is TRUE when we can safely set the zeroed areas
	 * as being valid.  We can do this if there are no cache consistency
	 * issues.  E.g., it is ok to do so with UFS, but not with NFS.
	 */
	if (setvalid)
		m->valid = VM_PAGE_BITS_ALL;
}

/*
 *	vm_page_is_valid:
 *
 *	Is (partial) page valid?  Note that the case where size == 0
 *	will return FALSE in the degenerate case where the page is
 *	entirely invalid, and TRUE otherwise.
 *
 *	May not block.
 */
int
vm_page_is_valid(vm_page_t m, int base, int size)
{
	vm_page_bits_t bits;

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	bits = vm_page_bits(base, size);
	if (m->valid && ((m->valid & bits) == bits))
		return (1);
	else
		return (0);
}

/*
 * update dirty bits from pmap/mmu.  May not block.
 */
void
vm_page_test_dirty(vm_page_t m)
{

	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m))
		vm_page_dirty(m);
}

void
vm_page_lock_KBI(vm_page_t m, const char *file, int line)
{

	mtx_lock_flags_(vm_page_lockptr(m), 0, file, line);
}

void
vm_page_unlock_KBI(vm_page_t m, const char *file, int line)
{

	mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line);
}

int
vm_page_trylock_KBI(vm_page_t m, const char *file, int line)
{

	return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line));
}

#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
void
vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line)
{

	mtx_assert_(vm_page_lockptr(m), a, file, line);
}
#endif

int so_zerocp_fullpage = 0;

/*
 *	Replace the given page with a copy.  The copied page assumes
 *	the portion of the given page's "wire_count" that is not the
 *	responsibility of this copy-on-write mechanism.
 *
 *	The object containing the given page must have a non-zero
 *	paging-in-progress count and be locked.
 */
void
vm_page_cowfault(vm_page_t m)
{
	vm_page_t mnew;
	vm_object_t object;
	vm_pindex_t pindex;

	mtx_assert(&vm_page_queue_mtx, MA_NOTOWNED);
	vm_page_lock_assert(m, MA_OWNED);
	object = m->object;
	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	KASSERT(object->paging_in_progress != 0,
	    ("vm_page_cowfault: object %p's paging-in-progress count is zero.",
	    object));
	pindex = m->pindex;

 retry_alloc:
	pmap_remove_all(m);
	vm_page_remove(m);
	mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY);
	if (mnew == NULL) {
		vm_page_insert(m, object, pindex);
		vm_page_unlock(m);
		VM_OBJECT_UNLOCK(object);
		VM_WAIT;
		VM_OBJECT_LOCK(object);
		if (m == vm_page_lookup(object, pindex)) {
			vm_page_lock(m);
			goto retry_alloc;
		} else {
			/*
			 * The page disappeared during the wait.
			 */
			return;
		}
	}

	if (m->cow == 0) {
		/*
		 * Check to see if we raced with an xmit complete when
		 * waiting to allocate a page.  If so, put things back
		 * the way they were.
		 */
		vm_page_unlock(m);
		vm_page_lock(mnew);
		vm_page_free(mnew);
		vm_page_unlock(mnew);
		vm_page_insert(m, object, pindex);
	} else {	/* clear COW & copy page */
		if (!so_zerocp_fullpage)
			pmap_copy_page(m, mnew);
		mnew->valid = VM_PAGE_BITS_ALL;
		vm_page_dirty(mnew);
		mnew->wire_count = m->wire_count - m->cow;
		m->wire_count = m->cow;
		vm_page_unlock(m);
	}
}

void
vm_page_cowclear(vm_page_t m)
{

	vm_page_lock_assert(m, MA_OWNED);
	if (m->cow) {
		m->cow--;
		/*
		 * Let vm_fault add back write permission lazily.
		 */
	}
	/*
	 * sf_buf_free() will free the page, so we needn't do it here.
	 */
}

int
vm_page_cowsetup(vm_page_t m)
{

	vm_page_lock_assert(m, MA_OWNED);
	if ((m->flags & PG_FICTITIOUS) != 0 ||
	    (m->oflags & VPO_UNMANAGED) != 0 ||
	    m->cow == USHRT_MAX - 1 || !VM_OBJECT_TRYLOCK(m->object))
		return (EBUSY);
	m->cow++;
	pmap_remove_write(m);
	VM_OBJECT_UNLOCK(m->object);
	return (0);
}
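
/*
 * Illustrative sketch (not part of the original source): these hooks
 * appear to be used in pairs by the zero-copy socket send path.  A
 * successful setup write-protects the page; a later write fault enters
 * vm_page_cowfault() above, which copies the page out of the object.
 *
 *	vm_page_lock(m);
 *	error = vm_page_cowsetup(m);
 *	vm_page_unlock(m);
 *	(... when the transmit completes ...)
 *	vm_page_lock(m);
 *	vm_page_cowclear(m);
 *	vm_page_unlock(m);
 */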

#ifdef INVARIANTS
void
vm_page_object_lock_assert(vm_page_t m)
{

	/*
	 * Certain of the page's fields may only be modified by the
	 * holder of the containing object's lock or the setter of the
	 * page's VPO_BUSY flag.  Unfortunately, the setter of the
	 * VPO_BUSY flag is not recorded, and thus cannot be checked
	 * here.
	 */
	if (m->object != NULL && (m->oflags & VPO_BUSY) == 0)
		VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
}
#endif

#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <ddb/ddb.h>

DB_SHOW_COMMAND(page, vm_page_print_page_info)
{
	db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
	db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
	db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
	db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
	db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
	db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
	db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
	db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
	db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
	db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}

DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
{

	db_printf("PQ_FREE:");
	db_printf(" %d", cnt.v_free_count);
	db_printf("\n");

	db_printf("PQ_CACHE:");
	db_printf(" %d", cnt.v_cache_count);
	db_printf("\n");

	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
	    *vm_page_queues[PQ_ACTIVE].cnt,
	    *vm_page_queues[PQ_INACTIVE].cnt);
}
#endif /* DDB */