198184Sgordon/* $NetBSD: uvm_page.c,v 1.256 2024/03/05 14:33:50 thorpej Exp $ */ 298184Sgordon 398184Sgordon/*- 498184Sgordon * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc. 598184Sgordon * All rights reserved. 698184Sgordon * 798184Sgordon * This code is derived from software contributed to The NetBSD Foundation 8180564Sdougb * by Andrew Doran. 998184Sgordon * 1098184Sgordon * Redistribution and use in source and binary forms, with or without 1198184Sgordon * modification, are permitted provided that the following conditions 1298184Sgordon * are met: 1398184Sgordon * 1. Redistributions of source code must retain the above copyright 14231667Sdougb * notice, this list of conditions and the following disclaimer. 15231667Sdougb * 2. Redistributions in binary form must reproduce the above copyright 16231667Sdougb * notice, this list of conditions and the following disclaimer in the 1798184Sgordon * documentation and/or other materials provided with the distribution. 1898184Sgordon * 1998184Sgordon * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20231667Sdougb * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21231667Sdougb * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 2298184Sgordon * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 2398184Sgordon * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24165664Syar * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25165664Syar * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26231667Sdougb * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27231667Sdougb * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28101851Sgordon * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29104980Sschweikh * POSSIBILITY OF SUCH DAMAGE. 30104980Sschweikh */ 31117346Smtm 32104980Sschweikh/* 33104980Sschweikh * Copyright (c) 1997 Charles D. Cranor and Washington University. 3498184Sgordon * Copyright (c) 1991, 1993, The Regents of the University of California. 3598184Sgordon * 3698184Sgordon * All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * The Mach Operating System project at Carnegie-Mellon University. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 66 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp 67 * 68 * 69 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 70 * All rights reserved. 71 * 72 * Permission to use, copy, modify and distribute this software and 73 * its documentation is hereby granted, provided that both the copyright 74 * notice and this permission notice appear in all copies of the 75 * software, derivative works or modified versions, and any portions 76 * thereof, and that both notices appear in supporting documentation. 77 * 78 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 79 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 80 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 81 * 82 * Carnegie Mellon requests users of this software to return to 83 * 84 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 85 * School of Computer Science 86 * Carnegie Mellon University 87 * Pittsburgh PA 15213-3890 88 * 89 * any improvements or extensions that they make and grant Carnegie the 90 * rights to redistribute these changes. 91 */ 92 93/* 94 * uvm_page.c: page ops. 95 */ 96 97#include <sys/cdefs.h> 98__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.256 2024/03/05 14:33:50 thorpej Exp $"); 99 100#include "opt_ddb.h" 101#include "opt_uvm.h" 102#include "opt_uvmhist.h" 103#include "opt_readahead.h" 104 105#include <sys/param.h> 106#include <sys/systm.h> 107#include <sys/sched.h> 108#include <sys/kernel.h> 109#include <sys/vnode.h> 110#include <sys/proc.h> 111#include <sys/radixtree.h> 112#include <sys/atomic.h> 113#include <sys/cpu.h> 114 115#include <ddb/db_active.h> 116 117#include <uvm/uvm.h> 118#include <uvm/uvm_ddb.h> 119#include <uvm/uvm_pdpolicy.h> 120#include <uvm/uvm_pgflcache.h> 121 122/* 123 * number of pages per-CPU to reserve for the kernel. 124 */ 125#ifndef UVM_RESERVED_PAGES_PER_CPU 126#define UVM_RESERVED_PAGES_PER_CPU 5 127#endif 128int vm_page_reserve_kernel = UVM_RESERVED_PAGES_PER_CPU; 129 130/* 131 * physical memory size; 132 */ 133psize_t physmem; 134 135/* 136 * local variables 137 */ 138 139/* 140 * these variables record the values returned by vm_page_bootstrap, 141 * for debugging purposes. The implementation of uvm_pageboot_alloc 142 * and pmap_startup here also uses them internally. 143 */ 144 145static vaddr_t virtual_space_start; 146static vaddr_t virtual_space_end; 147 148/* 149 * we allocate an initial number of page colors in uvm_page_init(), 150 * and remember them. We may re-color pages as cache sizes are 151 * discovered during the autoconfiguration phase. But we can never 152 * free the initial set of buckets, since they are allocated using 153 * uvm_pageboot_alloc(). 154 */ 155 156static size_t recolored_pages_memsize /* = 0 */; 157static char *recolored_pages_mem; 158 159/* 160 * freelist locks - one per bucket. 161 */ 162 163union uvm_freelist_lock uvm_freelist_locks[PGFL_MAX_BUCKETS] 164 __cacheline_aligned; 165 166/* 167 * basic NUMA information. 168 */ 169 170static struct uvm_page_numa_region { 171 struct uvm_page_numa_region *next; 172 paddr_t start; 173 paddr_t size; 174 u_int numa_id; 175} *uvm_page_numa_region; 176 177#ifdef DEBUG 178kmutex_t uvm_zerochecklock __cacheline_aligned; 179vaddr_t uvm_zerocheckkva; 180#endif /* DEBUG */ 181 182/* 183 * These functions are reserved for uvm(9) internal use and are not 184 * exported in the header file uvm_physseg.h 185 * 186 * Thus they are redefined here. 187 */ 188void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *); 189void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t); 190 191/* returns a pgs array */ 192struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t); 193 194/* 195 * inline functions 196 */ 197 198/* 199 * uvm_pageinsert: insert a page in the object. 200 * 201 * => caller must lock object 202 * => call should have already set pg's object and offset pointers 203 * and bumped the version counter 204 */ 205 206static inline void 207uvm_pageinsert_object(struct uvm_object *uobj, struct vm_page *pg) 208{ 209 210 KASSERT(uobj == pg->uobject); 211 KASSERT(rw_write_held(uobj->vmobjlock)); 212 KASSERT((pg->flags & PG_TABLED) == 0); 213 214 if ((pg->flags & PG_STAT) != 0) { 215 /* Cannot use uvm_pagegetdirty(): not yet in radix tree. */ 216 const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 217 218 if ((pg->flags & PG_FILE) != 0) { 219 if (uobj->uo_npages == 0) { 220 struct vnode *vp = (struct vnode *)uobj; 221 mutex_enter(vp->v_interlock); 222 KASSERT((vp->v_iflag & VI_PAGES) == 0); 223 vp->v_iflag |= VI_PAGES; 224 vholdl(vp); 225 mutex_exit(vp->v_interlock); 226 } 227 if (UVM_OBJ_IS_VTEXT(uobj)) { 228 cpu_count(CPU_COUNT_EXECPAGES, 1); 229 } 230 cpu_count(CPU_COUNT_FILEUNKNOWN + status, 1); 231 } else { 232 cpu_count(CPU_COUNT_ANONUNKNOWN + status, 1); 233 } 234 } 235 pg->flags |= PG_TABLED; 236 uobj->uo_npages++; 237} 238 239static inline int 240uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg) 241{ 242 const uint64_t idx = pg->offset >> PAGE_SHIFT; 243 int error; 244 245 KASSERT(rw_write_held(uobj->vmobjlock)); 246 247 error = radix_tree_insert_node(&uobj->uo_pages, idx, pg); 248 if (error != 0) { 249 return error; 250 } 251 if ((pg->flags & PG_CLEAN) == 0) { 252 uvm_obj_page_set_dirty(pg); 253 } 254 KASSERT(((pg->flags & PG_CLEAN) == 0) == 255 uvm_obj_page_dirty_p(pg)); 256 return 0; 257} 258 259/* 260 * uvm_page_remove: remove page from object. 261 * 262 * => caller must lock object 263 */ 264 265static inline void 266uvm_pageremove_object(struct uvm_object *uobj, struct vm_page *pg) 267{ 268 269 KASSERT(uobj == pg->uobject); 270 KASSERT(rw_write_held(uobj->vmobjlock)); 271 KASSERT(pg->flags & PG_TABLED); 272 273 if ((pg->flags & PG_STAT) != 0) { 274 /* Cannot use uvm_pagegetdirty(): no longer in radix tree. */ 275 const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 276 277 if ((pg->flags & PG_FILE) != 0) { 278 if (uobj->uo_npages == 1) { 279 struct vnode *vp = (struct vnode *)uobj; 280 mutex_enter(vp->v_interlock); 281 KASSERT((vp->v_iflag & VI_PAGES) != 0); 282 vp->v_iflag &= ~VI_PAGES; 283 holdrelel(vp); 284 mutex_exit(vp->v_interlock); 285 } 286 if (UVM_OBJ_IS_VTEXT(uobj)) { 287 cpu_count(CPU_COUNT_EXECPAGES, -1); 288 } 289 cpu_count(CPU_COUNT_FILEUNKNOWN + status, -1); 290 } else { 291 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 292 } 293 } 294 uobj->uo_npages--; 295 pg->flags &= ~PG_TABLED; 296 pg->uobject = NULL; 297} 298 299static inline void 300uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg) 301{ 302 struct vm_page *opg __unused; 303 304 KASSERT(rw_write_held(uobj->vmobjlock)); 305 306 opg = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT); 307 KASSERT(pg == opg); 308} 309 310static void 311uvm_page_init_bucket(struct pgfreelist *pgfl, struct pgflbucket *pgb, int num) 312{ 313 int i; 314 315 pgb->pgb_nfree = 0; 316 for (i = 0; i < uvmexp.ncolors; i++) { 317 LIST_INIT(&pgb->pgb_colors[i]); 318 } 319 pgfl->pgfl_buckets[num] = pgb; 320} 321 322/* 323 * uvm_page_init: init the page system. called from uvm_init(). 324 * 325 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp 326 */ 327 328void 329uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) 330{ 331 static struct uvm_cpu uvm_boot_cpu __cacheline_aligned; 332 psize_t freepages, pagecount, bucketsize, n; 333 struct pgflbucket *pgb; 334 struct vm_page *pagearray; 335 char *bucketarray; 336 uvm_physseg_t bank; 337 int fl, b; 338 339 KASSERT(ncpu <= 1); 340 341 /* 342 * init the page queues and free page queue locks, except the 343 * free list; we allocate that later (with the initial vm_page 344 * structures). 345 */ 346 347 curcpu()->ci_data.cpu_uvm = &uvm_boot_cpu; 348 uvmpdpol_init(); 349 for (b = 0; b < __arraycount(uvm_freelist_locks); b++) { 350 mutex_init(&uvm_freelist_locks[b].lock, MUTEX_DEFAULT, IPL_VM); 351 } 352 353 /* 354 * allocate vm_page structures. 355 */ 356 357 /* 358 * sanity check: 359 * before calling this function the MD code is expected to register 360 * some free RAM with the uvm_page_physload() function. our job 361 * now is to allocate vm_page structures for this memory. 362 */ 363 364 if (uvm_physseg_get_last() == UVM_PHYSSEG_TYPE_INVALID) 365 panic("uvm_page_bootstrap: no memory pre-allocated"); 366 367 /* 368 * first calculate the number of free pages... 369 * 370 * note that we use start/end rather than avail_start/avail_end. 371 * this allows us to allocate extra vm_page structures in case we 372 * want to return some memory to the pool after booting. 373 */ 374 375 freepages = 0; 376 377 for (bank = uvm_physseg_get_first(); 378 uvm_physseg_valid_p(bank) ; 379 bank = uvm_physseg_get_next(bank)) { 380 freepages += (uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank)); 381 } 382 383 /* 384 * Let MD code initialize the number of colors, or default 385 * to 1 color if MD code doesn't care. 386 */ 387 if (uvmexp.ncolors == 0) 388 uvmexp.ncolors = 1; 389 uvmexp.colormask = uvmexp.ncolors - 1; 390 KASSERT((uvmexp.colormask & uvmexp.ncolors) == 0); 391 392 /* We always start with only 1 bucket. */ 393 uvm.bucketcount = 1; 394 395 /* 396 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can 397 * use. for each page of memory we use we need a vm_page structure. 398 * thus, the total number of pages we can use is the total size of 399 * the memory divided by the PAGE_SIZE plus the size of the vm_page 400 * structure. we add one to freepages as a fudge factor to avoid 401 * truncation errors (since we can only allocate in terms of whole 402 * pages). 403 */ 404 pagecount = ((freepages + 1) << PAGE_SHIFT) / 405 (PAGE_SIZE + sizeof(struct vm_page)); 406 bucketsize = offsetof(struct pgflbucket, pgb_colors[uvmexp.ncolors]); 407 bucketsize = roundup2(bucketsize, coherency_unit); 408 bucketarray = (void *)uvm_pageboot_alloc( 409 bucketsize * VM_NFREELIST + 410 pagecount * sizeof(struct vm_page)); 411 pagearray = (struct vm_page *) 412 (bucketarray + bucketsize * VM_NFREELIST); 413 414 for (fl = 0; fl < VM_NFREELIST; fl++) { 415 pgb = (struct pgflbucket *)(bucketarray + bucketsize * fl); 416 uvm_page_init_bucket(&uvm.page_free[fl], pgb, 0); 417 } 418 memset(pagearray, 0, pagecount * sizeof(struct vm_page)); 419 420 /* 421 * init the freelist cache in the disabled state. 422 */ 423 uvm_pgflcache_init(); 424 425 /* 426 * init the vm_page structures and put them in the correct place. 427 */ 428 /* First init the extent */ 429 430 for (bank = uvm_physseg_get_first(), 431 uvm_physseg_seg_chomp_slab(bank, pagearray, pagecount); 432 uvm_physseg_valid_p(bank); 433 bank = uvm_physseg_get_next(bank)) { 434 435 n = uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank); 436 uvm_physseg_seg_alloc_from_slab(bank, n); 437 uvm_physseg_init_seg(bank, pagearray); 438 439 /* set up page array pointers */ 440 pagearray += n; 441 pagecount -= n; 442 } 443 444 /* 445 * pass up the values of virtual_space_start and 446 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper 447 * layers of the VM. 448 */ 449 450 *kvm_startp = round_page(virtual_space_start); 451 *kvm_endp = trunc_page(virtual_space_end); 452 453 /* 454 * init various thresholds. 455 */ 456 457 uvmexp.reserve_pagedaemon = 1; 458 uvmexp.reserve_kernel = vm_page_reserve_kernel; 459 460 /* 461 * done! 462 */ 463 464 uvm.page_init_done = true; 465} 466 467/* 468 * uvm_pgfl_lock: lock all freelist buckets 469 */ 470 471void 472uvm_pgfl_lock(void) 473{ 474 int i; 475 476 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 477 mutex_spin_enter(&uvm_freelist_locks[i].lock); 478 } 479} 480 481/* 482 * uvm_pgfl_unlock: unlock all freelist buckets 483 */ 484 485void 486uvm_pgfl_unlock(void) 487{ 488 int i; 489 490 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 491 mutex_spin_exit(&uvm_freelist_locks[i].lock); 492 } 493} 494 495/* 496 * uvm_setpagesize: set the page size 497 * 498 * => sets page_shift and page_mask from uvmexp.pagesize. 499 */ 500 501void 502uvm_setpagesize(void) 503{ 504 505 /* 506 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE 507 * to be a constant (indicated by being a non-zero value). 508 */ 509 if (uvmexp.pagesize == 0) { 510 if (PAGE_SIZE == 0) 511 panic("uvm_setpagesize: uvmexp.pagesize not set"); 512 uvmexp.pagesize = PAGE_SIZE; 513 } 514 uvmexp.pagemask = uvmexp.pagesize - 1; 515 if ((uvmexp.pagemask & uvmexp.pagesize) != 0) 516 panic("uvm_setpagesize: page size %u (%#x) not a power of two", 517 uvmexp.pagesize, uvmexp.pagesize); 518 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) 519 if ((1 << uvmexp.pageshift) == uvmexp.pagesize) 520 break; 521} 522 523/* 524 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping 525 */ 526 527vaddr_t 528uvm_pageboot_alloc(vsize_t size) 529{ 530 static bool initialized = false; 531 vaddr_t addr; 532#if !defined(PMAP_STEAL_MEMORY) 533 vaddr_t vaddr; 534 paddr_t paddr; 535#endif 536 537 /* 538 * on first call to this function, initialize ourselves. 539 */ 540 if (initialized == false) { 541 pmap_virtual_space(&virtual_space_start, &virtual_space_end); 542 543 /* round it the way we like it */ 544 virtual_space_start = round_page(virtual_space_start); 545 virtual_space_end = trunc_page(virtual_space_end); 546 547 initialized = true; 548 } 549 550 /* round to page size */ 551 size = round_page(size); 552 uvmexp.bootpages += atop(size); 553 554#if defined(PMAP_STEAL_MEMORY) 555 556 /* 557 * defer bootstrap allocation to MD code (it may want to allocate 558 * from a direct-mapped segment). pmap_steal_memory should adjust 559 * virtual_space_start/virtual_space_end if necessary. 560 */ 561 562 addr = pmap_steal_memory(size, &virtual_space_start, 563 &virtual_space_end); 564 565 return addr; 566 567#else /* !PMAP_STEAL_MEMORY */ 568 569 /* 570 * allocate virtual memory for this request 571 */ 572 if (virtual_space_start == virtual_space_end || 573 (virtual_space_end - virtual_space_start) < size) 574 panic("uvm_pageboot_alloc: out of virtual space"); 575 576 addr = virtual_space_start; 577 578#ifdef PMAP_GROWKERNEL 579 /* 580 * If the kernel pmap can't map the requested space, 581 * then allocate more resources for it. 582 */ 583 if (uvm_maxkaddr < (addr + size)) { 584 uvm_maxkaddr = pmap_growkernel(addr + size); 585 if (uvm_maxkaddr < (addr + size)) 586 panic("uvm_pageboot_alloc: pmap_growkernel() failed"); 587 } 588#endif 589 590 virtual_space_start += size; 591 592 /* 593 * allocate and mapin physical pages to back new virtual pages 594 */ 595 596 for (vaddr = round_page(addr) ; vaddr < addr + size ; 597 vaddr += PAGE_SIZE) { 598 599 if (!uvm_page_physget(&paddr)) 600 panic("uvm_pageboot_alloc: out of memory"); 601 602 /* 603 * Note this memory is no longer managed, so using 604 * pmap_kenter is safe. 605 */ 606 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0); 607 } 608 pmap_update(pmap_kernel()); 609 return addr; 610#endif /* PMAP_STEAL_MEMORY */ 611} 612 613#if !defined(PMAP_STEAL_MEMORY) 614/* 615 * uvm_page_physget: "steal" one page from the vm_physmem structure. 616 * 617 * => attempt to allocate it off the end of a segment in which the "avail" 618 * values match the start/end values. if we can't do that, then we 619 * will advance both values (making them equal, and removing some 620 * vm_page structures from the non-avail area). 621 * => return false if out of memory. 622 */ 623 624/* subroutine: try to allocate from memory chunks on the specified freelist */ 625static bool uvm_page_physget_freelist(paddr_t *, int); 626 627static bool 628uvm_page_physget_freelist(paddr_t *paddrp, int freelist) 629{ 630 uvm_physseg_t lcv; 631 632 /* pass 1: try allocating from a matching end */ 633#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 634 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 635#else 636 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 637#endif 638 { 639 if (uvm.page_init_done == true) 640 panic("uvm_page_physget: called _after_ bootstrap"); 641 642 /* Try to match at front or back on unused segment */ 643 if (uvm_page_physunload(lcv, freelist, paddrp)) 644 return true; 645 } 646 647 /* pass2: forget about matching ends, just allocate something */ 648#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 649 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 650#else 651 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 652#endif 653 { 654 /* Try the front regardless. */ 655 if (uvm_page_physunload_force(lcv, freelist, paddrp)) 656 return true; 657 } 658 return false; 659} 660 661bool 662uvm_page_physget(paddr_t *paddrp) 663{ 664 int i; 665 666 /* try in the order of freelist preference */ 667 for (i = 0; i < VM_NFREELIST; i++) 668 if (uvm_page_physget_freelist(paddrp, i) == true) 669 return (true); 670 return (false); 671} 672#endif /* PMAP_STEAL_MEMORY */ 673 674paddr_t 675uvm_vm_page_to_phys(const struct vm_page *pg) 676{ 677 678 return pg->phys_addr & ~(PAGE_SIZE - 1); 679} 680 681/* 682 * uvm_page_numa_load: load NUMA range description. 683 */ 684void 685uvm_page_numa_load(paddr_t start, paddr_t size, u_int numa_id) 686{ 687 struct uvm_page_numa_region *d; 688 689 KASSERT(numa_id < PGFL_MAX_BUCKETS); 690 691 d = kmem_alloc(sizeof(*d), KM_SLEEP); 692 d->start = start; 693 d->size = size; 694 d->numa_id = numa_id; 695 d->next = uvm_page_numa_region; 696 uvm_page_numa_region = d; 697} 698 699/* 700 * uvm_page_numa_lookup: lookup NUMA node for the given page. 701 */ 702static u_int 703uvm_page_numa_lookup(struct vm_page *pg) 704{ 705 struct uvm_page_numa_region *d; 706 static bool warned; 707 paddr_t pa; 708 709 KASSERT(uvm_page_numa_region != NULL); 710 711 pa = VM_PAGE_TO_PHYS(pg); 712 for (d = uvm_page_numa_region; d != NULL; d = d->next) { 713 if (pa >= d->start && pa < d->start + d->size) { 714 return d->numa_id; 715 } 716 } 717 718 if (!warned) { 719 printf("uvm_page_numa_lookup: failed, first pg=%p pa=%#" 720 PRIxPADDR "\n", pg, VM_PAGE_TO_PHYS(pg)); 721 warned = true; 722 } 723 724 return 0; 725} 726 727/* 728 * uvm_page_redim: adjust freelist dimensions if they have changed. 729 */ 730 731static void 732uvm_page_redim(int newncolors, int newnbuckets) 733{ 734 struct pgfreelist npgfl; 735 struct pgflbucket *opgb, *npgb; 736 struct pgflist *ohead, *nhead; 737 struct vm_page *pg; 738 size_t bucketsize, bucketmemsize, oldbucketmemsize; 739 int fl, ob, oc, nb, nc, obuckets, ocolors; 740 char *bucketarray, *oldbucketmem, *bucketmem; 741 742 KASSERT(((newncolors - 1) & newncolors) == 0); 743 744 /* Anything to do? */ 745 if (newncolors <= uvmexp.ncolors && 746 newnbuckets == uvm.bucketcount) { 747 return; 748 } 749 if (uvm.page_init_done == false) { 750 uvmexp.ncolors = newncolors; 751 return; 752 } 753 754 bucketsize = offsetof(struct pgflbucket, pgb_colors[newncolors]); 755 bucketsize = roundup2(bucketsize, coherency_unit); 756 bucketmemsize = bucketsize * newnbuckets * VM_NFREELIST + 757 coherency_unit - 1; 758 bucketmem = kmem_zalloc(bucketmemsize, KM_SLEEP); 759 bucketarray = (char *)roundup2((uintptr_t)bucketmem, coherency_unit); 760 761 ocolors = uvmexp.ncolors; 762 obuckets = uvm.bucketcount; 763 764 /* Freelist cache mustn't be enabled. */ 765 uvm_pgflcache_pause(); 766 767 /* Make sure we should still do this. */ 768 uvm_pgfl_lock(); 769 if (newncolors <= uvmexp.ncolors && 770 newnbuckets == uvm.bucketcount) { 771 uvm_pgfl_unlock(); 772 uvm_pgflcache_resume(); 773 kmem_free(bucketmem, bucketmemsize); 774 return; 775 } 776 777 uvmexp.ncolors = newncolors; 778 uvmexp.colormask = uvmexp.ncolors - 1; 779 uvm.bucketcount = newnbuckets; 780 781 for (fl = 0; fl < VM_NFREELIST; fl++) { 782 /* Init new buckets in new freelist. */ 783 memset(&npgfl, 0, sizeof(npgfl)); 784 for (nb = 0; nb < newnbuckets; nb++) { 785 npgb = (struct pgflbucket *)bucketarray; 786 uvm_page_init_bucket(&npgfl, npgb, nb); 787 bucketarray += bucketsize; 788 } 789 /* Now transfer pages from the old freelist. */ 790 for (nb = ob = 0; ob < obuckets; ob++) { 791 opgb = uvm.page_free[fl].pgfl_buckets[ob]; 792 for (oc = 0; oc < ocolors; oc++) { 793 ohead = &opgb->pgb_colors[oc]; 794 while ((pg = LIST_FIRST(ohead)) != NULL) { 795 LIST_REMOVE(pg, pageq.list); 796 /* 797 * Here we decide on the NEW color & 798 * bucket for the page. For NUMA 799 * we'll use the info that the 800 * hardware gave us. For non-NUMA 801 * assign take physical page frame 802 * number and cache color into 803 * account. We do this to try and 804 * avoid defeating any memory 805 * interleaving in the hardware. 806 */ 807 KASSERT( 808 uvm_page_get_bucket(pg) == ob); 809 KASSERT(fl == 810 uvm_page_get_freelist(pg)); 811 if (uvm_page_numa_region != NULL) { 812 nb = uvm_page_numa_lookup(pg); 813 } else { 814 nb = atop(VM_PAGE_TO_PHYS(pg)) 815 / uvmexp.ncolors / 8 816 % newnbuckets; 817 } 818 uvm_page_set_bucket(pg, nb); 819 npgb = npgfl.pgfl_buckets[nb]; 820 npgb->pgb_nfree++; 821 nc = VM_PGCOLOR(pg); 822 nhead = &npgb->pgb_colors[nc]; 823 LIST_INSERT_HEAD(nhead, pg, pageq.list); 824 } 825 } 826 } 827 /* Install the new freelist. */ 828 memcpy(&uvm.page_free[fl], &npgfl, sizeof(npgfl)); 829 } 830 831 /* Unlock and free the old memory. */ 832 oldbucketmemsize = recolored_pages_memsize; 833 oldbucketmem = recolored_pages_mem; 834 recolored_pages_memsize = bucketmemsize; 835 recolored_pages_mem = bucketmem; 836 837 uvm_pgfl_unlock(); 838 uvm_pgflcache_resume(); 839 840 if (oldbucketmemsize) { 841 kmem_free(oldbucketmem, oldbucketmemsize); 842 } 843 844 /* 845 * this calls uvm_km_alloc() which may want to hold 846 * uvm_freelist_lock. 847 */ 848 uvm_pager_realloc_emerg(); 849} 850 851/* 852 * uvm_page_recolor: Recolor the pages if the new color count is 853 * larger than the old one. 854 */ 855 856void 857uvm_page_recolor(int newncolors) 858{ 859 860 uvm_page_redim(newncolors, uvm.bucketcount); 861} 862 863/* 864 * uvm_page_rebucket: Determine a bucket structure and redim the free 865 * lists to match. 866 */ 867 868void 869uvm_page_rebucket(void) 870{ 871 u_int min_numa, max_numa, npackage, shift; 872 struct cpu_info *ci, *ci2, *ci3; 873 CPU_INFO_ITERATOR cii; 874 875 /* 876 * If we have more than one NUMA node, and the maximum NUMA node ID 877 * is less than PGFL_MAX_BUCKETS, then we'll use NUMA distribution 878 * for free pages. 879 */ 880 min_numa = (u_int)-1; 881 max_numa = 0; 882 for (CPU_INFO_FOREACH(cii, ci)) { 883 if (ci->ci_numa_id < min_numa) { 884 min_numa = ci->ci_numa_id; 885 } 886 if (ci->ci_numa_id > max_numa) { 887 max_numa = ci->ci_numa_id; 888 } 889 } 890 if (min_numa != max_numa && max_numa < PGFL_MAX_BUCKETS) { 891 aprint_debug("UVM: using NUMA allocation scheme\n"); 892 for (CPU_INFO_FOREACH(cii, ci)) { 893 ci->ci_data.cpu_uvm->pgflbucket = ci->ci_numa_id; 894 } 895 uvm_page_redim(uvmexp.ncolors, max_numa + 1); 896 return; 897 } 898 899 /* 900 * Otherwise we'll go with a scheme to maximise L2/L3 cache locality 901 * and minimise lock contention. Count the total number of CPU 902 * packages, and then try to distribute the buckets among CPU 903 * packages evenly. 904 */ 905 npackage = curcpu()->ci_nsibling[CPUREL_PACKAGE1ST]; 906 907 /* 908 * Figure out how to arrange the packages & buckets, and the total 909 * number of buckets we need. XXX 2 may not be the best factor. 910 */ 911 for (shift = 0; npackage > PGFL_MAX_BUCKETS; shift++) { 912 npackage >>= 1; 913 } 914 uvm_page_redim(uvmexp.ncolors, npackage); 915 916 /* 917 * Now tell each CPU which bucket to use. In the outer loop, scroll 918 * through all CPU packages. 919 */ 920 npackage = 0; 921 ci = curcpu(); 922 ci2 = ci->ci_sibling[CPUREL_PACKAGE1ST]; 923 do { 924 /* 925 * In the inner loop, scroll through all CPUs in the package 926 * and assign the same bucket ID. 927 */ 928 ci3 = ci2; 929 do { 930 ci3->ci_data.cpu_uvm->pgflbucket = npackage >> shift; 931 ci3 = ci3->ci_sibling[CPUREL_PACKAGE]; 932 } while (ci3 != ci2); 933 npackage++; 934 ci2 = ci2->ci_sibling[CPUREL_PACKAGE1ST]; 935 } while (ci2 != ci->ci_sibling[CPUREL_PACKAGE1ST]); 936 937 aprint_debug("UVM: using package allocation scheme, " 938 "%d package(s) per bucket\n", 1 << shift); 939} 940 941/* 942 * uvm_cpu_attach: initialize per-CPU data structures. 943 */ 944 945void 946uvm_cpu_attach(struct cpu_info *ci) 947{ 948 struct uvm_cpu *ucpu; 949 950 /* Already done in uvm_page_init(). */ 951 if (!CPU_IS_PRIMARY(ci)) { 952 /* Add more reserve pages for this CPU. */ 953 uvmexp.reserve_kernel += vm_page_reserve_kernel; 954 955 /* Allocate per-CPU data structures. */ 956 ucpu = kmem_zalloc(sizeof(struct uvm_cpu) + coherency_unit - 1, 957 KM_SLEEP); 958 ucpu = (struct uvm_cpu *)roundup2((uintptr_t)ucpu, 959 coherency_unit); 960 ci->ci_data.cpu_uvm = ucpu; 961 } else { 962 ucpu = ci->ci_data.cpu_uvm; 963 } 964 965 uvmpdpol_init_cpu(ucpu); 966} 967 968/* 969 * uvm_availmem: fetch the total amount of free memory in pages. this can 970 * have a detrimental effect on performance due to false sharing; don't call 971 * unless needed. 972 * 973 * some users can request the amount of free memory so often that it begins 974 * to impact upon performance. if calling frequently and an inexact value 975 * is okay, call with cached = true. 976 */ 977 978int 979uvm_availmem(bool cached) 980{ 981 int64_t fp; 982 983 cpu_count_sync(cached); 984 if ((fp = cpu_count_get(CPU_COUNT_FREEPAGES)) < 0) { 985 /* 986 * XXXAD could briefly go negative because it's impossible 987 * to get a clean snapshot. address this for other counters 988 * used as running totals before NetBSD 10 although less 989 * important for those. 990 */ 991 fp = 0; 992 } 993 return (int)fp; 994} 995 996/* 997 * uvm_pagealloc_pgb: helper routine that tries to allocate any color from a 998 * specific freelist and specific bucket only. 999 * 1000 * => must be at IPL_VM or higher to protect per-CPU data structures. 1001 */ 1002 1003static struct vm_page * 1004uvm_pagealloc_pgb(struct uvm_cpu *ucpu, int f, int b, int *trycolorp, int flags) 1005{ 1006 int c, trycolor, colormask; 1007 struct pgflbucket *pgb; 1008 struct vm_page *pg; 1009 kmutex_t *lock; 1010 bool fill; 1011 1012 /* 1013 * Skip the bucket if empty, no lock needed. There could be many 1014 * empty freelists/buckets. 1015 */ 1016 pgb = uvm.page_free[f].pgfl_buckets[b]; 1017 if (pgb->pgb_nfree == 0) { 1018 return NULL; 1019 } 1020 1021 /* Skip bucket if low on memory. */ 1022 lock = &uvm_freelist_locks[b].lock; 1023 mutex_spin_enter(lock); 1024 if (__predict_false(pgb->pgb_nfree <= uvmexp.reserve_kernel)) { 1025 if ((flags & UVM_PGA_USERESERVE) == 0 || 1026 (pgb->pgb_nfree <= uvmexp.reserve_pagedaemon && 1027 curlwp != uvm.pagedaemon_lwp)) { 1028 mutex_spin_exit(lock); 1029 return NULL; 1030 } 1031 fill = false; 1032 } else { 1033 fill = true; 1034 } 1035 1036 /* Try all page colors as needed. */ 1037 c = trycolor = *trycolorp; 1038 colormask = uvmexp.colormask; 1039 do { 1040 pg = LIST_FIRST(&pgb->pgb_colors[c]); 1041 if (__predict_true(pg != NULL)) { 1042 /* 1043 * Got a free page! PG_FREE must be cleared under 1044 * lock because of uvm_pglistalloc(). 1045 */ 1046 LIST_REMOVE(pg, pageq.list); 1047 KASSERT(pg->flags == PG_FREE); 1048 pg->flags = PG_BUSY | PG_CLEAN | PG_FAKE; 1049 pgb->pgb_nfree--; 1050 CPU_COUNT(CPU_COUNT_FREEPAGES, -1); 1051 1052 /* 1053 * While we have the bucket locked and our data 1054 * structures fresh in L1 cache, we have an ideal 1055 * opportunity to grab some pages for the freelist 1056 * cache without causing extra contention. Only do 1057 * so if we found pages in this CPU's preferred 1058 * bucket. 1059 */ 1060 if (__predict_true(b == ucpu->pgflbucket && fill)) { 1061 uvm_pgflcache_fill(ucpu, f, b, c); 1062 } 1063 mutex_spin_exit(lock); 1064 KASSERT(uvm_page_get_bucket(pg) == b); 1065 CPU_COUNT(c == trycolor ? 1066 CPU_COUNT_COLORHIT : CPU_COUNT_COLORMISS, 1); 1067 CPU_COUNT(CPU_COUNT_CPUMISS, 1); 1068 *trycolorp = c; 1069 return pg; 1070 } 1071 c = (c + 1) & colormask; 1072 } while (c != trycolor); 1073 mutex_spin_exit(lock); 1074 1075 return NULL; 1076} 1077 1078/* 1079 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat that allocates 1080 * any color from any bucket, in a specific freelist. 1081 * 1082 * => must be at IPL_VM or higher to protect per-CPU data structures. 1083 */ 1084 1085static struct vm_page * 1086uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int f, int *trycolorp, int flags) 1087{ 1088 int b, trybucket, bucketcount; 1089 struct vm_page *pg; 1090 1091 /* Try for the exact thing in the per-CPU cache. */ 1092 if ((pg = uvm_pgflcache_alloc(ucpu, f, *trycolorp)) != NULL) { 1093 CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1094 CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1095 return pg; 1096 } 1097 1098 /* Walk through all buckets, trying our preferred bucket first. */ 1099 trybucket = ucpu->pgflbucket; 1100 b = trybucket; 1101 bucketcount = uvm.bucketcount; 1102 do { 1103 pg = uvm_pagealloc_pgb(ucpu, f, b, trycolorp, flags); 1104 if (pg != NULL) { 1105 return pg; 1106 } 1107 b = (b + 1 == bucketcount ? 0 : b + 1); 1108 } while (b != trybucket); 1109 1110 return NULL; 1111} 1112 1113/* 1114 * uvm_pagealloc_strat: allocate vm_page from a particular free list. 1115 * 1116 * => return null if no pages free 1117 * => wake up pagedaemon if number of free pages drops below low water mark 1118 * => if obj != NULL, obj must be locked (to put in obj's tree) 1119 * => if anon != NULL, anon must be locked (to put in anon) 1120 * => only one of obj or anon can be non-null 1121 * => caller must activate/deactivate page if it is not wired. 1122 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL. 1123 * => policy decision: it is more important to pull a page off of the 1124 * appropriate priority free list than it is to get a page from the 1125 * correct bucket or color bin. This is because we live with the 1126 * consequences of a bad free list decision for the entire 1127 * lifetime of the page, e.g. if the page comes from memory that 1128 * is slower to access. 1129 */ 1130 1131struct vm_page * 1132uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, 1133 int flags, int strat, int free_list) 1134{ 1135 int color, lcv, error, s; 1136 struct uvm_cpu *ucpu; 1137 struct vm_page *pg; 1138 lwp_t *l; 1139 1140 KASSERT(obj == NULL || anon == NULL); 1141 KASSERT(anon == NULL || (flags & UVM_FLAG_COLORMATCH) || off == 0); 1142 KASSERT(off == trunc_page(off)); 1143 KASSERT(obj == NULL || rw_write_held(obj->vmobjlock)); 1144 KASSERT(anon == NULL || anon->an_lock == NULL || 1145 rw_write_held(anon->an_lock)); 1146 1147 /* 1148 * This implements a global round-robin page coloring 1149 * algorithm. 1150 */ 1151 1152 s = splvm(); 1153 ucpu = curcpu()->ci_data.cpu_uvm; 1154 if (flags & UVM_FLAG_COLORMATCH) { 1155 color = atop(off) & uvmexp.colormask; 1156 } else { 1157 color = ucpu->pgflcolor; 1158 } 1159 1160 /* 1161 * fail if any of these conditions is true: 1162 * [1] there really are no free pages, or 1163 * [2] only kernel "reserved" pages remain and 1164 * reserved pages have not been requested. 1165 * [3] only pagedaemon "reserved" pages remain and 1166 * the requestor isn't the pagedaemon. 1167 * we make kernel reserve pages available if called by a 1168 * kernel thread. 1169 */ 1170 l = curlwp; 1171 if (__predict_true(l != NULL) && (l->l_flag & LW_SYSTEM) != 0) { 1172 flags |= UVM_PGA_USERESERVE; 1173 } 1174 1175 again: 1176 switch (strat) { 1177 case UVM_PGA_STRAT_NORMAL: 1178 /* Check freelists: descending priority (ascending id) order. */ 1179 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1180 pg = uvm_pagealloc_pgfl(ucpu, lcv, &color, flags); 1181 if (pg != NULL) { 1182 goto gotit; 1183 } 1184 } 1185 1186 /* No pages free! Have pagedaemon free some memory. */ 1187 splx(s); 1188 uvm_kick_pdaemon(); 1189 return NULL; 1190 1191 case UVM_PGA_STRAT_ONLY: 1192 case UVM_PGA_STRAT_FALLBACK: 1193 /* Attempt to allocate from the specified free list. */ 1194 KASSERT(free_list >= 0); 1195 KASSERT(free_list < VM_NFREELIST); 1196 pg = uvm_pagealloc_pgfl(ucpu, free_list, &color, flags); 1197 if (pg != NULL) { 1198 goto gotit; 1199 } 1200 1201 /* Fall back, if possible. */ 1202 if (strat == UVM_PGA_STRAT_FALLBACK) { 1203 strat = UVM_PGA_STRAT_NORMAL; 1204 goto again; 1205 } 1206 1207 /* No pages free! Have pagedaemon free some memory. */ 1208 splx(s); 1209 uvm_kick_pdaemon(); 1210 return NULL; 1211 1212 case UVM_PGA_STRAT_NUMA: 1213 /* 1214 * NUMA strategy (experimental): allocating from the correct 1215 * bucket is more important than observing freelist 1216 * priority. Look only to the current NUMA node; if that 1217 * fails, we need to look to other NUMA nodes, so retry with 1218 * the normal strategy. 1219 */ 1220 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1221 pg = uvm_pgflcache_alloc(ucpu, lcv, color); 1222 if (pg != NULL) { 1223 CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1224 CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1225 goto gotit; 1226 } 1227 pg = uvm_pagealloc_pgb(ucpu, lcv, 1228 ucpu->pgflbucket, &color, flags); 1229 if (pg != NULL) { 1230 goto gotit; 1231 } 1232 } 1233 strat = UVM_PGA_STRAT_NORMAL; 1234 goto again; 1235 1236 default: 1237 panic("uvm_pagealloc_strat: bad strat %d", strat); 1238 /* NOTREACHED */ 1239 } 1240 1241 gotit: 1242 /* 1243 * We now know which color we actually allocated from; set 1244 * the next color accordingly. 1245 */ 1246 1247 ucpu->pgflcolor = (color + 1) & uvmexp.colormask; 1248 1249 /* 1250 * while still at IPL_VM, update allocation statistics. 1251 */ 1252 1253 if (anon) { 1254 CPU_COUNT(CPU_COUNT_ANONCLEAN, 1); 1255 } 1256 splx(s); 1257 KASSERT(pg->flags == (PG_BUSY|PG_CLEAN|PG_FAKE)); 1258 1259 /* 1260 * assign the page to the object. as the page was free, we know 1261 * that pg->uobject and pg->uanon are NULL. we only need to take 1262 * the page's interlock if we are changing the values. 1263 */ 1264 if (anon != NULL || obj != NULL) { 1265 mutex_enter(&pg->interlock); 1266 } 1267 pg->offset = off; 1268 pg->uobject = obj; 1269 pg->uanon = anon; 1270 KASSERT(uvm_page_owner_locked_p(pg, true)); 1271 if (anon) { 1272 anon->an_page = pg; 1273 pg->flags |= PG_ANON; 1274 mutex_exit(&pg->interlock); 1275 } else if (obj) { 1276 /* 1277 * set PG_FILE|PG_AOBJ before the first uvm_pageinsert. 1278 */ 1279 if (UVM_OBJ_IS_VNODE(obj)) { 1280 pg->flags |= PG_FILE; 1281 } else if (UVM_OBJ_IS_AOBJ(obj)) { 1282 pg->flags |= PG_AOBJ; 1283 } 1284 uvm_pageinsert_object(obj, pg); 1285 mutex_exit(&pg->interlock); 1286 error = uvm_pageinsert_tree(obj, pg); 1287 if (error != 0) { 1288 mutex_enter(&pg->interlock); 1289 uvm_pageremove_object(obj, pg); 1290 mutex_exit(&pg->interlock); 1291 uvm_pagefree(pg); 1292 return NULL; 1293 } 1294 } 1295 1296#if defined(UVM_PAGE_TRKOWN) 1297 pg->owner_tag = NULL; 1298#endif 1299 UVM_PAGE_OWN(pg, "new alloc"); 1300 1301 if (flags & UVM_PGA_ZERO) { 1302 /* A zero'd page is not clean. */ 1303 if (obj != NULL || anon != NULL) { 1304 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 1305 } 1306 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1307 } 1308 1309 return(pg); 1310} 1311 1312/* 1313 * uvm_pagereplace: replace a page with another 1314 * 1315 * => object must be locked 1316 * => page interlocks must be held 1317 */ 1318 1319void 1320uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg) 1321{ 1322 struct uvm_object *uobj = oldpg->uobject; 1323 struct vm_page *pg __diagused; 1324 uint64_t idx; 1325 1326 KASSERT((oldpg->flags & PG_TABLED) != 0); 1327 KASSERT(uobj != NULL); 1328 KASSERT((newpg->flags & PG_TABLED) == 0); 1329 KASSERT(newpg->uobject == NULL); 1330 KASSERT(rw_write_held(uobj->vmobjlock)); 1331 KASSERT(mutex_owned(&oldpg->interlock)); 1332 KASSERT(mutex_owned(&newpg->interlock)); 1333 1334 newpg->uobject = uobj; 1335 newpg->offset = oldpg->offset; 1336 idx = newpg->offset >> PAGE_SHIFT; 1337 pg = radix_tree_replace_node(&uobj->uo_pages, idx, newpg); 1338 KASSERT(pg == oldpg); 1339 if (((oldpg->flags ^ newpg->flags) & PG_CLEAN) != 0) { 1340 if ((newpg->flags & PG_CLEAN) != 0) { 1341 uvm_obj_page_clear_dirty(newpg); 1342 } else { 1343 uvm_obj_page_set_dirty(newpg); 1344 } 1345 } 1346 /* 1347 * oldpg's PG_STAT is stable. newpg is not reachable by others yet. 1348 */ 1349 newpg->flags |= 1350 (newpg->flags & ~PG_STAT) | (oldpg->flags & PG_STAT); 1351 uvm_pageinsert_object(uobj, newpg); 1352 uvm_pageremove_object(uobj, oldpg); 1353} 1354 1355/* 1356 * uvm_pagerealloc: reallocate a page from one object to another 1357 * 1358 * => both objects must be locked 1359 */ 1360 1361int 1362uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff) 1363{ 1364 int error = 0; 1365 1366 /* 1367 * remove it from the old object 1368 */ 1369 1370 if (pg->uobject) { 1371 uvm_pageremove_tree(pg->uobject, pg); 1372 uvm_pageremove_object(pg->uobject, pg); 1373 } 1374 1375 /* 1376 * put it in the new object 1377 */ 1378 1379 if (newobj) { 1380 mutex_enter(&pg->interlock); 1381 pg->uobject = newobj; 1382 pg->offset = newoff; 1383 if (UVM_OBJ_IS_VNODE(newobj)) { 1384 pg->flags |= PG_FILE; 1385 } else if (UVM_OBJ_IS_AOBJ(newobj)) { 1386 pg->flags |= PG_AOBJ; 1387 } 1388 uvm_pageinsert_object(newobj, pg); 1389 mutex_exit(&pg->interlock); 1390 error = uvm_pageinsert_tree(newobj, pg); 1391 if (error != 0) { 1392 mutex_enter(&pg->interlock); 1393 uvm_pageremove_object(newobj, pg); 1394 mutex_exit(&pg->interlock); 1395 } 1396 } 1397 1398 return error; 1399} 1400 1401/* 1402 * uvm_pagefree: free page 1403 * 1404 * => erase page's identity (i.e. remove from object) 1405 * => put page on free list 1406 * => caller must lock owning object (either anon or uvm_object) 1407 * => assumes all valid mappings of pg are gone 1408 */ 1409 1410void 1411uvm_pagefree(struct vm_page *pg) 1412{ 1413 struct pgfreelist *pgfl; 1414 struct pgflbucket *pgb; 1415 struct uvm_cpu *ucpu; 1416 kmutex_t *lock; 1417 int bucket, s; 1418 bool locked; 1419 1420#ifdef DEBUG 1421 if (pg->uobject == (void *)0xdeadbeef && 1422 pg->uanon == (void *)0xdeadbeef) { 1423 panic("uvm_pagefree: freeing free page %p", pg); 1424 } 1425#endif /* DEBUG */ 1426 1427 KASSERT((pg->flags & PG_PAGEOUT) == 0); 1428 KASSERT(!(pg->flags & PG_FREE)); 1429 KASSERT(pg->uobject == NULL || rw_write_held(pg->uobject->vmobjlock)); 1430 KASSERT(pg->uobject != NULL || pg->uanon == NULL || 1431 rw_write_held(pg->uanon->an_lock)); 1432 1433 /* 1434 * remove the page from the object's tree before acquiring any page 1435 * interlocks: this can acquire locks to free radixtree nodes. 1436 */ 1437 if (pg->uobject != NULL) { 1438 uvm_pageremove_tree(pg->uobject, pg); 1439 } 1440 1441 /* 1442 * if the page is loaned, resolve the loan instead of freeing. 1443 */ 1444 1445 if (pg->loan_count) { 1446 KASSERT(pg->wire_count == 0); 1447 1448 /* 1449 * if the page is owned by an anon then we just want to 1450 * drop anon ownership. the kernel will free the page when 1451 * it is done with it. if the page is owned by an object, 1452 * remove it from the object and mark it dirty for the benefit 1453 * of possible anon owners. 1454 * 1455 * regardless of previous ownership, wakeup any waiters, 1456 * unbusy the page, and we're done. 1457 */ 1458 1459 uvm_pagelock(pg); 1460 locked = true; 1461 if (pg->uobject != NULL) { 1462 uvm_pageremove_object(pg->uobject, pg); 1463 pg->flags &= ~(PG_FILE|PG_AOBJ); 1464 } else if (pg->uanon != NULL) { 1465 if ((pg->flags & PG_ANON) == 0) { 1466 pg->loan_count--; 1467 } else { 1468 const unsigned status = uvm_pagegetdirty(pg); 1469 pg->flags &= ~PG_ANON; 1470 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1471 } 1472 pg->uanon->an_page = NULL; 1473 pg->uanon = NULL; 1474 } 1475 if (pg->pqflags & PQ_WANTED) { 1476 wakeup(pg); 1477 } 1478 pg->pqflags &= ~PQ_WANTED; 1479 pg->flags &= ~(PG_BUSY|PG_RELEASED|PG_PAGER1); 1480#ifdef UVM_PAGE_TRKOWN 1481 pg->owner_tag = NULL; 1482#endif 1483 KASSERT((pg->flags & PG_STAT) == 0); 1484 if (pg->loan_count) { 1485 KASSERT(pg->uobject == NULL); 1486 if (pg->uanon == NULL) { 1487 uvm_pagedequeue(pg); 1488 } 1489 uvm_pageunlock(pg); 1490 return; 1491 } 1492 } else if (pg->uobject != NULL || pg->uanon != NULL || 1493 pg->wire_count != 0) { 1494 uvm_pagelock(pg); 1495 locked = true; 1496 } else { 1497 locked = false; 1498 } 1499 1500 /* 1501 * remove page from its object or anon. 1502 */ 1503 if (pg->uobject != NULL) { 1504 uvm_pageremove_object(pg->uobject, pg); 1505 } else if (pg->uanon != NULL) { 1506 const unsigned int status = uvm_pagegetdirty(pg); 1507 pg->uanon->an_page = NULL; 1508 pg->uanon = NULL; 1509 cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1510 } 1511 1512 /* 1513 * if the page was wired, unwire it now. 1514 */ 1515 1516 if (pg->wire_count) { 1517 pg->wire_count = 0; 1518 atomic_dec_uint(&uvmexp.wired); 1519 } 1520 if (locked) { 1521 /* 1522 * wake anyone waiting on the page. 1523 */ 1524 if ((pg->pqflags & PQ_WANTED) != 0) { 1525 pg->pqflags &= ~PQ_WANTED; 1526 wakeup(pg); 1527 } 1528 1529 /* 1530 * now remove the page from the queues. 1531 */ 1532 uvm_pagedequeue(pg); 1533 uvm_pageunlock(pg); 1534 } else { 1535 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1536 } 1537 1538 /* 1539 * and put on free queue 1540 */ 1541 1542#ifdef DEBUG 1543 pg->uobject = (void *)0xdeadbeef; 1544 pg->uanon = (void *)0xdeadbeef; 1545#endif /* DEBUG */ 1546 1547 /* Try to send the page to the per-CPU cache. */ 1548 s = splvm(); 1549 ucpu = curcpu()->ci_data.cpu_uvm; 1550 bucket = uvm_page_get_bucket(pg); 1551 if (bucket == ucpu->pgflbucket && uvm_pgflcache_free(ucpu, pg)) { 1552 splx(s); 1553 return; 1554 } 1555 1556 /* Didn't work. Never mind, send it to a global bucket. */ 1557 pgfl = &uvm.page_free[uvm_page_get_freelist(pg)]; 1558 pgb = pgfl->pgfl_buckets[bucket]; 1559 lock = &uvm_freelist_locks[bucket].lock; 1560 1561 mutex_spin_enter(lock); 1562 /* PG_FREE must be set under lock because of uvm_pglistalloc(). */ 1563 pg->flags = PG_FREE; 1564 LIST_INSERT_HEAD(&pgb->pgb_colors[VM_PGCOLOR(pg)], pg, pageq.list); 1565 pgb->pgb_nfree++; 1566 CPU_COUNT(CPU_COUNT_FREEPAGES, 1); 1567 mutex_spin_exit(lock); 1568 splx(s); 1569} 1570 1571/* 1572 * uvm_page_unbusy: unbusy an array of pages. 1573 * 1574 * => pages must either all belong to the same object, or all belong to anons. 1575 * => if pages are object-owned, object must be locked. 1576 * => if pages are anon-owned, anons must be locked. 1577 * => caller must make sure that anon-owned pages are not PG_RELEASED. 1578 */ 1579 1580void 1581uvm_page_unbusy(struct vm_page **pgs, int npgs) 1582{ 1583 struct vm_page *pg; 1584 int i, pageout_done; 1585 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1586 1587 pageout_done = 0; 1588 for (i = 0; i < npgs; i++) { 1589 pg = pgs[i]; 1590 if (pg == NULL || pg == PGO_DONTCARE) { 1591 continue; 1592 } 1593 1594 KASSERT(uvm_page_owner_locked_p(pg, true)); 1595 KASSERT(pg->flags & PG_BUSY); 1596 1597 if (pg->flags & PG_PAGEOUT) { 1598 pg->flags &= ~PG_PAGEOUT; 1599 pg->flags |= PG_RELEASED; 1600 pageout_done++; 1601 atomic_inc_uint(&uvmexp.pdfreed); 1602 } 1603 if (pg->flags & PG_RELEASED) { 1604 UVMHIST_LOG(ubchist, "releasing pg %#jx", 1605 (uintptr_t)pg, 0, 0, 0); 1606 KASSERT(pg->uobject != NULL || 1607 (pg->uanon != NULL && pg->uanon->an_ref > 0)); 1608 pg->flags &= ~PG_RELEASED; 1609 uvm_pagefree(pg); 1610 } else { 1611 UVMHIST_LOG(ubchist, "unbusying pg %#jx", 1612 (uintptr_t)pg, 0, 0, 0); 1613 KASSERT((pg->flags & PG_FAKE) == 0); 1614 pg->flags &= ~PG_BUSY; 1615 uvm_pagelock(pg); 1616 uvm_pagewakeup(pg); 1617 uvm_pageunlock(pg); 1618 UVM_PAGE_OWN(pg, NULL); 1619 } 1620 } 1621 if (pageout_done != 0) { 1622 uvm_pageout_done(pageout_done); 1623 } 1624} 1625 1626/* 1627 * uvm_pagewait: wait for a busy page 1628 * 1629 * => page must be known PG_BUSY 1630 * => object must be read or write locked 1631 * => object will be unlocked on return 1632 */ 1633 1634void 1635uvm_pagewait(struct vm_page *pg, krwlock_t *lock, const char *wmesg) 1636{ 1637 1638 KASSERT(rw_lock_held(lock)); 1639 KASSERT((pg->flags & PG_BUSY) != 0); 1640 KASSERT(uvm_page_owner_locked_p(pg, false)); 1641 1642 mutex_enter(&pg->interlock); 1643 pg->pqflags |= PQ_WANTED; 1644 rw_exit(lock); 1645 UVM_UNLOCK_AND_WAIT(pg, &pg->interlock, false, wmesg, 0); 1646} 1647 1648/* 1649 * uvm_pagewakeup: wake anyone waiting on a page 1650 * 1651 * => page interlock must be held 1652 */ 1653 1654void 1655uvm_pagewakeup(struct vm_page *pg) 1656{ 1657 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1658 1659 KASSERT(mutex_owned(&pg->interlock)); 1660 1661 UVMHIST_LOG(ubchist, "waking pg %#jx", (uintptr_t)pg, 0, 0, 0); 1662 1663 if ((pg->pqflags & PQ_WANTED) != 0) { 1664 wakeup(pg); 1665 pg->pqflags &= ~PQ_WANTED; 1666 } 1667} 1668 1669/* 1670 * uvm_pagewanted_p: return true if someone is waiting on the page 1671 * 1672 * => object must be write locked (lock out all concurrent access) 1673 */ 1674 1675bool 1676uvm_pagewanted_p(struct vm_page *pg) 1677{ 1678 1679 KASSERT(uvm_page_owner_locked_p(pg, true)); 1680 1681 return (atomic_load_relaxed(&pg->pqflags) & PQ_WANTED) != 0; 1682} 1683 1684#if defined(UVM_PAGE_TRKOWN) 1685/* 1686 * uvm_page_own: set or release page ownership 1687 * 1688 * => this is a debugging function that keeps track of who sets PG_BUSY 1689 * and where they do it. it can be used to track down problems 1690 * such a process setting "PG_BUSY" and never releasing it. 1691 * => page's object [if any] must be locked 1692 * => if "tag" is NULL then we are releasing page ownership 1693 */ 1694void 1695uvm_page_own(struct vm_page *pg, const char *tag) 1696{ 1697 1698 KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0); 1699 KASSERT(uvm_page_owner_locked_p(pg, true)); 1700 1701 /* gain ownership? */ 1702 if (tag) { 1703 KASSERT((pg->flags & PG_BUSY) != 0); 1704 if (pg->owner_tag) { 1705 printf("uvm_page_own: page %p already owned " 1706 "by proc %d.%d [%s]\n", pg, 1707 pg->owner, pg->lowner, pg->owner_tag); 1708 panic("uvm_page_own"); 1709 } 1710 pg->owner = curproc->p_pid; 1711 pg->lowner = curlwp->l_lid; 1712 pg->owner_tag = tag; 1713 return; 1714 } 1715 1716 /* drop ownership */ 1717 KASSERT((pg->flags & PG_BUSY) == 0); 1718 if (pg->owner_tag == NULL) { 1719 printf("uvm_page_own: dropping ownership of an non-owned " 1720 "page (%p)\n", pg); 1721 panic("uvm_page_own"); 1722 } 1723 pg->owner_tag = NULL; 1724} 1725#endif 1726 1727/* 1728 * uvm_pagelookup: look up a page 1729 * 1730 * => caller should lock object to keep someone from pulling the page 1731 * out from under it 1732 */ 1733 1734struct vm_page * 1735uvm_pagelookup(struct uvm_object *obj, voff_t off) 1736{ 1737 struct vm_page *pg; 1738 1739 KASSERT(db_active || rw_lock_held(obj->vmobjlock)); 1740 1741 pg = radix_tree_lookup_node(&obj->uo_pages, off >> PAGE_SHIFT); 1742 1743 KASSERT(pg == NULL || obj->uo_npages != 0); 1744 KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 || 1745 (pg->flags & PG_BUSY) != 0); 1746 return pg; 1747} 1748 1749/* 1750 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp 1751 * 1752 * => caller must lock objects 1753 * => caller must hold pg->interlock 1754 */ 1755 1756void 1757uvm_pagewire(struct vm_page *pg) 1758{ 1759 1760 KASSERT(uvm_page_owner_locked_p(pg, true)); 1761 KASSERT(mutex_owned(&pg->interlock)); 1762#if defined(READAHEAD_STATS) 1763 if ((pg->flags & PG_READAHEAD) != 0) { 1764 uvm_ra_hit.ev_count++; 1765 pg->flags &= ~PG_READAHEAD; 1766 } 1767#endif /* defined(READAHEAD_STATS) */ 1768 if (pg->wire_count == 0) { 1769 uvm_pagedequeue(pg); 1770 atomic_inc_uint(&uvmexp.wired); 1771 } 1772 pg->wire_count++; 1773 KASSERT(pg->wire_count > 0); /* detect wraparound */ 1774} 1775 1776/* 1777 * uvm_pageunwire: unwire the page. 1778 * 1779 * => activate if wire count goes to zero. 1780 * => caller must lock objects 1781 * => caller must hold pg->interlock 1782 */ 1783 1784void 1785uvm_pageunwire(struct vm_page *pg) 1786{ 1787 1788 KASSERT(uvm_page_owner_locked_p(pg, true)); 1789 KASSERT(pg->wire_count != 0); 1790 KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1791 KASSERT(mutex_owned(&pg->interlock)); 1792 pg->wire_count--; 1793 if (pg->wire_count == 0) { 1794 uvm_pageactivate(pg); 1795 KASSERT(uvmexp.wired != 0); 1796 atomic_dec_uint(&uvmexp.wired); 1797 } 1798} 1799 1800/* 1801 * uvm_pagedeactivate: deactivate page 1802 * 1803 * => caller must lock objects 1804 * => caller must check to make sure page is not wired 1805 * => object that page belongs to must be locked (so we can adjust pg->flags) 1806 * => caller must clear the reference on the page before calling 1807 * => caller must hold pg->interlock 1808 */ 1809 1810void 1811uvm_pagedeactivate(struct vm_page *pg) 1812{ 1813 1814 KASSERT(uvm_page_owner_locked_p(pg, false)); 1815 KASSERT(mutex_owned(&pg->interlock)); 1816 if (pg->wire_count == 0) { 1817 KASSERT(uvmpdpol_pageisqueued_p(pg)); 1818 uvmpdpol_pagedeactivate(pg); 1819 } 1820} 1821 1822/* 1823 * uvm_pageactivate: activate page 1824 * 1825 * => caller must lock objects 1826 * => caller must hold pg->interlock 1827 */ 1828 1829void 1830uvm_pageactivate(struct vm_page *pg) 1831{ 1832 1833 KASSERT(uvm_page_owner_locked_p(pg, false)); 1834 KASSERT(mutex_owned(&pg->interlock)); 1835#if defined(READAHEAD_STATS) 1836 if ((pg->flags & PG_READAHEAD) != 0) { 1837 uvm_ra_hit.ev_count++; 1838 pg->flags &= ~PG_READAHEAD; 1839 } 1840#endif /* defined(READAHEAD_STATS) */ 1841 if (pg->wire_count == 0) { 1842 uvmpdpol_pageactivate(pg); 1843 } 1844} 1845 1846/* 1847 * uvm_pagedequeue: remove a page from any paging queue 1848 * 1849 * => caller must lock objects 1850 * => caller must hold pg->interlock 1851 */ 1852void 1853uvm_pagedequeue(struct vm_page *pg) 1854{ 1855 1856 KASSERT(uvm_page_owner_locked_p(pg, true)); 1857 KASSERT(mutex_owned(&pg->interlock)); 1858 if (uvmpdpol_pageisqueued_p(pg)) { 1859 uvmpdpol_pagedequeue(pg); 1860 } 1861} 1862 1863/* 1864 * uvm_pageenqueue: add a page to a paging queue without activating. 1865 * used where a page is not really demanded (yet). eg. read-ahead 1866 * 1867 * => caller must lock objects 1868 * => caller must hold pg->interlock 1869 */ 1870void 1871uvm_pageenqueue(struct vm_page *pg) 1872{ 1873 1874 KASSERT(uvm_page_owner_locked_p(pg, false)); 1875 KASSERT(mutex_owned(&pg->interlock)); 1876 if (pg->wire_count == 0 && !uvmpdpol_pageisqueued_p(pg)) { 1877 uvmpdpol_pageenqueue(pg); 1878 } 1879} 1880 1881/* 1882 * uvm_pagelock: acquire page interlock 1883 */ 1884void 1885uvm_pagelock(struct vm_page *pg) 1886{ 1887 1888 mutex_enter(&pg->interlock); 1889} 1890 1891/* 1892 * uvm_pagelock2: acquire two page interlocks 1893 */ 1894void 1895uvm_pagelock2(struct vm_page *pg1, struct vm_page *pg2) 1896{ 1897 1898 if (pg1 < pg2) { 1899 mutex_enter(&pg1->interlock); 1900 mutex_enter(&pg2->interlock); 1901 } else { 1902 mutex_enter(&pg2->interlock); 1903 mutex_enter(&pg1->interlock); 1904 } 1905} 1906 1907/* 1908 * uvm_pageunlock: release page interlock, and if a page replacement intent 1909 * is set on the page, pass it to uvmpdpol to make real. 1910 * 1911 * => caller must hold pg->interlock 1912 */ 1913void 1914uvm_pageunlock(struct vm_page *pg) 1915{ 1916 1917 if ((pg->pqflags & PQ_INTENT_SET) == 0 || 1918 (pg->pqflags & PQ_INTENT_QUEUED) != 0) { 1919 mutex_exit(&pg->interlock); 1920 return; 1921 } 1922 pg->pqflags |= PQ_INTENT_QUEUED; 1923 mutex_exit(&pg->interlock); 1924 uvmpdpol_pagerealize(pg); 1925} 1926 1927/* 1928 * uvm_pageunlock2: release two page interlocks, and for both pages if a 1929 * page replacement intent is set on the page, pass it to uvmpdpol to make 1930 * real. 1931 * 1932 * => caller must hold pg->interlock 1933 */ 1934void 1935uvm_pageunlock2(struct vm_page *pg1, struct vm_page *pg2) 1936{ 1937 1938 if ((pg1->pqflags & PQ_INTENT_SET) == 0 || 1939 (pg1->pqflags & PQ_INTENT_QUEUED) != 0) { 1940 mutex_exit(&pg1->interlock); 1941 pg1 = NULL; 1942 } else { 1943 pg1->pqflags |= PQ_INTENT_QUEUED; 1944 mutex_exit(&pg1->interlock); 1945 } 1946 1947 if ((pg2->pqflags & PQ_INTENT_SET) == 0 || 1948 (pg2->pqflags & PQ_INTENT_QUEUED) != 0) { 1949 mutex_exit(&pg2->interlock); 1950 pg2 = NULL; 1951 } else { 1952 pg2->pqflags |= PQ_INTENT_QUEUED; 1953 mutex_exit(&pg2->interlock); 1954 } 1955 1956 if (pg1 != NULL) { 1957 uvmpdpol_pagerealize(pg1); 1958 } 1959 if (pg2 != NULL) { 1960 uvmpdpol_pagerealize(pg2); 1961 } 1962} 1963 1964/* 1965 * uvm_pagezero: zero fill a page 1966 * 1967 * => if page is part of an object then the object should be locked 1968 * to protect pg->flags. 1969 */ 1970 1971void 1972uvm_pagezero(struct vm_page *pg) 1973{ 1974 1975 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 1976 pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1977} 1978 1979/* 1980 * uvm_pagecopy: copy a page 1981 * 1982 * => if page is part of an object then the object should be locked 1983 * to protect pg->flags. 1984 */ 1985 1986void 1987uvm_pagecopy(struct vm_page *src, struct vm_page *dst) 1988{ 1989 1990 uvm_pagemarkdirty(dst, UVM_PAGE_STATUS_DIRTY); 1991 pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); 1992} 1993 1994/* 1995 * uvm_pageismanaged: test it see that a page (specified by PA) is managed. 1996 */ 1997 1998bool 1999uvm_pageismanaged(paddr_t pa) 2000{ 2001 2002 return (uvm_physseg_find(atop(pa), NULL) != UVM_PHYSSEG_TYPE_INVALID); 2003} 2004 2005/* 2006 * uvm_page_lookup_freelist: look up the free list for the specified page 2007 */ 2008 2009int 2010uvm_page_lookup_freelist(struct vm_page *pg) 2011{ 2012 uvm_physseg_t upm; 2013 2014 upm = uvm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL); 2015 KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID); 2016 return uvm_physseg_get_free_list(upm); 2017} 2018 2019/* 2020 * uvm_page_owner_locked_p: return true if object associated with page is 2021 * locked. this is a weak check for runtime assertions only. 2022 */ 2023 2024bool 2025uvm_page_owner_locked_p(struct vm_page *pg, bool exclusive) 2026{ 2027 2028 if (pg->uobject != NULL) { 2029 return exclusive 2030 ? rw_write_held(pg->uobject->vmobjlock) 2031 : rw_lock_held(pg->uobject->vmobjlock); 2032 } 2033 if (pg->uanon != NULL) { 2034 return exclusive 2035 ? rw_write_held(pg->uanon->an_lock) 2036 : rw_lock_held(pg->uanon->an_lock); 2037 } 2038 return true; 2039} 2040 2041/* 2042 * uvm_pagereadonly_p: return if the page should be mapped read-only 2043 */ 2044 2045bool 2046uvm_pagereadonly_p(struct vm_page *pg) 2047{ 2048 struct uvm_object * const uobj = pg->uobject; 2049 2050 KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock)); 2051 KASSERT(uobj != NULL || rw_lock_held(pg->uanon->an_lock)); 2052 if ((pg->flags & PG_RDONLY) != 0) { 2053 return true; 2054 } 2055 if (uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) { 2056 return true; 2057 } 2058 if (uobj == NULL) { 2059 return false; 2060 } 2061 return UVM_OBJ_NEEDS_WRITEFAULT(uobj); 2062} 2063 2064#ifdef PMAP_DIRECT 2065/* 2066 * Call pmap to translate physical address into a virtual and to run a callback 2067 * for it. Used to avoid actually mapping the pages, pmap most likely uses direct map 2068 * or equivalent. 2069 */ 2070int 2071uvm_direct_process(struct vm_page **pgs, u_int npages, voff_t off, vsize_t len, 2072 int (*process)(void *, size_t, void *), void *arg) 2073{ 2074 int error = 0; 2075 paddr_t pa; 2076 size_t todo; 2077 voff_t pgoff = (off & PAGE_MASK); 2078 struct vm_page *pg; 2079 2080 KASSERT(npages > 0); 2081 KASSERT(len > 0); 2082 2083 for (int i = 0; i < npages; i++) { 2084 pg = pgs[i]; 2085 2086 KASSERT(len > 0); 2087 2088 /* 2089 * Caller is responsible for ensuring all the pages are 2090 * available. 2091 */ 2092 KASSERT(pg != NULL); 2093 KASSERT(pg != PGO_DONTCARE); 2094 2095 pa = VM_PAGE_TO_PHYS(pg); 2096 todo = MIN(len, PAGE_SIZE - pgoff); 2097 2098 error = pmap_direct_process(pa, pgoff, todo, process, arg); 2099 if (error) 2100 break; 2101 2102 pgoff = 0; 2103 len -= todo; 2104 } 2105 2106 KASSERTMSG(error != 0 || len == 0, "len %lu != 0 for non-error", len); 2107 return error; 2108} 2109#endif /* PMAP_DIRECT */ 2110 2111#if defined(DDB) || defined(DEBUGPRINT) 2112 2113/* 2114 * uvm_page_printit: actually print the page 2115 */ 2116 2117static const char page_flagbits[] = UVM_PGFLAGBITS; 2118static const char page_pqflagbits[] = UVM_PQFLAGBITS; 2119 2120void 2121uvm_page_printit(struct vm_page *pg, bool full, 2122 void (*pr)(const char *, ...)) 2123{ 2124 struct vm_page *tpg; 2125 struct uvm_object *uobj; 2126 struct pgflbucket *pgb; 2127 struct pgflist *pgl; 2128 char pgbuf[128]; 2129 2130 (*pr)("PAGE %p:\n", pg); 2131 snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags); 2132 (*pr)(" flags=%s\n", pgbuf); 2133 snprintb(pgbuf, sizeof(pgbuf), page_pqflagbits, pg->pqflags); 2134 (*pr)(" pqflags=%s\n", pgbuf); 2135 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 2136 pg->uobject, pg->uanon, (long long)pg->offset); 2137 (*pr)(" loan_count=%d wire_count=%d bucket=%d freelist=%d\n", 2138 pg->loan_count, pg->wire_count, uvm_page_get_bucket(pg), 2139 uvm_page_get_freelist(pg)); 2140 (*pr)(" pa=0x%lx\n", (long)VM_PAGE_TO_PHYS(pg)); 2141#if defined(UVM_PAGE_TRKOWN) 2142 if (pg->flags & PG_BUSY) 2143 (*pr)(" owning process = %d.%d, tag=%s\n", 2144 pg->owner, pg->lowner, pg->owner_tag); 2145 else 2146 (*pr)(" page not busy, no owner\n"); 2147#else 2148 (*pr)(" [page ownership tracking disabled]\n"); 2149#endif 2150 2151 if (!full) 2152 return; 2153 2154 /* cross-verify object/anon */ 2155 if ((pg->flags & PG_FREE) == 0) { 2156 if (pg->flags & PG_ANON) { 2157 if (pg->uanon == NULL || pg->uanon->an_page != pg) 2158 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 2159 (pg->uanon) ? pg->uanon->an_page : NULL); 2160 else 2161 (*pr)(" anon backpointer is OK\n"); 2162 } else { 2163 uobj = pg->uobject; 2164 if (uobj) { 2165 (*pr)(" checking object list\n"); 2166 tpg = uvm_pagelookup(uobj, pg->offset); 2167 if (tpg) 2168 (*pr)(" page found on object list\n"); 2169 else 2170 (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); 2171 } 2172 } 2173 } 2174 2175 /* cross-verify page queue */ 2176 if (pg->flags & PG_FREE) { 2177 int fl = uvm_page_get_freelist(pg); 2178 int b = uvm_page_get_bucket(pg); 2179 pgb = uvm.page_free[fl].pgfl_buckets[b]; 2180 pgl = &pgb->pgb_colors[VM_PGCOLOR(pg)]; 2181 (*pr)(" checking pageq list\n"); 2182 LIST_FOREACH(tpg, pgl, pageq.list) { 2183 if (tpg == pg) { 2184 break; 2185 } 2186 } 2187 if (tpg) 2188 (*pr)(" page found on pageq list\n"); 2189 else 2190 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 2191 } 2192} 2193 2194/* 2195 * uvm_page_printall - print a summary of all managed pages 2196 */ 2197 2198void 2199uvm_page_printall(void (*pr)(const char *, ...)) 2200{ 2201 uvm_physseg_t i; 2202 paddr_t pfn; 2203 struct vm_page *pg; 2204 2205 (*pr)("%18s %4s %4s %18s %18s" 2206#ifdef UVM_PAGE_TRKOWN 2207 " OWNER" 2208#endif 2209 "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON"); 2210 for (i = uvm_physseg_get_first(); 2211 uvm_physseg_valid_p(i); 2212 i = uvm_physseg_get_next(i)) { 2213 for (pfn = uvm_physseg_get_start(i); 2214 pfn < uvm_physseg_get_end(i); 2215 pfn++) { 2216 pg = PHYS_TO_VM_PAGE(ptoa(pfn)); 2217 2218 (*pr)("%18p %04x %08x %18p %18p", 2219 pg, pg->flags, pg->pqflags, pg->uobject, 2220 pg->uanon); 2221#ifdef UVM_PAGE_TRKOWN 2222 if (pg->flags & PG_BUSY) 2223 (*pr)(" %d [%s]", pg->owner, pg->owner_tag); 2224#endif 2225 (*pr)("\n"); 2226 } 2227 } 2228} 2229 2230/* 2231 * uvm_page_print_freelists - print a summary freelists 2232 */ 2233 2234void 2235uvm_page_print_freelists(void (*pr)(const char *, ...)) 2236{ 2237 struct pgfreelist *pgfl; 2238 struct pgflbucket *pgb; 2239 int fl, b, c; 2240 2241 (*pr)("There are %d freelists with %d buckets of %d colors.\n\n", 2242 VM_NFREELIST, uvm.bucketcount, uvmexp.ncolors); 2243 2244 for (fl = 0; fl < VM_NFREELIST; fl++) { 2245 pgfl = &uvm.page_free[fl]; 2246 (*pr)("freelist(%d) @ %p\n", fl, pgfl); 2247 for (b = 0; b < uvm.bucketcount; b++) { 2248 pgb = uvm.page_free[fl].pgfl_buckets[b]; 2249 (*pr)(" bucket(%d) @ %p, nfree = %d, lock @ %p:\n", 2250 b, pgb, pgb->pgb_nfree, 2251 &uvm_freelist_locks[b].lock); 2252 for (c = 0; c < uvmexp.ncolors; c++) { 2253 (*pr)(" color(%d) @ %p, ", c, 2254 &pgb->pgb_colors[c]); 2255 (*pr)("first page = %p\n", 2256 LIST_FIRST(&pgb->pgb_colors[c])); 2257 } 2258 } 2259 } 2260} 2261 2262#endif /* DDB || DEBUGPRINT */ 2263