uvm_page.c revision 1.69
1/* $OpenBSD: uvm_page.c,v 1.69 2009/03/24 16:29:42 oga Exp $ */ 2/* $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $ */ 3 4/* 5 * Copyright (c) 1997 Charles D. Cranor and Washington University. 6 * Copyright (c) 1991, 1993, The Regents of the University of California. 7 * 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * The Mach Operating System project at Carnegie-Mellon University. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by Charles D. Cranor, 24 * Washington University, the University of California, Berkeley and 25 * its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 43 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp 44 * 45 * 46 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 47 * All rights reserved. 48 * 49 * Permission to use, copy, modify and distribute this software and 50 * its documentation is hereby granted, provided that both the copyright 51 * notice and this permission notice appear in all copies of the 52 * software, derivative works or modified versions, and any portions 53 * thereof, and that both notices appear in supporting documentation. 54 * 55 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 56 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 57 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 58 * 59 * Carnegie Mellon requests users of this software to return to 60 * 61 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 62 * School of Computer Science 63 * Carnegie Mellon University 64 * Pittsburgh PA 15213-3890 65 * 66 * any improvements or extensions that they make and grant Carnegie the 67 * rights to redistribute these changes. 68 */ 69 70/* 71 * uvm_page.c: page ops. 72 */ 73 74#define UVM_PAGE /* pull in uvm_page.h functions */ 75#include <sys/param.h> 76#include <sys/systm.h> 77#include <sys/malloc.h> 78#include <sys/sched.h> 79#include <sys/kernel.h> 80#include <sys/vnode.h> 81#include <sys/mount.h> 82 83#include <uvm/uvm.h> 84 85/* 86 * global vars... XXXCDC: move to uvm. structure. 87 */ 88 89/* 90 * physical memory config is stored in vm_physmem. 91 */ 92 93struct vm_physseg vm_physmem[VM_PHYSSEG_MAX]; /* XXXCDC: uvm.physmem */ 94int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */ 95 96/* 97 * Some supported CPUs in a given architecture don't support all 98 * of the things necessary to do idle page zero'ing efficiently. 99 * We therefore provide a way to disable it from machdep code here. 100 */ 101 102/* 103 * XXX disabled until we can find a way to do this without causing 104 * problems for either cpu caches or DMA latency. 105 */ 106boolean_t vm_page_zero_enable = FALSE; 107 108/* 109 * local variables 110 */ 111 112/* 113 * these variables record the values returned by vm_page_bootstrap, 114 * for debugging purposes. The implementation of uvm_pageboot_alloc 115 * and pmap_startup here also uses them internally. 116 */ 117 118static vaddr_t virtual_space_start; 119static vaddr_t virtual_space_end; 120 121/* 122 * we use a hash table with only one bucket during bootup. we will 123 * later rehash (resize) the hash table once the allocator is ready. 124 * we static allocate the one bootstrap bucket below... 125 */ 126 127static struct pglist uvm_bootbucket; 128 129/* 130 * History 131 */ 132UVMHIST_DECL(pghist); 133 134/* 135 * local prototypes 136 */ 137 138static void uvm_pageinsert(struct vm_page *); 139static void uvm_pageremove(struct vm_page *); 140 141/* 142 * inline functions 143 */ 144 145/* 146 * uvm_pageinsert: insert a page in the object and the hash table 147 * 148 * => caller must lock object 149 * => caller must lock page queues 150 * => call should have already set pg's object and offset pointers 151 * and bumped the version counter 152 */ 153 154__inline static void 155uvm_pageinsert(struct vm_page *pg) 156{ 157 struct pglist *buck; 158 int s; 159 UVMHIST_FUNC("uvm_pageinsert"); UVMHIST_CALLED(pghist); 160 161 KASSERT((pg->pg_flags & PG_TABLED) == 0); 162 buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)]; 163 s = splvm(); 164 simple_lock(&uvm.hashlock); 165 TAILQ_INSERT_TAIL(buck, pg, hashq); /* put in hash */ 166 simple_unlock(&uvm.hashlock); 167 splx(s); 168 169 TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */ 170 atomic_setbits_int(&pg->pg_flags, PG_TABLED); 171 pg->uobject->uo_npages++; 172} 173 174/* 175 * uvm_page_remove: remove page from object and hash 176 * 177 * => caller must lock object 178 * => caller must lock page queues 179 */ 180 181static __inline void 182uvm_pageremove(struct vm_page *pg) 183{ 184 struct pglist *buck; 185 int s; 186 UVMHIST_FUNC("uvm_pageremove"); UVMHIST_CALLED(pghist); 187 188 KASSERT(pg->pg_flags & PG_TABLED); 189 buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)]; 190 s = splvm(); 191 simple_lock(&uvm.hashlock); 192 TAILQ_REMOVE(buck, pg, hashq); 193 simple_unlock(&uvm.hashlock); 194 splx(s); 195 196#ifdef UBC 197 if (pg->uobject->pgops == &uvm_vnodeops) { 198 uvm_pgcnt_vnode--; 199 } 200#endif 201 202 /* object should be locked */ 203 TAILQ_REMOVE(&pg->uobject->memq, pg, listq); 204 205 atomic_clearbits_int(&pg->pg_flags, PG_TABLED); 206 pg->uobject->uo_npages--; 207 pg->uobject = NULL; 208 pg->pg_version++; 209} 210 211/* 212 * uvm_page_init: init the page system. called from uvm_init(). 213 * 214 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp 215 */ 216 217void 218uvm_page_init(kvm_startp, kvm_endp) 219 vaddr_t *kvm_startp, *kvm_endp; 220{ 221 vsize_t freepages, pagecount, n; 222 vm_page_t pagearray; 223 int lcv, i; 224 paddr_t paddr; 225#if defined(UVMHIST) 226 static struct uvm_history_ent pghistbuf[100]; 227#endif 228 229 UVMHIST_FUNC("uvm_page_init"); 230 UVMHIST_INIT_STATIC(pghist, pghistbuf); 231 UVMHIST_CALLED(pghist); 232 233 /* 234 * init the page queues and page queue locks 235 */ 236 237 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 238 for (i = 0; i < PGFL_NQUEUES; i++) 239 TAILQ_INIT(&uvm.page_free[lcv].pgfl_queues[i]); 240 } 241 TAILQ_INIT(&uvm.page_active); 242 TAILQ_INIT(&uvm.page_inactive_swp); 243 TAILQ_INIT(&uvm.page_inactive_obj); 244 simple_lock_init(&uvm.pageqlock); 245 mtx_init(&uvm.fpageqlock, IPL_VM); 246 247 /* 248 * init the <obj,offset> => <page> hash table. for now 249 * we just have one bucket (the bootstrap bucket). later on we 250 * will allocate new buckets as we dynamically resize the hash table. 251 */ 252 253 uvm.page_nhash = 1; /* 1 bucket */ 254 uvm.page_hashmask = 0; /* mask for hash function */ 255 uvm.page_hash = &uvm_bootbucket; /* install bootstrap bucket */ 256 TAILQ_INIT(uvm.page_hash); /* init hash table */ 257 simple_lock_init(&uvm.hashlock); /* init hash table lock */ 258 259 /* 260 * allocate vm_page structures. 261 */ 262 263 /* 264 * sanity check: 265 * before calling this function the MD code is expected to register 266 * some free RAM with the uvm_page_physload() function. our job 267 * now is to allocate vm_page structures for this memory. 268 */ 269 270 if (vm_nphysseg == 0) 271 panic("uvm_page_bootstrap: no memory pre-allocated"); 272 273 /* 274 * first calculate the number of free pages... 275 * 276 * note that we use start/end rather than avail_start/avail_end. 277 * this allows us to allocate extra vm_page structures in case we 278 * want to return some memory to the pool after booting. 279 */ 280 281 freepages = 0; 282 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) 283 freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start); 284 285 /* 286 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can 287 * use. for each page of memory we use we need a vm_page structure. 288 * thus, the total number of pages we can use is the total size of 289 * the memory divided by the PAGE_SIZE plus the size of the vm_page 290 * structure. we add one to freepages as a fudge factor to avoid 291 * truncation errors (since we can only allocate in terms of whole 292 * pages). 293 */ 294 295 pagecount = (((paddr_t)freepages + 1) << PAGE_SHIFT) / 296 (PAGE_SIZE + sizeof(struct vm_page)); 297 pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount * 298 sizeof(struct vm_page)); 299 memset(pagearray, 0, pagecount * sizeof(struct vm_page)); 300 301 /* 302 * init the vm_page structures and put them in the correct place. 303 */ 304 305 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) { 306 n = vm_physmem[lcv].end - vm_physmem[lcv].start; 307 if (n > pagecount) { 308 printf("uvm_page_init: lost %ld page(s) in init\n", 309 (long)(n - pagecount)); 310 panic("uvm_page_init"); /* XXXCDC: shouldn't happen? */ 311 /* n = pagecount; */ 312 } 313 314 /* set up page array pointers */ 315 vm_physmem[lcv].pgs = pagearray; 316 pagearray += n; 317 pagecount -= n; 318 vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1); 319 320 /* init and free vm_pages (we've already zeroed them) */ 321 paddr = ptoa(vm_physmem[lcv].start); 322 for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) { 323 vm_physmem[lcv].pgs[i].phys_addr = paddr; 324#ifdef __HAVE_VM_PAGE_MD 325 VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]); 326#endif 327 if (atop(paddr) >= vm_physmem[lcv].avail_start && 328 atop(paddr) <= vm_physmem[lcv].avail_end) { 329 uvmexp.npages++; 330 /* add page to free pool */ 331 uvm_pagefree(&vm_physmem[lcv].pgs[i]); 332 } 333 } 334 } 335 336 /* 337 * pass up the values of virtual_space_start and 338 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper 339 * layers of the VM. 340 */ 341 342 *kvm_startp = round_page(virtual_space_start); 343 *kvm_endp = trunc_page(virtual_space_end); 344 345 /* 346 * init locks for kernel threads 347 */ 348 349 simple_lock_init(&uvm.pagedaemon_lock); 350 simple_lock_init(&uvm.aiodoned_lock); 351 352 /* 353 * init reserve thresholds 354 * XXXCDC - values may need adjusting 355 */ 356 uvmexp.reserve_pagedaemon = 4; 357 uvmexp.reserve_kernel = 6; 358 uvmexp.anonminpct = 10; 359 uvmexp.vnodeminpct = 10; 360 uvmexp.vtextminpct = 5; 361 uvmexp.anonmin = uvmexp.anonminpct * 256 / 100; 362 uvmexp.vnodemin = uvmexp.vnodeminpct * 256 / 100; 363 uvmexp.vtextmin = uvmexp.vtextminpct * 256 / 100; 364 365 /* 366 * determine if we should zero pages in the idle loop. 367 */ 368 369 uvm.page_idle_zero = vm_page_zero_enable; 370 371 /* 372 * done! 373 */ 374 375 uvm.page_init_done = TRUE; 376} 377 378/* 379 * uvm_setpagesize: set the page size 380 * 381 * => sets page_shift and page_mask from uvmexp.pagesize. 382 */ 383 384void 385uvm_setpagesize() 386{ 387 if (uvmexp.pagesize == 0) 388 uvmexp.pagesize = DEFAULT_PAGE_SIZE; 389 uvmexp.pagemask = uvmexp.pagesize - 1; 390 if ((uvmexp.pagemask & uvmexp.pagesize) != 0) 391 panic("uvm_setpagesize: page size not a power of two"); 392 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) 393 if ((1 << uvmexp.pageshift) == uvmexp.pagesize) 394 break; 395} 396 397/* 398 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping 399 */ 400 401vaddr_t 402uvm_pageboot_alloc(size) 403 vsize_t size; 404{ 405#if defined(PMAP_STEAL_MEMORY) 406 vaddr_t addr; 407 408 /* 409 * defer bootstrap allocation to MD code (it may want to allocate 410 * from a direct-mapped segment). pmap_steal_memory should round 411 * off virtual_space_start/virtual_space_end. 412 */ 413 414 addr = pmap_steal_memory(size, &virtual_space_start, 415 &virtual_space_end); 416 417 return(addr); 418 419#else /* !PMAP_STEAL_MEMORY */ 420 421 static boolean_t initialized = FALSE; 422 vaddr_t addr, vaddr; 423 paddr_t paddr; 424 425 /* round to page size */ 426 size = round_page(size); 427 428 /* 429 * on first call to this function, initialize ourselves. 430 */ 431 if (initialized == FALSE) { 432 pmap_virtual_space(&virtual_space_start, &virtual_space_end); 433 434 /* round it the way we like it */ 435 virtual_space_start = round_page(virtual_space_start); 436 virtual_space_end = trunc_page(virtual_space_end); 437 438 initialized = TRUE; 439 } 440 441 /* 442 * allocate virtual memory for this request 443 */ 444 if (virtual_space_start == virtual_space_end || 445 (virtual_space_end - virtual_space_start) < size) 446 panic("uvm_pageboot_alloc: out of virtual space"); 447 448 addr = virtual_space_start; 449 450#ifdef PMAP_GROWKERNEL 451 /* 452 * If the kernel pmap can't map the requested space, 453 * then allocate more resources for it. 454 */ 455 if (uvm_maxkaddr < (addr + size)) { 456 uvm_maxkaddr = pmap_growkernel(addr + size); 457 if (uvm_maxkaddr < (addr + size)) 458 panic("uvm_pageboot_alloc: pmap_growkernel() failed"); 459 } 460#endif 461 462 virtual_space_start += size; 463 464 /* 465 * allocate and mapin physical pages to back new virtual pages 466 */ 467 468 for (vaddr = round_page(addr) ; vaddr < addr + size ; 469 vaddr += PAGE_SIZE) { 470 471 if (!uvm_page_physget(&paddr)) 472 panic("uvm_pageboot_alloc: out of memory"); 473 474 /* 475 * Note this memory is no longer managed, so using 476 * pmap_kenter is safe. 477 */ 478 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE); 479 } 480 pmap_update(pmap_kernel()); 481 return(addr); 482#endif /* PMAP_STEAL_MEMORY */ 483} 484 485#if !defined(PMAP_STEAL_MEMORY) 486/* 487 * uvm_page_physget: "steal" one page from the vm_physmem structure. 488 * 489 * => attempt to allocate it off the end of a segment in which the "avail" 490 * values match the start/end values. if we can't do that, then we 491 * will advance both values (making them equal, and removing some 492 * vm_page structures from the non-avail area). 493 * => return false if out of memory. 494 */ 495 496/* subroutine: try to allocate from memory chunks on the specified freelist */ 497static boolean_t uvm_page_physget_freelist(paddr_t *, int); 498 499static boolean_t 500uvm_page_physget_freelist(paddrp, freelist) 501 paddr_t *paddrp; 502 int freelist; 503{ 504 int lcv, x; 505 UVMHIST_FUNC("uvm_page_physget_freelist"); UVMHIST_CALLED(pghist); 506 507 /* pass 1: try allocating from a matching end */ 508#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \ 509 (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) 510 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--) 511#else 512 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) 513#endif 514 { 515 516 if (uvm.page_init_done == TRUE) 517 panic("uvm_page_physget: called _after_ bootstrap"); 518 519 if (vm_physmem[lcv].free_list != freelist) 520 continue; 521 522 /* try from front */ 523 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start && 524 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) { 525 *paddrp = ptoa(vm_physmem[lcv].avail_start); 526 vm_physmem[lcv].avail_start++; 527 vm_physmem[lcv].start++; 528 /* nothing left? nuke it */ 529 if (vm_physmem[lcv].avail_start == 530 vm_physmem[lcv].end) { 531 if (vm_nphysseg == 1) 532 panic("uvm_page_physget: out of memory!"); 533 vm_nphysseg--; 534 for (x = lcv ; x < vm_nphysseg ; x++) 535 /* structure copy */ 536 vm_physmem[x] = vm_physmem[x+1]; 537 } 538 return (TRUE); 539 } 540 541 /* try from rear */ 542 if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end && 543 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) { 544 *paddrp = ptoa(vm_physmem[lcv].avail_end - 1); 545 vm_physmem[lcv].avail_end--; 546 vm_physmem[lcv].end--; 547 /* nothing left? nuke it */ 548 if (vm_physmem[lcv].avail_end == 549 vm_physmem[lcv].start) { 550 if (vm_nphysseg == 1) 551 panic("uvm_page_physget: out of memory!"); 552 vm_nphysseg--; 553 for (x = lcv ; x < vm_nphysseg ; x++) 554 /* structure copy */ 555 vm_physmem[x] = vm_physmem[x+1]; 556 } 557 return (TRUE); 558 } 559 } 560 561 /* pass2: forget about matching ends, just allocate something */ 562#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) || \ 563 (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) 564 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--) 565#else 566 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) 567#endif 568 { 569 570 /* any room in this bank? */ 571 if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end) 572 continue; /* nope */ 573 574 *paddrp = ptoa(vm_physmem[lcv].avail_start); 575 vm_physmem[lcv].avail_start++; 576 /* truncate! */ 577 vm_physmem[lcv].start = vm_physmem[lcv].avail_start; 578 579 /* nothing left? nuke it */ 580 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) { 581 if (vm_nphysseg == 1) 582 panic("uvm_page_physget: out of memory!"); 583 vm_nphysseg--; 584 for (x = lcv ; x < vm_nphysseg ; x++) 585 /* structure copy */ 586 vm_physmem[x] = vm_physmem[x+1]; 587 } 588 return (TRUE); 589 } 590 591 return (FALSE); /* whoops! */ 592} 593 594boolean_t 595uvm_page_physget(paddrp) 596 paddr_t *paddrp; 597{ 598 int i; 599 UVMHIST_FUNC("uvm_page_physget"); UVMHIST_CALLED(pghist); 600 601 /* try in the order of freelist preference */ 602 for (i = 0; i < VM_NFREELIST; i++) 603 if (uvm_page_physget_freelist(paddrp, i) == TRUE) 604 return (TRUE); 605 return (FALSE); 606} 607#endif /* PMAP_STEAL_MEMORY */ 608 609/* 610 * uvm_page_physload: load physical memory into VM system 611 * 612 * => all args are PFs 613 * => all pages in start/end get vm_page structures 614 * => areas marked by avail_start/avail_end get added to the free page pool 615 * => we are limited to VM_PHYSSEG_MAX physical memory segments 616 */ 617 618void 619uvm_page_physload(start, end, avail_start, avail_end, free_list) 620 paddr_t start, end, avail_start, avail_end; 621 int free_list; 622{ 623 int preload, lcv; 624 psize_t npages; 625 struct vm_page *pgs; 626 struct vm_physseg *ps; 627 628 if (uvmexp.pagesize == 0) 629 panic("uvm_page_physload: page size not set!"); 630 631 if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT) 632 panic("uvm_page_physload: bad free list %d", free_list); 633 634 if (start >= end) 635 panic("uvm_page_physload: start >= end"); 636 637 /* 638 * do we have room? 639 */ 640 if (vm_nphysseg == VM_PHYSSEG_MAX) { 641 printf("uvm_page_physload: unable to load physical memory " 642 "segment\n"); 643 printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n", 644 VM_PHYSSEG_MAX, (long long)start, (long long)end); 645 printf("\tincrease VM_PHYSSEG_MAX\n"); 646 return; 647 } 648 649 /* 650 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been 651 * called yet, so malloc is not available). 652 */ 653 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) { 654 if (vm_physmem[lcv].pgs) 655 break; 656 } 657 preload = (lcv == vm_nphysseg); 658 659 /* 660 * if VM is already running, attempt to malloc() vm_page structures 661 */ 662 if (!preload) { 663#if defined(VM_PHYSSEG_NOADD) 664 panic("uvm_page_physload: tried to add RAM after vm_mem_init"); 665#else 666 /* XXXCDC: need some sort of lockout for this case */ 667 paddr_t paddr; 668 npages = end - start; /* # of pages */ 669 pgs = (vm_page *)uvm_km_alloc(kernel_map, 670 sizeof(struct vm_page) * npages); 671 if (pgs == NULL) { 672 printf("uvm_page_physload: can not malloc vm_page " 673 "structs for segment\n"); 674 printf("\tignoring 0x%lx -> 0x%lx\n", start, end); 675 return; 676 } 677 /* zero data, init phys_addr and free_list, and free pages */ 678 memset(pgs, 0, sizeof(struct vm_page) * npages); 679 for (lcv = 0, paddr = ptoa(start) ; 680 lcv < npages ; lcv++, paddr += PAGE_SIZE) { 681 pgs[lcv].phys_addr = paddr; 682 pgs[lcv].free_list = free_list; 683 if (atop(paddr) >= avail_start && 684 atop(paddr) <= avail_end) 685 uvm_pagefree(&pgs[lcv]); 686 } 687 /* XXXCDC: incomplete: need to update uvmexp.free, what else? */ 688 /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */ 689#endif 690 } else { 691 692 /* gcc complains if these don't get init'd */ 693 pgs = NULL; 694 npages = 0; 695 696 } 697 698 /* 699 * now insert us in the proper place in vm_physmem[] 700 */ 701 702#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM) 703 704 /* random: put it at the end (easy!) */ 705 ps = &vm_physmem[vm_nphysseg]; 706 707#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) 708 709 { 710 int x; 711 /* sort by address for binary search */ 712 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) 713 if (start < vm_physmem[lcv].start) 714 break; 715 ps = &vm_physmem[lcv]; 716 /* move back other entries, if necessary ... */ 717 for (x = vm_nphysseg ; x > lcv ; x--) 718 /* structure copy */ 719 vm_physmem[x] = vm_physmem[x - 1]; 720 } 721 722#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 723 724 { 725 int x; 726 /* sort by largest segment first */ 727 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) 728 if ((end - start) > 729 (vm_physmem[lcv].end - vm_physmem[lcv].start)) 730 break; 731 ps = &vm_physmem[lcv]; 732 /* move back other entries, if necessary ... */ 733 for (x = vm_nphysseg ; x > lcv ; x--) 734 /* structure copy */ 735 vm_physmem[x] = vm_physmem[x - 1]; 736 } 737 738#else 739 740 panic("uvm_page_physload: unknown physseg strategy selected!"); 741 742#endif 743 744 ps->start = start; 745 ps->end = end; 746 ps->avail_start = avail_start; 747 ps->avail_end = avail_end; 748 if (preload) { 749 ps->pgs = NULL; 750 } else { 751 ps->pgs = pgs; 752 ps->lastpg = pgs + npages - 1; 753 } 754 ps->free_list = free_list; 755 vm_nphysseg++; 756 757 /* 758 * done! 759 */ 760 761 if (!preload) 762 uvm_page_rehash(); 763 764 return; 765} 766 767/* 768 * uvm_page_rehash: reallocate hash table based on number of free pages. 769 */ 770 771void 772uvm_page_rehash() 773{ 774 int freepages, lcv, bucketcount, s, oldcount; 775 struct pglist *newbuckets, *oldbuckets; 776 struct vm_page *pg; 777 size_t newsize, oldsize; 778 779 /* 780 * compute number of pages that can go in the free pool 781 */ 782 783 freepages = 0; 784 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) 785 freepages += 786 (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start); 787 788 /* 789 * compute number of buckets needed for this number of pages 790 */ 791 792 bucketcount = 1; 793 while (bucketcount < freepages) 794 bucketcount = bucketcount * 2; 795 796 /* 797 * compute the size of the current table and new table. 798 */ 799 800 oldbuckets = uvm.page_hash; 801 oldcount = uvm.page_nhash; 802 oldsize = round_page(sizeof(struct pglist) * oldcount); 803 newsize = round_page(sizeof(struct pglist) * bucketcount); 804 805 /* 806 * allocate the new buckets 807 */ 808 809 newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize); 810 if (newbuckets == NULL) { 811 printf("uvm_page_physrehash: WARNING: could not grow page " 812 "hash table\n"); 813 return; 814 } 815 for (lcv = 0 ; lcv < bucketcount ; lcv++) 816 TAILQ_INIT(&newbuckets[lcv]); 817 818 /* 819 * now replace the old buckets with the new ones and rehash everything 820 */ 821 822 s = splvm(); 823 simple_lock(&uvm.hashlock); 824 uvm.page_hash = newbuckets; 825 uvm.page_nhash = bucketcount; 826 uvm.page_hashmask = bucketcount - 1; /* power of 2 */ 827 828 /* ... and rehash */ 829 for (lcv = 0 ; lcv < oldcount ; lcv++) { 830 while ((pg = TAILQ_FIRST(&oldbuckets[lcv])) != NULL) { 831 TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq); 832 TAILQ_INSERT_TAIL( 833 &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)], 834 pg, hashq); 835 } 836 } 837 simple_unlock(&uvm.hashlock); 838 splx(s); 839 840 /* 841 * free old bucket array if is not the boot-time table 842 */ 843 844 if (oldbuckets != &uvm_bootbucket) 845 uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize); 846 847 /* 848 * done 849 */ 850 return; 851} 852 853 854#ifdef DDB /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */ 855 856void uvm_page_physdump(void); /* SHUT UP GCC */ 857 858/* call from DDB */ 859void 860uvm_page_physdump() 861{ 862 int lcv; 863 864 printf("rehash: physical memory config [segs=%d of %d]:\n", 865 vm_nphysseg, VM_PHYSSEG_MAX); 866 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) 867 printf("0x%llx->0x%llx [0x%llx->0x%llx]\n", 868 (long long)vm_physmem[lcv].start, 869 (long long)vm_physmem[lcv].end, 870 (long long)vm_physmem[lcv].avail_start, 871 (long long)vm_physmem[lcv].avail_end); 872 printf("STRATEGY = "); 873 switch (VM_PHYSSEG_STRAT) { 874 case VM_PSTRAT_RANDOM: printf("RANDOM\n"); break; 875 case VM_PSTRAT_BSEARCH: printf("BSEARCH\n"); break; 876 case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break; 877 default: printf("<<UNKNOWN>>!!!!\n"); 878 } 879 printf("number of buckets = %d\n", uvm.page_nhash); 880} 881#endif 882 883void 884uvm_shutdown(void) 885{ 886#ifdef UVM_SWAP_ENCRYPT 887 uvm_swap_finicrypt_all(); 888#endif 889} 890 891/* 892 * uvm_pagealloc_strat: allocate vm_page from a particular free list. 893 * 894 * => return null if no pages free 895 * => wake up pagedaemon if number of free pages drops below low water mark 896 * => if obj != NULL, obj must be locked (to put in hash) 897 * => if anon != NULL, anon must be locked (to put in anon) 898 * => only one of obj or anon can be non-null 899 * => caller must activate/deactivate page if it is not wired. 900 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL. 901 * => policy decision: it is more important to pull a page off of the 902 * appropriate priority free list than it is to get a zero'd or 903 * unknown contents page. This is because we live with the 904 * consequences of a bad free list decision for the entire 905 * lifetime of the page, e.g. if the page comes from memory that 906 * is slower to access. 907 */ 908 909struct vm_page * 910uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) 911 struct uvm_object *obj; 912 voff_t off; 913 int flags; 914 struct vm_anon *anon; 915 int strat, free_list; 916{ 917 int lcv, try1, try2, zeroit = 0; 918 struct vm_page *pg; 919 struct pglist *freeq; 920 struct pgfreelist *pgfl; 921 boolean_t use_reserve; 922 UVMHIST_FUNC("uvm_pagealloc_strat"); UVMHIST_CALLED(pghist); 923 924 KASSERT(obj == NULL || anon == NULL); 925 KASSERT(off == trunc_page(off)); 926 927 uvm_lock_fpageq(); 928 929 /* 930 * check to see if we need to generate some free pages waking 931 * the pagedaemon. 932 */ 933 if ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freemin || 934 ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg && 935 uvmexp.inactive < uvmexp.inactarg)) 936 wakeup(&uvm.pagedaemon); 937 938 /* 939 * fail if any of these conditions is true: 940 * [1] there really are no free pages, or 941 * [2] only kernel "reserved" pages remain and 942 * the page isn't being allocated to a kernel object. 943 * [3] only pagedaemon "reserved" pages remain and 944 * the requestor isn't the pagedaemon. 945 */ 946 947 use_reserve = (flags & UVM_PGA_USERESERVE) || 948 (obj && UVM_OBJ_IS_KERN_OBJECT(obj)); 949 if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) || 950 (uvmexp.free <= uvmexp.reserve_pagedaemon && 951 !((curproc == uvm.pagedaemon_proc) || 952 (curproc == syncerproc)))) 953 goto fail; 954 955#if PGFL_NQUEUES != 2 956#error uvm_pagealloc_strat needs to be updated 957#endif 958 959 /* 960 * If we want a zero'd page, try the ZEROS queue first, otherwise 961 * we try the UNKNOWN queue first. 962 */ 963 if (flags & UVM_PGA_ZERO) { 964 try1 = PGFL_ZEROS; 965 try2 = PGFL_UNKNOWN; 966 } else { 967 try1 = PGFL_UNKNOWN; 968 try2 = PGFL_ZEROS; 969 } 970 971 UVMHIST_LOG(pghist, "obj=%p off=%lx anon=%p flags=%lx", 972 obj, (u_long)off, anon, flags); 973 UVMHIST_LOG(pghist, "strat=%ld free_list=%ld", strat, free_list, 0, 0); 974 again: 975 switch (strat) { 976 case UVM_PGA_STRAT_NORMAL: 977 /* Check all freelists in descending priority order. */ 978 for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 979 pgfl = &uvm.page_free[lcv]; 980 if ((pg = TAILQ_FIRST((freeq = 981 &pgfl->pgfl_queues[try1]))) != NULL || 982 (pg = TAILQ_FIRST((freeq = 983 &pgfl->pgfl_queues[try2]))) != NULL) 984 goto gotit; 985 } 986 987 /* No pages free! */ 988 goto fail; 989 990 case UVM_PGA_STRAT_ONLY: 991 case UVM_PGA_STRAT_FALLBACK: 992 /* Attempt to allocate from the specified free list. */ 993 KASSERT(free_list >= 0 && free_list < VM_NFREELIST); 994 pgfl = &uvm.page_free[free_list]; 995 if ((pg = TAILQ_FIRST((freeq = 996 &pgfl->pgfl_queues[try1]))) != NULL || 997 (pg = TAILQ_FIRST((freeq = 998 &pgfl->pgfl_queues[try2]))) != NULL) 999 goto gotit; 1000 1001 /* Fall back, if possible. */ 1002 if (strat == UVM_PGA_STRAT_FALLBACK) { 1003 strat = UVM_PGA_STRAT_NORMAL; 1004 goto again; 1005 } 1006 1007 /* No pages free! */ 1008 goto fail; 1009 1010 default: 1011 panic("uvm_pagealloc_strat: bad strat %d", strat); 1012 /* NOTREACHED */ 1013 } 1014 1015 gotit: 1016 TAILQ_REMOVE(freeq, pg, pageq); 1017 uvmexp.free--; 1018 1019 /* update zero'd page count */ 1020 if (pg->pg_flags & PG_ZERO) 1021 uvmexp.zeropages--; 1022 1023 /* 1024 * update allocation statistics and remember if we have to 1025 * zero the page 1026 */ 1027 if (flags & UVM_PGA_ZERO) { 1028 if (pg->pg_flags & PG_ZERO) { 1029 uvmexp.pga_zerohit++; 1030 zeroit = 0; 1031 } else { 1032 uvmexp.pga_zeromiss++; 1033 zeroit = 1; 1034 } 1035 } 1036 1037 uvm_unlock_fpageq(); /* unlock free page queue */ 1038 1039 pg->offset = off; 1040 pg->uobject = obj; 1041 pg->uanon = anon; 1042 pg->pg_flags = PG_BUSY|PG_CLEAN|PG_FAKE; 1043 pg->pg_version++; 1044 if (anon) { 1045 anon->an_page = pg; 1046 atomic_setbits_int(&pg->pg_flags, PQ_ANON); 1047#ifdef UBC 1048 uvm_pgcnt_anon++; 1049#endif 1050 } else { 1051 if (obj) 1052 uvm_pageinsert(pg); 1053 } 1054#if defined(UVM_PAGE_TRKOWN) 1055 pg->owner_tag = NULL; 1056#endif 1057 UVM_PAGE_OWN(pg, "new alloc"); 1058 1059 if (flags & UVM_PGA_ZERO) { 1060 /* 1061 * A zero'd page is not clean. If we got a page not already 1062 * zero'd, then we have to zero it ourselves. 1063 */ 1064 atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); 1065 if (zeroit) 1066 pmap_zero_page(pg); 1067 } 1068 1069 UVMHIST_LOG(pghist, "allocated pg %p/%lx", pg, 1070 (u_long)VM_PAGE_TO_PHYS(pg), 0, 0); 1071 return(pg); 1072 1073 fail: 1074 uvm_unlock_fpageq(); 1075 UVMHIST_LOG(pghist, "failed!", 0, 0, 0, 0); 1076 return (NULL); 1077} 1078 1079/* 1080 * uvm_pagerealloc: reallocate a page from one object to another 1081 * 1082 * => both objects must be locked 1083 */ 1084 1085void 1086uvm_pagerealloc(pg, newobj, newoff) 1087 struct vm_page *pg; 1088 struct uvm_object *newobj; 1089 voff_t newoff; 1090{ 1091 1092 UVMHIST_FUNC("uvm_pagerealloc"); UVMHIST_CALLED(pghist); 1093 1094 /* 1095 * remove it from the old object 1096 */ 1097 1098 if (pg->uobject) { 1099 uvm_pageremove(pg); 1100 } 1101 1102 /* 1103 * put it in the new object 1104 */ 1105 1106 if (newobj) { 1107 pg->uobject = newobj; 1108 pg->offset = newoff; 1109 pg->pg_version++; 1110 uvm_pageinsert(pg); 1111 } 1112} 1113 1114 1115/* 1116 * uvm_pagefree: free page 1117 * 1118 * => erase page's identity (i.e. remove from hash/object) 1119 * => put page on free list 1120 * => caller must lock owning object (either anon or uvm_object) 1121 * => caller must lock page queues 1122 * => assumes all valid mappings of pg are gone 1123 */ 1124 1125void 1126uvm_pagefree(struct vm_page *pg) 1127{ 1128 int saved_loan_count = pg->loan_count; 1129 UVMHIST_FUNC("uvm_pagefree"); UVMHIST_CALLED(pghist); 1130 1131#ifdef DEBUG 1132 if (pg->uobject == (void *)0xdeadbeef && 1133 pg->uanon == (void *)0xdeadbeef) { 1134 panic("uvm_pagefree: freeing free page %p", pg); 1135 } 1136#endif 1137 1138 UVMHIST_LOG(pghist, "freeing pg %p/%lx", pg, 1139 (u_long)VM_PAGE_TO_PHYS(pg), 0, 0); 1140 1141 /* 1142 * if the page was an object page (and thus "TABLED"), remove it 1143 * from the object. 1144 */ 1145 1146 if (pg->pg_flags & PG_TABLED) { 1147 1148 /* 1149 * if the object page is on loan we are going to drop ownership. 1150 * it is possible that an anon will take over as owner for this 1151 * page later on. the anon will want a !PG_CLEAN page so that 1152 * it knows it needs to allocate swap if it wants to page the 1153 * page out. 1154 */ 1155 1156 /* in case an anon takes over */ 1157 if (saved_loan_count) 1158 atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); 1159 uvm_pageremove(pg); 1160 1161 /* 1162 * if our page was on loan, then we just lost control over it 1163 * (in fact, if it was loaned to an anon, the anon may have 1164 * already taken over ownership of the page by now and thus 1165 * changed the loan_count [e.g. in uvmfault_anonget()]) we just 1166 * return (when the last loan is dropped, then the page can be 1167 * freed by whatever was holding the last loan). 1168 */ 1169 1170 if (saved_loan_count) 1171 return; 1172 } else if (saved_loan_count && pg->uanon) { 1173 /* 1174 * if our page is owned by an anon and is loaned out to the 1175 * kernel then we just want to drop ownership and return. 1176 * the kernel must free the page when all its loans clear ... 1177 * note that the kernel can't change the loan status of our 1178 * page as long as we are holding PQ lock. 1179 */ 1180 atomic_clearbits_int(&pg->pg_flags, PQ_ANON); 1181 pg->uanon->an_page = NULL; 1182 pg->uanon = NULL; 1183 return; 1184 } 1185 KASSERT(saved_loan_count == 0); 1186 1187 /* 1188 * now remove the page from the queues 1189 */ 1190 1191 if (pg->pg_flags & PQ_ACTIVE) { 1192 TAILQ_REMOVE(&uvm.page_active, pg, pageq); 1193 atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE); 1194 uvmexp.active--; 1195 } 1196 if (pg->pg_flags & PQ_INACTIVE) { 1197 if (pg->pg_flags & PQ_SWAPBACKED) 1198 TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq); 1199 else 1200 TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq); 1201 atomic_clearbits_int(&pg->pg_flags, PQ_INACTIVE); 1202 uvmexp.inactive--; 1203 } 1204 1205 /* 1206 * if the page was wired, unwire it now. 1207 */ 1208 1209 if (pg->wire_count) { 1210 pg->wire_count = 0; 1211 uvmexp.wired--; 1212 } 1213 if (pg->uanon) { 1214 pg->uanon->an_page = NULL; 1215#ifdef UBC 1216 uvm_pgcnt_anon--; 1217#endif 1218 } 1219 1220 /* 1221 * and put on free queue 1222 */ 1223 1224 atomic_clearbits_int(&pg->pg_flags, PG_ZERO); 1225 1226 uvm_lock_fpageq(); 1227 TAILQ_INSERT_TAIL(&uvm.page_free[ 1228 uvm_page_lookup_freelist(pg)].pgfl_queues[PGFL_UNKNOWN], pg, pageq); 1229 atomic_clearbits_int(&pg->pg_flags, PQ_MASK); 1230 atomic_setbits_int(&pg->pg_flags, PQ_FREE); 1231#ifdef DEBUG 1232 pg->uobject = (void *)0xdeadbeef; 1233 pg->offset = 0xdeadbeef; 1234 pg->uanon = (void *)0xdeadbeef; 1235#endif 1236 uvmexp.free++; 1237 1238 if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) 1239 uvm.page_idle_zero = vm_page_zero_enable; 1240 1241 uvm_unlock_fpageq(); 1242} 1243 1244/* 1245 * uvm_page_unbusy: unbusy an array of pages. 1246 * 1247 * => pages must either all belong to the same object, or all belong to anons. 1248 * => if pages are object-owned, object must be locked. 1249 * => if pages are anon-owned, anons must be unlockd and have 0 refcount. 1250 */ 1251 1252void 1253uvm_page_unbusy(pgs, npgs) 1254 struct vm_page **pgs; 1255 int npgs; 1256{ 1257 struct vm_page *pg; 1258 struct uvm_object *uobj; 1259 int i; 1260 UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(pdhist); 1261 1262 for (i = 0; i < npgs; i++) { 1263 pg = pgs[i]; 1264 1265 if (pg == NULL || pg == PGO_DONTCARE) { 1266 continue; 1267 } 1268 if (pg->pg_flags & PG_WANTED) { 1269 wakeup(pg); 1270 } 1271 if (pg->pg_flags & PG_RELEASED) { 1272 UVMHIST_LOG(pdhist, "releasing pg %p", pg,0,0,0); 1273 uobj = pg->uobject; 1274 if (uobj != NULL) { 1275 uobj->pgops->pgo_releasepg(pg, NULL); 1276 } else { 1277 atomic_clearbits_int(&pg->pg_flags, PG_BUSY); 1278 UVM_PAGE_OWN(pg, NULL); 1279 uvm_anfree(pg->uanon); 1280 } 1281 } else { 1282 UVMHIST_LOG(pdhist, "unbusying pg %p", pg,0,0,0); 1283 atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY); 1284 UVM_PAGE_OWN(pg, NULL); 1285 } 1286 } 1287} 1288 1289#if defined(UVM_PAGE_TRKOWN) 1290/* 1291 * uvm_page_own: set or release page ownership 1292 * 1293 * => this is a debugging function that keeps track of who sets PG_BUSY 1294 * and where they do it. it can be used to track down problems 1295 * such a process setting "PG_BUSY" and never releasing it. 1296 * => page's object [if any] must be locked 1297 * => if "tag" is NULL then we are releasing page ownership 1298 */ 1299void 1300uvm_page_own(pg, tag) 1301 struct vm_page *pg; 1302 char *tag; 1303{ 1304 /* gain ownership? */ 1305 if (tag) { 1306 if (pg->owner_tag) { 1307 printf("uvm_page_own: page %p already owned " 1308 "by proc %d [%s]\n", pg, 1309 pg->owner, pg->owner_tag); 1310 panic("uvm_page_own"); 1311 } 1312 pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1; 1313 pg->owner_tag = tag; 1314 return; 1315 } 1316 1317 /* drop ownership */ 1318 if (pg->owner_tag == NULL) { 1319 printf("uvm_page_own: dropping ownership of an non-owned " 1320 "page (%p)\n", pg); 1321 panic("uvm_page_own"); 1322 } 1323 pg->owner_tag = NULL; 1324 return; 1325} 1326#endif 1327 1328/* 1329 * uvm_pageidlezero: zero free pages while the system is idle. 1330 * 1331 * => we do at least one iteration per call, if we are below the target. 1332 * => we loop until we either reach the target or whichqs indicates that 1333 * there is a process ready to run. 1334 */ 1335void 1336uvm_pageidlezero() 1337{ 1338 struct vm_page *pg; 1339 struct pgfreelist *pgfl; 1340 int free_list; 1341 UVMHIST_FUNC("uvm_pageidlezero"); UVMHIST_CALLED(pghist); 1342 1343 do { 1344 uvm_lock_fpageq(); 1345 1346 if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) { 1347 uvm.page_idle_zero = FALSE; 1348 uvm_unlock_fpageq(); 1349 return; 1350 } 1351 1352 for (free_list = 0; free_list < VM_NFREELIST; free_list++) { 1353 pgfl = &uvm.page_free[free_list]; 1354 if ((pg = TAILQ_FIRST(&pgfl->pgfl_queues[ 1355 PGFL_UNKNOWN])) != NULL) 1356 break; 1357 } 1358 1359 if (pg == NULL) { 1360 /* 1361 * No non-zero'd pages; don't bother trying again 1362 * until we know we have non-zero'd pages free. 1363 */ 1364 uvm.page_idle_zero = FALSE; 1365 uvm_unlock_fpageq(); 1366 return; 1367 } 1368 1369 TAILQ_REMOVE(&pgfl->pgfl_queues[PGFL_UNKNOWN], pg, pageq); 1370 uvmexp.free--; 1371 uvm_unlock_fpageq(); 1372 1373#ifdef PMAP_PAGEIDLEZERO 1374 if (PMAP_PAGEIDLEZERO(pg) == FALSE) { 1375 /* 1376 * The machine-dependent code detected some 1377 * reason for us to abort zeroing pages, 1378 * probably because there is a process now 1379 * ready to run. 1380 */ 1381 uvm_lock_fpageq(); 1382 TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_UNKNOWN], 1383 pg, pageq); 1384 uvmexp.free++; 1385 uvmexp.zeroaborts++; 1386 uvm_unlock_fpageq(); 1387 return; 1388 } 1389#else 1390 /* 1391 * XXX This will toast the cache unless the pmap_zero_page() 1392 * XXX implementation does uncached access. 1393 */ 1394 pmap_zero_page(pg); 1395#endif 1396 atomic_setbits_int(&pg->pg_flags, PG_ZERO); 1397 1398 uvm_lock_fpageq(); 1399 TAILQ_INSERT_HEAD(&pgfl->pgfl_queues[PGFL_ZEROS], pg, pageq); 1400 uvmexp.free++; 1401 uvmexp.zeropages++; 1402 uvm_unlock_fpageq(); 1403 } while (curcpu_is_idle()); 1404} 1405 1406/* 1407 * when VM_PHYSSEG_MAX is 1, we can simplify these functions 1408 */ 1409 1410/* 1411 * vm_physseg_find: find vm_physseg structure that belongs to a PA 1412 */ 1413int 1414vm_physseg_find(paddr_t pframe, int *offp) 1415{ 1416#if VM_PHYSSEG_MAX == 1 1417 1418 /* 'contig' case */ 1419 if (pframe >= vm_physmem[0].start && pframe < vm_physmem[0].end) { 1420 if (offp) 1421 *offp = pframe - vm_physmem[0].start; 1422 return(0); 1423 } 1424 return(-1); 1425 1426#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) 1427 /* binary search for it */ 1428 int start, len, try; 1429 1430 /* 1431 * if try is too large (thus target is less than than try) we reduce 1432 * the length to trunc(len/2) [i.e. everything smaller than "try"] 1433 * 1434 * if the try is too small (thus target is greater than try) then 1435 * we set the new start to be (try + 1). this means we need to 1436 * reduce the length to (round(len/2) - 1). 1437 * 1438 * note "adjust" below which takes advantage of the fact that 1439 * (round(len/2) - 1) == trunc((len - 1) / 2) 1440 * for any value of len we may have 1441 */ 1442 1443 for (start = 0, len = vm_nphysseg ; len != 0 ; len = len / 2) { 1444 try = start + (len / 2); /* try in the middle */ 1445 1446 /* start past our try? */ 1447 if (pframe >= vm_physmem[try].start) { 1448 /* was try correct? */ 1449 if (pframe < vm_physmem[try].end) { 1450 if (offp) 1451 *offp = pframe - vm_physmem[try].start; 1452 return(try); /* got it */ 1453 } 1454 start = try + 1; /* next time, start here */ 1455 len--; /* "adjust" */ 1456 } else { 1457 /* 1458 * pframe before try, just reduce length of 1459 * region, done in "for" loop 1460 */ 1461 } 1462 } 1463 return(-1); 1464 1465#else 1466 /* linear search for it */ 1467 int lcv; 1468 1469 for (lcv = 0; lcv < vm_nphysseg; lcv++) { 1470 if (pframe >= vm_physmem[lcv].start && 1471 pframe < vm_physmem[lcv].end) { 1472 if (offp) 1473 *offp = pframe - vm_physmem[lcv].start; 1474 return(lcv); /* got it */ 1475 } 1476 } 1477 return(-1); 1478 1479#endif 1480} 1481 1482/* 1483 * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages 1484 * back from an I/O mapping (ugh!). used in some MD code as well. 1485 */ 1486struct vm_page * 1487PHYS_TO_VM_PAGE(paddr_t pa) 1488{ 1489 paddr_t pf = atop(pa); 1490 int off; 1491 int psi; 1492 1493 psi = vm_physseg_find(pf, &off); 1494 1495 return ((psi == -1) ? NULL : &vm_physmem[psi].pgs[off]); 1496} 1497 1498