/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>
#include <sys/seq.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

#ifdef NUMA
struct mem_affinity __read_mostly *mem_affinity;
int __read_mostly *mem_locality;
#endif

int __read_mostly vm_ndomains = 1;
domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);

struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
int __read_mostly vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
        RB_ENTRY(vm_phys_fictitious_seg) node;
        /* Memory region data */
        vm_paddr_t start;
        vm_paddr_t end;
        vm_page_t first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock_padalign vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist __aligned(CACHE_LINE_SIZE)
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL]
    [VM_NFREEORDER_MAX];

static int __read_mostly vm_nfreelists;

/*
 * Provides the mapping from VM_FREELIST_* to free list indices (flind).
 */
static int __read_mostly vm_freelist_to_flind[VM_NFREELIST];

CTASSERT(VM_FREELIST_DEFAULT == 0);

#ifdef VM_FREELIST_DMA32
#define VM_DMA32_BOUNDARY ((vm_paddr_t)1 << 32)
#endif
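
/*
 * Note on the free queue layout: vm_phys_free_queues[] is indexed by NUMA
 * domain, free list (flind), pool, and buddy order, in that order.  The
 * entry for order k holds runs of 2^k physically contiguous, naturally
 * aligned pages, so with 4 KB pages (a common configuration, used here
 * only as an example) the order 0 queues hold single pages and an order 9
 * queue holds 2 MB runs.
 */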
/*
 * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about
 * the ordering of the free list boundaries.
 */
#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY)
CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY);
#endif

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_free, "A",
    "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_segs, "A",
    "Phys Seg Info");

#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_vm_phys_locality, "A",
    "Phys Locality Info");
#endif

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
    vm_paddr_t boundary);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order, int tail);

/*
 * Red-black tree helpers for vm fictitious range management.
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

        KASSERT(range->start != 0 && range->end != 0,
            ("Invalid range passed on search for vm_fictitious page"));
        if (p->start >= range->end)
                return (1);
        if (p->start < range->start)
                return (-1);

        return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

        /* Check if this is a search for a page */
        if (p1->end == 0)
                return (vm_phys_fictitious_in_range(p1, p2));

        KASSERT(p2->end != 0,
            ("Invalid range passed as second parameter to vm fictitious comparison"));

        /* Searching to add a new range */
        if (p1->end <= p2->start)
                return (-1);
        if (p1->start >= p2->end)
                return (1);

        panic("Trying to add overlapping vm fictitious ranges:\n"
            "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
            (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

int
vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
{
#ifdef NUMA
        domainset_t mask;
        int i;

        if (vm_ndomains == 1 || mem_affinity == NULL)
                return (0);

        DOMAINSET_ZERO(&mask);
        /*
         * Check for any memory that overlaps low, high.
         */
        for (i = 0; mem_affinity[i].end != 0; i++)
                if (mem_affinity[i].start <= high &&
                    mem_affinity[i].end >= low)
                        DOMAINSET_SET(mem_affinity[i].domain, &mask);
        if (prefer != -1 && DOMAINSET_ISSET(prefer, &mask))
                return (prefer);
        if (DOMAINSET_EMPTY(&mask))
                panic("vm_phys_domain_match: Impossible constraint");
        return (DOMAINSET_FFS(&mask) - 1);
#else
        return (0);
#endif
}
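
/*
 * The handler below backs the vm.phys_free sysctl and prints one table per
 * domain and free list.  Illustrative shape of the output (the counts here
 * are hypothetical, not from any particular machine):
 *
 *   DOMAIN 0:
 *   FREE LIST 0:
 *     ORDER (SIZE)  |  NUMBER
 *                   |  POOL 0  |  POOL 1
 *     --            -- --      -- --
 *     10 ( 4096K)   |       3  |       0
 *      9 ( 2048K)   |       1  |       2
 *     ...
 */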
/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sbuf;
        struct vm_freelist *fl;
        int dom, error, flind, oind, pind;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
        for (dom = 0; dom < vm_ndomains; dom++) {
                sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
                for (flind = 0; flind < vm_nfreelists; flind++) {
                        sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
                            "\n  ORDER (SIZE)  |  NUMBER"
                            "\n              ", flind);
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                sbuf_printf(&sbuf, "  |  POOL %d", pind);
                        sbuf_printf(&sbuf, "\n--            ");
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                sbuf_printf(&sbuf, "-- --      ");
                        sbuf_printf(&sbuf, "--\n");
                        for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
                                sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
                                    1 << (PAGE_SHIFT - 10 + oind));
                                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                        fl = vm_phys_free_queues[dom][flind][pind];
                                        sbuf_printf(&sbuf, "  |  %6d",
                                            fl[oind].lcnt);
                                }
                                sbuf_printf(&sbuf, "\n");
                        }
                }
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sbuf;
        struct vm_phys_seg *seg;
        int error, segind;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
                seg = &vm_phys_segs[segind];
                sbuf_printf(&sbuf, "start:     %#jx\n",
                    (uintmax_t)seg->start);
                sbuf_printf(&sbuf, "end:       %#jx\n",
                    (uintmax_t)seg->end);
                sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
                sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}

/*
 * Return affinity, or -1 if there's no affinity information.
 */
int
vm_phys_mem_affinity(int f, int t)
{

#ifdef NUMA
        if (mem_locality == NULL)
                return (-1);
        if (f >= vm_ndomains || t >= vm_ndomains)
                return (-1);
        return (mem_locality[f * vm_ndomains + t]);
#else
        return (-1);
#endif
}

#ifdef NUMA
/*
 * Outputs the VM locality table.
 */
static int
sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
{
        struct sbuf sbuf;
        int error, i, j;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

        sbuf_printf(&sbuf, "\n");

        for (i = 0; i < vm_ndomains; i++) {
                sbuf_printf(&sbuf, "%d: ", i);
                for (j = 0; j < vm_ndomains; j++) {
                        sbuf_printf(&sbuf, "%d ", vm_phys_mem_affinity(i, j));
                }
                sbuf_printf(&sbuf, "\n");
        }
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}
#endif

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

        m->order = order;
        if (tail)
                TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
        else
                TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
        fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

        TAILQ_REMOVE(&fl[order].pl, m, listq);
        fl[order].lcnt--;
        m->order = VM_NFREEORDER;
}
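
/*
 * In both helpers above, m->order doubles as a free-list membership flag:
 * a page that heads a free run records the run's order, while VM_NFREEORDER
 * (one past the largest valid order) marks a page that is not on any free
 * list.  The KASSERTs throughout this file rely on that sentinel.
 */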
/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
{
        struct vm_phys_seg *seg;

        KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
            ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
        KASSERT(domain >= 0 && domain < vm_ndomains,
            ("vm_phys_create_seg: invalid domain provided"));
        seg = &vm_phys_segs[vm_phys_nsegs++];
        while (seg > vm_phys_segs && (seg - 1)->start >= end) {
                *seg = *(seg - 1);
                seg--;
        }
        seg->start = start;
        seg->end = end;
        seg->domain = domain;
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef NUMA
        int i;

        if (mem_affinity == NULL) {
                _vm_phys_create_seg(start, end, 0);
                return;
        }

        for (i = 0;; i++) {
                if (mem_affinity[i].end == 0)
                        panic("Reached end of affinity info");
                if (mem_affinity[i].end <= start)
                        continue;
                if (mem_affinity[i].start > start)
                        panic("No affinity info for start %jx",
                            (uintmax_t)start);
                if (mem_affinity[i].end >= end) {
                        _vm_phys_create_seg(start, end,
                            mem_affinity[i].domain);
                        break;
                }
                _vm_phys_create_seg(start, mem_affinity[i].end,
                    mem_affinity[i].domain);
                start = mem_affinity[i].end;
        }
#else
        _vm_phys_create_seg(start, end, 0);
#endif
}

/*
 * Add a physical memory segment.
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{
        vm_paddr_t paddr;

        KASSERT((start & PAGE_MASK) == 0,
            ("vm_phys_define_seg: start is not page aligned"));
        KASSERT((end & PAGE_MASK) == 0,
            ("vm_phys_define_seg: end is not page aligned"));

        /*
         * Split the physical memory segment if it spans two or more free
         * list boundaries.
         */
        paddr = start;
#ifdef VM_FREELIST_LOWMEM
        if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) {
                vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY);
                paddr = VM_LOWMEM_BOUNDARY;
        }
#endif
#ifdef VM_FREELIST_DMA32
        if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) {
                vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY);
                paddr = VM_DMA32_BOUNDARY;
        }
#endif
        vm_phys_create_seg(paddr, end);
}
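
/*
 * Example of the splitting performed by vm_phys_add_seg() above, assuming
 * a platform that defines both boundaries, with VM_LOWMEM_BOUNDARY at
 * 16 MB and VM_DMA32_BOUNDARY at 4 GB (the 16 MB value is illustrative;
 * it is set by the platform's vmparam.h): a segment [1 MB, 6 GB) is
 * registered as three segments, [1 MB, 16 MB), [16 MB, 4 GB), and
 * [4 GB, 6 GB), so that each resulting segment lies entirely within one
 * free list.
 */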
/*
 * Initialize the physical memory allocator.
 *
 * Requires that vm_page_array is initialized!
 */
void
vm_phys_init(void)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *end_seg, *prev_seg, *seg, *tmp_seg;
        u_long npages;
        int dom, flind, freelist, oind, pind, segind;

        /*
         * Compute the number of free lists, and generate the mapping from the
         * manifest constants VM_FREELIST_* to the free list indices.
         *
         * Initially, the entries of vm_freelist_to_flind[] are set to either
         * 0 or 1 to indicate which free lists should be created.
         */
        npages = 0;
        for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
                seg = &vm_phys_segs[segind];
#ifdef VM_FREELIST_LOWMEM
                if (seg->end <= VM_LOWMEM_BOUNDARY)
                        vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1;
                else
#endif
#ifdef VM_FREELIST_DMA32
                if (
#ifdef VM_DMA32_NPAGES_THRESHOLD
                    /*
                     * Create the DMA32 free list only if the amount of
                     * physical memory above physical address 4G exceeds the
                     * given threshold.
                     */
                    npages > VM_DMA32_NPAGES_THRESHOLD &&
#endif
                    seg->end <= VM_DMA32_BOUNDARY)
                        vm_freelist_to_flind[VM_FREELIST_DMA32] = 1;
                else
#endif
                {
                        npages += atop(seg->end - seg->start);
                        vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1;
                }
        }
        /* Change each entry into a running total of the free lists. */
        for (freelist = 1; freelist < VM_NFREELIST; freelist++) {
                vm_freelist_to_flind[freelist] +=
                    vm_freelist_to_flind[freelist - 1];
        }
        vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1];
        KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists"));
        /* Change each entry into a free list index. */
        for (freelist = 0; freelist < VM_NFREELIST; freelist++)
                vm_freelist_to_flind[freelist]--;

        /*
         * Initialize the first_page and free_queues fields of each physical
         * memory segment.
         */
#ifdef VM_PHYSSEG_SPARSE
        npages = 0;
#endif
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
                seg->first_page = &vm_page_array[npages];
                npages += atop(seg->end - seg->start);
#else
                seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
#ifdef VM_FREELIST_LOWMEM
                if (seg->end <= VM_LOWMEM_BOUNDARY) {
                        flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM];
                        KASSERT(flind >= 0,
                            ("vm_phys_init: LOWMEM flind < 0"));
                } else
#endif
#ifdef VM_FREELIST_DMA32
                if (seg->end <= VM_DMA32_BOUNDARY) {
                        flind = vm_freelist_to_flind[VM_FREELIST_DMA32];
                        KASSERT(flind >= 0,
                            ("vm_phys_init: DMA32 flind < 0"));
                } else
#endif
                {
                        flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT];
                        KASSERT(flind >= 0,
                            ("vm_phys_init: DEFAULT flind < 0"));
                }
                seg->free_queues = &vm_phys_free_queues[seg->domain][flind];
        }

        /*
         * Coalesce physical memory segments that are contiguous and share the
         * same per-domain free queues.
         */
        prev_seg = vm_phys_segs;
        seg = &vm_phys_segs[1];
        end_seg = &vm_phys_segs[vm_phys_nsegs];
        while (seg < end_seg) {
                if (prev_seg->end == seg->start &&
                    prev_seg->free_queues == seg->free_queues) {
                        prev_seg->end = seg->end;
                        KASSERT(prev_seg->domain == seg->domain,
                            ("vm_phys_init: free queues cannot span domains"));
                        vm_phys_nsegs--;
                        end_seg--;
                        for (tmp_seg = seg; tmp_seg < end_seg; tmp_seg++)
                                *tmp_seg = *(tmp_seg + 1);
                } else {
                        prev_seg = seg;
                        seg++;
                }
        }

        /*
         * Initialize the free queues.
         */
        for (dom = 0; dom < vm_ndomains; dom++) {
                for (flind = 0; flind < vm_nfreelists; flind++) {
                        for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                fl = vm_phys_free_queues[dom][flind][pind];
                                for (oind = 0; oind < VM_NFREEORDER; oind++)
                                        TAILQ_INIT(&fl[oind].pl);
                        }
                }
        }

        rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}
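
/*
 * Worked example of the vm_freelist_to_flind[] computation in
 * vm_phys_init() above, for a hypothetical configuration in which the
 * DEFAULT and LOWMEM lists are populated but the DMA32 list is not
 * (e.g., too little memory above 4 GB to pass the threshold).  With
 * DEFAULT = 0, DMA32 = 1, and LOWMEM = 2 (the nonzero values are
 * platform-defined; only DEFAULT == 0 is guaranteed), the flag array
 * starts as { 1, 0, 1 }.  Taking running totals gives { 1, 1, 2 }, so
 * vm_nfreelists = 2, and subtracting one yields the final indices
 * { 0, 0, 1 }: DEFAULT maps to flind 0, LOWMEM to flind 1, and the
 * absent DMA32 list aliases the flind of the nearest preceding list.
 */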
/*
 * Register info about the NUMA topology of the system.
 *
 * Invoked by platform-dependent code prior to vm_phys_init().
 */
void
vm_phys_register_domains(int ndomains, struct mem_affinity *affinity,
    int *locality)
{
#ifdef NUMA
        int d, i;

        /*
         * For now the only override value that we support is 1, which
         * effectively disables NUMA-awareness in the allocators.
         */
        d = 0;
        TUNABLE_INT_FETCH("vm.numa.disabled", &d);
        if (d)
                ndomains = 1;

        if (ndomains > 1) {
                vm_ndomains = ndomains;
                mem_affinity = affinity;
                mem_locality = locality;
        }

        for (i = 0; i < vm_ndomains; i++)
                DOMAINSET_SET(i, &all_domains);
#else
        (void)ndomains;
        (void)affinity;
        (void)locality;
#endif
}

int
_vm_phys_domain(vm_paddr_t pa)
{
#ifdef NUMA
        int i;

        if (vm_ndomains == 1 || mem_affinity == NULL)
                return (0);

        /*
         * Check for any memory that overlaps.
         */
        for (i = 0; mem_affinity[i].end != 0; i++)
                if (mem_affinity[i].start <= pa &&
                    mem_affinity[i].end >= pa)
                        return (mem_affinity[i].domain);
#endif
        return (0);
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the order [order, oind) queues
 * are known to be empty.  The objective is to reduce the likelihood of
 * long-term fragmentation by promoting contemporaneous allocation and
 * (hopefully) deallocation.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
    int tail)
{
        vm_page_t m_buddy;

        while (oind > order) {
                oind--;
                m_buddy = &m[1 << oind];
                KASSERT(m_buddy->order == VM_NFREEORDER,
                    ("vm_phys_split_pages: page %p has unexpected order %d",
                    m_buddy, m_buddy->order));
                vm_freelist_add(fl, m_buddy, oind, tail);
        }
}

/*
 * Add the physical pages [m, m + npages) at the end of a power-of-two aligned
 * and sized set to the specified free list.
 *
 * When this function is called by a page allocation function, the caller
 * should request insertion at the head unless the lower-order queues are
 * known to be empty.  The objective is to reduce the likelihood of long-
 * term fragmentation by promoting contemporaneous allocation and (hopefully)
 * deallocation.
 *
 * The physical page m's buddy must not be free.
 */
static void
vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
{
        u_int n;
        int order;

        KASSERT(npages > 0, ("vm_phys_enq_range: npages is 0"));
        KASSERT(((VM_PAGE_TO_PHYS(m) + npages * PAGE_SIZE) &
            ((PAGE_SIZE << (fls(npages) - 1)) - 1)) == 0,
            ("vm_phys_enq_range: page %p and npages %u are misaligned",
            m, npages));
        do {
                KASSERT(m->order == VM_NFREEORDER,
                    ("vm_phys_enq_range: page %p has unexpected order %d",
                    m, m->order));
                order = ffs(npages) - 1;
                KASSERT(order < VM_NFREEORDER,
                    ("vm_phys_enq_range: order %d is out of range", order));
                vm_freelist_add(fl, m, order, tail);
                n = 1 << order;
                m += n;
                npages -= n;
        } while (npages > 0);
}
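
/*
 * Worked example for vm_phys_enq_range() above: suppose an order-4 block
 * (16 pages) was allocated from, leaving the trailing 11 pages free.
 * Successive iterations take the lowest set bit of npages: 11 = 8 + 2 + 1,
 * so the routine enqueues an order-0 run (1 page), then an order-1 run
 * (2 pages), then an order-3 run (8 pages).  Because the range ends on a
 * power-of-two boundary (the alignment KASSERT checks this), each run is
 * naturally aligned for its order.
 */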
/*
 * Tries to allocate the specified number of pages from the specified pool
 * within the specified domain.  Returns the actual number of allocated pages
 * and a pointer to each page through the array ma[].
 *
 * The returned pages may not be physically contiguous.  However, in contrast
 * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
 * calling this function once to allocate the desired number of pages will
 * avoid wasted time in vm_phys_split_pages().
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
{
        struct vm_freelist *alt, *fl;
        vm_page_t m;
        int avail, end, flind, freelist, i, need, oind, pind;

        KASSERT(domain >= 0 && domain < vm_ndomains,
            ("vm_phys_alloc_npages: domain %d is out of range", domain));
        KASSERT(pool < VM_NFREEPOOL,
            ("vm_phys_alloc_npages: pool %d is out of range", pool));
        KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
            ("vm_phys_alloc_npages: npages %d is out of range", npages));
        vm_domain_free_assert_locked(VM_DOMAIN(domain));
        i = 0;
        for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
                flind = vm_freelist_to_flind[freelist];
                if (flind < 0)
                        continue;
                fl = vm_phys_free_queues[domain][flind][pool];
                for (oind = 0; oind < VM_NFREEORDER; oind++) {
                        while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
                                vm_freelist_rem(fl, m, oind);
                                avail = 1 << oind;
                                need = imin(npages - i, avail);
                                for (end = i + need; i < end;)
                                        ma[i++] = m++;
                                if (need < avail) {
                                        /*
                                         * Return excess pages to fl.  Its
                                         * order [0, oind) queues are empty.
                                         */
                                        vm_phys_enq_range(m, avail - need, fl,
                                            1);
                                        return (npages);
                                } else if (i == npages)
                                        return (npages);
                        }
                }
                for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
                        for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                alt = vm_phys_free_queues[domain][flind][pind];
                                while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
                                    NULL) {
                                        vm_freelist_rem(alt, m, oind);
                                        vm_phys_set_pool(pool, m, oind);
                                        avail = 1 << oind;
                                        need = imin(npages - i, avail);
                                        for (end = i + need; i < end;)
                                                ma[i++] = m++;
                                        if (need < avail) {
                                                /*
                                                 * Return excess pages to fl.
                                                 * Its order [0, oind) queues
                                                 * are empty.
                                                 */
                                                vm_phys_enq_range(m, avail -
                                                    need, fl, 1);
                                                return (npages);
                                        } else if (i == npages)
                                                return (npages);
                                }
                        }
                }
        }
        return (i);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int domain, int pool, int order)
{
        vm_page_t m;
        int freelist;

        for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
                m = vm_phys_alloc_freelist_pages(domain, freelist, pool, order);
                if (m != NULL)
                        return (m);
        }
        return (NULL);
}
/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
 * manifest constants VM_FREELIST_*.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
{
        struct vm_freelist *alt, *fl;
        vm_page_t m;
        int oind, pind, flind;

        KASSERT(domain >= 0 && domain < vm_ndomains,
            ("vm_phys_alloc_freelist_pages: domain %d is out of range",
            domain));
        KASSERT(freelist < VM_NFREELIST,
            ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
            freelist));
        KASSERT(pool < VM_NFREEPOOL,
            ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

        flind = vm_freelist_to_flind[freelist];
        /* Check if freelist is present */
        if (flind < 0)
                return (NULL);

        vm_domain_free_assert_locked(VM_DOMAIN(domain));
        fl = &vm_phys_free_queues[domain][flind][pool][0];
        for (oind = order; oind < VM_NFREEORDER; oind++) {
                m = TAILQ_FIRST(&fl[oind].pl);
                if (m != NULL) {
                        vm_freelist_rem(fl, m, oind);
                        /* The order [order, oind) queues are empty. */
                        vm_phys_split_pages(m, oind, fl, order, 1);
                        return (m);
                }
        }

        /*
         * The given pool was empty.  Find the largest
         * contiguous, power-of-two-sized set of pages in any
         * pool.  Transfer these pages to the given pool, and
         * use them to satisfy the allocation.
         */
        for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                        alt = &vm_phys_free_queues[domain][flind][pind][0];
                        m = TAILQ_FIRST(&alt[oind].pl);
                        if (m != NULL) {
                                vm_freelist_rem(alt, m, oind);
                                vm_phys_set_pool(pool, m, oind);
                                /* The order [order, oind) queues are empty. */
                                vm_phys_split_pages(m, oind, fl, order, 1);
                                return (m);
                        }
                }
        }
        return (NULL);
}
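
/*
 * Worked example of the buddy split performed by vm_phys_split_pages() on
 * behalf of the allocator above: satisfying an order-2 request (4 pages)
 * from an order-5 block (32 pages) frees the block's upper halves at
 * successively smaller orders.  The buddies at page offsets 16 (order 4),
 * 8 (order 3), and 4 (order 2) are returned to the free lists, and the
 * order-2 run at offset 0 is handed to the caller.
 */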
/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
        struct vm_phys_seg *seg;
        int segind;

        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                if (pa >= seg->start && pa < seg->end)
                        return (&seg->first_page[atop(pa - seg->start)]);
        }
        return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
        struct vm_phys_fictitious_seg tmp, *seg;
        vm_page_t m;

        m = NULL;
        tmp.start = pa;
        tmp.end = 0;

        rw_rlock(&vm_phys_fictitious_reg_lock);
        seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
        rw_runlock(&vm_phys_fictitious_reg_lock);
        if (seg == NULL)
                return (NULL);

        m = &seg->first_page[atop(pa - seg->start)];
        KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

        return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
        long i;

        bzero(range, page_count * sizeof(*range));
        for (i = 0; i < page_count; i++) {
                vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
                range[i].oflags &= ~VPO_UNMANAGED;
                range[i].busy_lock = VPB_UNBUSIED;
        }
}
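
/*
 * Typical use of the registration interface below, sketched as
 * hypothetical driver code (the aperture address and size are
 * illustrative, not taken from this file):
 *
 *      vm_paddr_t bar = 0xd0000000;
 *      vm_size_t size = 16 * 1024 * 1024;
 *
 *      error = vm_phys_fictitious_reg_range(bar, bar + size,
 *          VM_MEMATTR_UNCACHEABLE);
 *      ...
 *      vm_phys_fictitious_unreg_range(bar, bar + size);
 *
 * Once a range is registered, vm_phys_fictitious_to_vm_page() resolves
 * addresses in [bar, bar + size) to the fake pages created for it.
 */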
int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
        struct vm_phys_fictitious_seg *seg;
        vm_page_t fp;
        long page_count;
#ifdef VM_PHYSSEG_DENSE
        long pi, pe;
        long dpage_count;
#endif

        KASSERT(start < end,
            ("Start of segment isn't less than end (start: %jx end: %jx)",
            (uintmax_t)start, (uintmax_t)end));

        page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
        pi = atop(start);
        pe = atop(end);
        if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
                fp = &vm_page_array[pi - first_page];
                if ((pe - first_page) > vm_page_array_size) {
                        /*
                         * We have a segment that starts inside
                         * of vm_page_array, but ends outside of it.
                         *
                         * Use vm_page_array pages for those that are
                         * inside of the vm_page_array range, and
                         * allocate the remaining ones.
                         */
                        dpage_count = vm_page_array_size - (pi - first_page);
                        vm_phys_fictitious_init_range(fp, start, dpage_count,
                            memattr);
                        page_count -= dpage_count;
                        start += ptoa(dpage_count);
                        goto alloc;
                }
                /*
                 * We can allocate the full range from vm_page_array,
                 * so there's no need to register the range in the tree.
                 */
                vm_phys_fictitious_init_range(fp, start, page_count, memattr);
                return (0);
        } else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
                /*
                 * We have a segment that ends inside of vm_page_array,
                 * but starts outside of it.
                 */
                fp = &vm_page_array[0];
                dpage_count = pe - first_page;
                vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
                    memattr);
                end -= ptoa(dpage_count);
                page_count -= dpage_count;
                goto alloc;
        } else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
                /*
                 * Trying to register a fictitious range that expands before
                 * and after vm_page_array.
                 */
                return (EINVAL);
        } else {
alloc:
#endif
                fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
                    M_WAITOK);
#ifdef VM_PHYSSEG_DENSE
        }
#endif
        vm_phys_fictitious_init_range(fp, start, page_count, memattr);

        seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
        seg->start = start;
        seg->end = end;
        seg->first_page = fp;

        rw_wlock(&vm_phys_fictitious_reg_lock);
        RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
        rw_wunlock(&vm_phys_fictitious_reg_lock);

        return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
        struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
        long pi, pe;
#endif

        KASSERT(start < end,
            ("Start of segment isn't less than end (start: %jx end: %jx)",
            (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
        pi = atop(start);
        pe = atop(end);
        if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
                if ((pe - first_page) <= vm_page_array_size) {
                        /*
                         * This segment was allocated using vm_page_array
                         * only, there's nothing to do since those pages
                         * were never added to the tree.
                         */
                        return;
                }
                /*
                 * We have a segment that starts inside
                 * of vm_page_array, but ends outside of it.
                 *
                 * Calculate how many pages were added to the
                 * tree and free them.
                 */
                start = ptoa(first_page + vm_page_array_size);
        } else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
                /*
                 * We have a segment that ends inside of vm_page_array,
                 * but starts outside of it.
                 */
                end = ptoa(first_page);
        } else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
                /* Since it's not possible to register such a range, panic. */
                panic(
                    "Unregistering not registered fictitious range [%#jx:%#jx]",
                    (uintmax_t)start, (uintmax_t)end);
        }
#endif
        tmp.start = start;
        tmp.end = 0;

        rw_wlock(&vm_phys_fictitious_reg_lock);
        seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
        if (seg->start != start || seg->end != end) {
                rw_wunlock(&vm_phys_fictitious_reg_lock);
                panic(
                    "Unregistering not registered fictitious range [%#jx:%#jx]",
                    (uintmax_t)start, (uintmax_t)end);
        }
        RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
        rw_wunlock(&vm_phys_fictitious_reg_lock);
        free(seg->first_page, M_FICT_PAGES);
        free(seg, M_FICT_PAGES);
}
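
/*
 * The buddy coalescing loop in vm_phys_free_pages() below computes a
 * block's buddy by flipping the address bit for the current order:
 * pa ^= (vm_paddr_t)1 << (PAGE_SHIFT + order).  For example, with 4 KB
 * pages (PAGE_SHIFT == 12, illustrative), freeing the order-0 page at
 * 0x5000 probes its buddy at 0x4000; if that page is free at order 0,
 * the two merge into an order-1 block at 0x4000, whose buddy is then
 * 0x6000, and so on until a buddy is missing, allocated, or of a
 * different order, or the maximum order is reached.
 */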
/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *seg;
        vm_paddr_t pa;
        vm_page_t m_buddy;

        KASSERT(m->order == VM_NFREEORDER,
            ("vm_phys_free_pages: page %p has unexpected order %d",
            m, m->order));
        KASSERT(m->pool < VM_NFREEPOOL,
            ("vm_phys_free_pages: page %p has unexpected pool %d",
            m, m->pool));
        KASSERT(order < VM_NFREEORDER,
            ("vm_phys_free_pages: order %d is out of range", order));
        seg = &vm_phys_segs[m->segind];
        vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
        if (order < VM_NFREEORDER - 1) {
                pa = VM_PAGE_TO_PHYS(m);
                do {
                        pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
                        if (pa < seg->start || pa >= seg->end)
                                break;
                        m_buddy = &seg->first_page[atop(pa - seg->start)];
                        if (m_buddy->order != order)
                                break;
                        fl = (*seg->free_queues)[m_buddy->pool];
                        vm_freelist_rem(fl, m_buddy, order);
                        if (m_buddy->pool != m->pool)
                                vm_phys_set_pool(m->pool, m_buddy, order);
                        order++;
                        pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
                        m = &seg->first_page[atop(pa - seg->start)];
                } while (order < VM_NFREEORDER - 1);
        }
        fl = (*seg->free_queues)[m->pool];
        vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
        u_int n;
        int order;

        /*
         * Avoid unnecessary coalescing by freeing the pages in the largest
         * possible power-of-two-sized subsets.
         */
        vm_domain_free_assert_locked(vm_pagequeue_domain(m));
        for (;; npages -= n) {
                /*
                 * Unsigned "min" is used here so that "order" is assigned
                 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
                 * or the low-order bits of its physical address are zero
                 * because the size of a physical address exceeds the size of
                 * a long.
                 */
                order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
                    VM_NFREEORDER - 1);
                n = 1 << order;
                if (npages < n)
                        break;
                vm_phys_free_pages(m, order);
                m += n;
        }
        /* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
        for (; npages > 0; npages -= n) {
                order = flsl(npages) - 1;
                n = 1 << order;
                vm_phys_free_pages(m, order);
                m += n;
        }
}
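
/*
 * Worked example for vm_phys_free_contig() above: freeing 7 pages starting
 * at page frame 12.  The first loop frees the largest aligned runs the
 * start address allows: frame 12 is order-2 aligned, so 4 pages are freed
 * at frame 12, leaving 3 pages at frame 16.  Frame 16 is order-4 aligned,
 * but only 3 pages remain, so the first loop exits.  The second loop then
 * frees by descending size: an order-1 run at frame 16 and an order-0
 * page at frame 18.
 */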
/*
 * Scan physical memory between the specified addresses "low" and "high" for a
 * run of contiguous physical pages that satisfy the specified conditions, and
 * return the lowest page in the run.  The specified "alignment" determines
 * the alignment of the lowest physical page in the run.  If the specified
 * "boundary" is non-zero, then the run of physical pages cannot span a
 * physical address that is a multiple of "boundary".
 *
 * "npages" must be greater than zero.  Both "alignment" and "boundary" must
 * be a power of two.
 */
vm_page_t
vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary, int options)
{
        vm_paddr_t pa_end;
        vm_page_t m_end, m_run, m_start;
        struct vm_phys_seg *seg;
        int segind;

        KASSERT(npages > 0, ("npages is 0"));
        KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
        KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
        if (low >= high)
                return (NULL);
        for (segind = 0; segind < vm_phys_nsegs; segind++) {
                seg = &vm_phys_segs[segind];
                if (seg->domain != domain)
                        continue;
                if (seg->start >= high)
                        break;
                if (low >= seg->end)
                        continue;
                if (low <= seg->start)
                        m_start = seg->first_page;
                else
                        m_start = &seg->first_page[atop(low - seg->start)];
                if (high < seg->end)
                        pa_end = high;
                else
                        pa_end = seg->end;
                if (pa_end - VM_PAGE_TO_PHYS(m_start) < ptoa(npages))
                        continue;
                m_end = &seg->first_page[atop(pa_end - seg->start)];
                m_run = vm_page_scan_contig(npages, m_start, m_end,
                    alignment, boundary, options);
                if (m_run != NULL)
                        return (m_run);
        }
        return (NULL);
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
        vm_page_t m_tmp;

        for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
                m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
        struct vm_freelist *fl;
        struct vm_phys_seg *seg;
        vm_paddr_t pa, pa_half;
        vm_page_t m_set, m_tmp;
        int order;

        /*
         * First, find the contiguous, power of two-sized set of free
         * physical pages containing the given physical page "m" and
         * assign it to "m_set".
         */
        seg = &vm_phys_segs[m->segind];
        vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
        for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
            order < VM_NFREEORDER - 1; ) {
                order++;
                pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
                if (pa >= seg->start)
                        m_set = &seg->first_page[atop(pa - seg->start)];
                else
                        return (FALSE);
        }
        if (m_set->order < order)
                return (FALSE);
        if (m_set->order == VM_NFREEORDER)
                return (FALSE);
        KASSERT(m_set->order < VM_NFREEORDER,
            ("vm_phys_unfree_page: page %p has unexpected order %d",
            m_set, m_set->order));

        /*
         * Next, remove "m_set" from the free lists.  Finally, extract
         * "m" from "m_set" using an iterative algorithm: While "m_set"
         * is larger than a page, shrink "m_set" by returning the half
         * of "m_set" that does not contain "m" to the free lists.
         */
        fl = (*seg->free_queues)[m_set->pool];
        order = m_set->order;
        vm_freelist_rem(fl, m_set, order);
        while (order > 0) {
                order--;
                pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
                if (m->phys_addr < pa_half)
                        m_tmp = &seg->first_page[atop(pa_half - seg->start)];
                else {
                        m_tmp = m_set;
                        m_set = &seg->first_page[atop(pa_half - seg->start)];
                }
                vm_freelist_add(fl, m_tmp, order, 0);
        }
        KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
        return (TRUE);
}
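
/*
 * Worked example of the halving loop in vm_phys_unfree_page() above,
 * again assuming 4 KB pages for illustration: extracting the page at
 * 0x3000 from a free order-2 block at 0x0000 first returns the half not
 * containing the page (the order-1 block at 0x0000) to the free lists,
 * then splits the remaining order-1 block at 0x2000 the same way
 * (returning the order-0 page at 0x2000), leaving exactly the page at
 * 0x3000 removed from the free lists.
 */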
/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
        vm_paddr_t pa_end, pa_start;
        vm_page_t m_run;
        struct vm_phys_seg *seg;
        int segind;

        KASSERT(npages > 0, ("npages is 0"));
        KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
        KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
        vm_domain_free_assert_locked(VM_DOMAIN(domain));
        if (low >= high)
                return (NULL);
        m_run = NULL;
        for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
                seg = &vm_phys_segs[segind];
                if (seg->start >= high || seg->domain != domain)
                        continue;
                if (low >= seg->end)
                        break;
                if (low <= seg->start)
                        pa_start = seg->start;
                else
                        pa_start = low;
                if (high < seg->end)
                        pa_end = high;
                else
                        pa_end = seg->end;
                if (pa_end - pa_start < ptoa(npages))
                        continue;
                m_run = vm_phys_alloc_seg_contig(seg, npages, low, high,
                    alignment, boundary);
                if (m_run != NULL)
                        break;
        }
        return (m_run);
}
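
/*
 * A note on the boundary check in the segment allocator below: a run
 * [pa, pa_end) crosses a "boundary"-aligned address exactly when its
 * first and last bytes lie in different boundary-sized windows, that is,
 * when pa and pa_end - 1 differ in a bit at or above log2(boundary).
 * rounddown2(pa ^ (pa_end - 1), boundary) discards the bits below
 * log2(boundary) from that XOR, so a zero result means no boundary is
 * crossed; a boundary of zero makes the mask empty and so disables the
 * check entirely.
 */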
/*
 * Allocate a run of contiguous physical pages from the free list for the
 * specified segment.
 */
static vm_page_t
vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
{
        struct vm_freelist *fl;
        vm_paddr_t pa, pa_end, size;
        vm_page_t m, m_ret;
        u_long npages_end;
        int oind, order, pind;

        KASSERT(npages > 0, ("npages is 0"));
        KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
        KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
        vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
        /* Compute the queue that is the best fit for npages. */
        order = flsl(npages - 1);
        /* Search for a run satisfying the specified conditions. */
        size = npages << PAGE_SHIFT;
        for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
            oind++) {
                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                        fl = (*seg->free_queues)[pind];
                        TAILQ_FOREACH(m_ret, &fl[oind].pl, listq) {
                                /*
                                 * Is the size of this allocation request
                                 * larger than the largest block size?
                                 */
                                if (order >= VM_NFREEORDER) {
                                        /*
                                         * Determine if a sufficient number of
                                         * subsequent blocks to satisfy the
                                         * allocation request are free.
                                         */
                                        pa = VM_PAGE_TO_PHYS(m_ret);
                                        pa_end = pa + size;
                                        if (pa_end < pa)
                                                continue;
                                        for (;;) {
                                                pa += 1 << (PAGE_SHIFT +
                                                    VM_NFREEORDER - 1);
                                                if (pa >= pa_end ||
                                                    pa < seg->start ||
                                                    pa >= seg->end)
                                                        break;
                                                m = &seg->first_page[atop(pa -
                                                    seg->start)];
                                                if (m->order != VM_NFREEORDER -
                                                    1)
                                                        break;
                                        }
                                        /* If not, go to the next block. */
                                        if (pa < pa_end)
                                                continue;
                                }

                                /*
                                 * Determine if the blocks are within the
                                 * given range, satisfy the given alignment,
                                 * and do not cross the given boundary.
                                 */
                                pa = VM_PAGE_TO_PHYS(m_ret);
                                pa_end = pa + size;
                                if (pa >= low && pa_end <= high &&
                                    (pa & (alignment - 1)) == 0 &&
                                    rounddown2(pa ^ (pa_end - 1), boundary) == 0)
                                        goto done;
                        }
                }
        }
        return (NULL);
done:
        for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
                fl = (*seg->free_queues)[m->pool];
                vm_freelist_rem(fl, m, oind);
                if (m->pool != VM_FREEPOOL_DEFAULT)
                        vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
        }
        /* Return excess pages to the free lists. */
        npages_end = roundup2(npages, 1 << oind);
        if (npages < npages_end) {
                fl = (*seg->free_queues)[VM_FREEPOOL_DEFAULT];
                vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0);
        }
        return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
        struct vm_freelist *fl;
        int flind, oind, pind, dom;

        for (dom = 0; dom < vm_ndomains; dom++) {
                db_printf("DOMAIN: %d\n", dom);
                for (flind = 0; flind < vm_nfreelists; flind++) {
                        db_printf("FREE LIST %d:\n"
                            "\n  ORDER (SIZE)  |  NUMBER"
                            "\n              ", flind);
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                db_printf("  |  POOL %d", pind);
                        db_printf("\n--            ");
                        for (pind = 0; pind < VM_NFREEPOOL; pind++)
                                db_printf("-- --      ");
                        db_printf("--\n");
                        for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
                                db_printf("  %2.2d (%6.6dK)", oind,
                                    1 << (PAGE_SHIFT - 10 + oind));
                                for (pind = 0; pind < VM_NFREEPOOL; pind++) {
                                        fl = vm_phys_free_queues[dom][flind][pind];
                                        db_printf("  |  %6.6d", fl[oind].lcnt);
                                }
                                db_printf("\n");
                        }
                        db_printf("\n");
                }
                db_printf("\n");
        }
}
#endif
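
/*
 * The command above is invoked from the in-kernel debugger as
 * "show freepages"; its output mirrors that of the vm.phys_free sysctl.
 */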