/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2004, 2005,
 *      Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_param.h"

#include <sys/param.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_dbg.h>

/*
 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
 * Zones.
 *
 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
 * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
 * administrator so desires.
 *
 * Mbufs are allocated from a UMA Master Zone called the Mbuf
 * Zone.
 *
 * Additionally, FreeBSD provides a Packet Zone, which it
 * configures as a Secondary Zone to the Mbuf Master Zone,
 * thus sharing backend Slab kegs with the Mbuf Master Zone.
 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *  |                        |
 *  |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *  |   |             [     Packet   ]            |
 *  [(Cluster Cache)] [    Secondary ]   [ (Mbuf Cache)     ]
 *  [  Cluster Zone  ][     Zone     ]   [ Mbuf Master Zone ]
 *        |                   \________          |
 *  [ Cluster Keg   ]                  \        /
 *        |                          [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                     |
 *        |                          [ Mbuf Slabs ]
 *         \____________(VM)_____________/
 *
 * Whenever an object is allocated with uma_zalloc() out of
 * one of the Zones, its _ctor_ function is executed.  Likewise,
 * whenever an object is deallocated through uma_zfree(), its
 * _dtor_ function is executed.
 *
 * Caches are per-CPU and are filled from the Master Zone.
 *
 * Whenever an object is allocated from the underlying global
 * memory pool, it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull, objects get decommissioned with the
 * _zfini_ functions and are freed back to the global memory pool.
 */

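/*
 * For illustration only (this is effectively what the m_getcl() wrapper
 * in sys/mbuf.h does): an allocation from the Packet Zone is a single
 * uma_zalloc_arg() call that runs mb_ctor_pack() on the returned object,
 * and a uma_zfree() of the same object runs mb_dtor_pack().  A minimal
 * sketch, with hypothetical error handling:
 *
 *      struct mb_args args;
 *      struct mbuf *m;
 *
 *      args.flags = M_PKTHDR;
 *      args.type = MT_DATA;
 *      m = uma_zalloc_arg(zone_pack, &args, M_NOWAIT);
 *      ...
 *      uma_zfree(zone_pack, m);
 */
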
int nmbufs;			/* limits number of mbufs */
int nmbclusters;		/* limits number of mbuf clusters */
int nmbjumbop;			/* limits number of page size jumbo clusters */
int nmbjumbo9;			/* limits number of 9k jumbo clusters */
int nmbjumbo16;			/* limits number of 16k jumbo clusters */

static quad_t maxmbufmem;	/* overall real memory limit for all mbufs */

SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &maxmbufmem, 0, "Maximum real memory allocatable to various mbuf types");

/*
 * tunable_mbinit() has to be run before any mbuf allocations are done.
 */
static void
tunable_mbinit(void *dummy)
{
	quad_t realmem;

	/*
	 * The default limit for all mbuf-related memory is 1/2 of all
	 * available kernel memory (physical or kmem).
	 * At most it can be 3/4 of available kernel memory.
	 */
	realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size);
	maxmbufmem = realmem / 2;
	TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
	if (maxmbufmem > realmem / 4 * 3)
		maxmbufmem = realmem / 4 * 3;

	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
	if (nmbclusters == 0)
		nmbclusters = maxmbufmem / MCLBYTES / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
	if (nmbjumbop == 0)
		nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
	if (nmbjumbo9 == 0)
		nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
	if (nmbjumbo16 == 0)
		nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;

	/*
	 * We need at least as many mbufs as we have clusters of
	 * the various types added together.
	 */
	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
		nmbufs = lmax(maxmbufmem / MSIZE / 5,
		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
}
SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);

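/*
 * The limits fetched above are boot-time tunables.  A sketch of how an
 * administrator might override the auto-sizing from /boot/loader.conf
 * (the values shown are illustrative, not recommendations):
 *
 *	kern.ipc.maxmbufmem="1073741824"
 *	kern.ipc.nmbclusters="131072"
 *	kern.ipc.nmbjumbop="65536"
 *	kern.ipc.nmbufs="262144"
 *
 * Any tunable left unset is derived from maxmbufmem as computed above.
 */
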
static int
sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbclusters;

	newnmbclusters = nmbclusters;
	error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
	if (error == 0 && req->newptr && newnmbclusters != nmbclusters) {
		if (newnmbclusters > nmbclusters &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbclusters = newnmbclusters;
			nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
			EVENTHANDLER_INVOKE(nmbclusters_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT | CTLFLAG_RW,
    &nmbclusters, 0, sysctl_nmbclusters, "IU",
    "Maximum number of mbuf clusters allowed");

static int
sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbop;

	newnmbjumbop = nmbjumbop;
	error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
	if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) {
		if (newnmbjumbop > nmbjumbop &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbop = newnmbjumbop;
			nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT | CTLFLAG_RW,
    &nmbjumbop, 0, sysctl_nmbjumbop, "IU",
    "Maximum number of mbuf page size jumbo clusters allowed");

static int
sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo9;

	newnmbjumbo9 = nmbjumbo9;
	error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
	if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) {
		if (newnmbjumbo9 > nmbjumbo9 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo9 = newnmbjumbo9;
			nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT | CTLFLAG_RW,
    &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
    "Maximum number of mbuf 9k jumbo clusters allowed");

static int
sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo16;

	newnmbjumbo16 = nmbjumbo16;
	error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
	if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) {
		if (newnmbjumbo16 > nmbjumbo16 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo16 = newnmbjumbo16;
			nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT | CTLFLAG_RW,
    &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
    "Maximum number of mbuf 16k jumbo clusters allowed");

static int
sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbufs;

	newnmbufs = nmbufs;
	error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
	if (error == 0 && req->newptr && newnmbufs != nmbufs) {
		if (newnmbufs > nmbufs) {
			nmbufs = newnmbufs;
			nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
			EVENTHANDLER_INVOKE(nmbufs_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT | CTLFLAG_RW,
    &nmbufs, 0, sysctl_nmbufs, "IU",
    "Maximum number of mbufs allowed");

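/*
 * At run time the same limits are exposed through sysctl(8).  As the
 * handlers above enforce, a limit may only be raised, never lowered,
 * and nmbufs must stay at or above the sum of the cluster limits.
 * For example (illustrative values):
 *
 *	# sysctl kern.ipc.nmbclusters=262144
 *	# sysctl kern.ipc.nmbufs=524288
 */
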
/*
 * Zones from which we allocate.
 */
uma_zone_t	zone_mbuf;
uma_zone_t	zone_clust;
uma_zone_t	zone_pack;
uma_zone_t	zone_jumbop;
uma_zone_t	zone_jumbo9;
uma_zone_t	zone_jumbo16;

/*
 * Local prototypes.
 */
static int	mb_ctor_mbuf(void *, int, void *, int);
static int	mb_ctor_clust(void *, int, void *, int);
static int	mb_ctor_pack(void *, int, void *, int);
static void	mb_dtor_mbuf(void *, int, void *);
static void	mb_dtor_pack(void *, int, void *);
static int	mb_zinit_pack(void *, int, int);
static void	mb_zfini_pack(void *, int);
static void	mb_reclaim(uma_zone_t, int);
static void	*mbuf_jumbo_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);

/*
 * Ensure that MSIZE is a power of 2.  For a power of two,
 * MSIZE ^ (MSIZE - 1) sets every bit up to and including MSIZE's own
 * bit, so adding 1 and shifting right by one recovers MSIZE exactly;
 * any other value fails the comparison at compile time.
 */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);

/*
 * Initialize FreeBSD Network buffer allocation.
 */
static void
mbuf_init(void *dummy)
{

	/*
	 * Configure UMA zones for Mbufs, Clusters, and Packets.
	 */
	zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
	    mb_ctor_mbuf, mb_dtor_mbuf,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
	if (nmbufs > 0)
		nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
	uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
	uma_zone_set_maxaction(zone_mbuf, mb_reclaim);

	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	if (nmbclusters > 0)
		nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
	uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
	uma_zone_set_maxaction(zone_clust, mb_reclaim);

	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);

	/* Make the jumbo frame zones too.  Page size, 9k and 16k. */
	zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	if (nmbjumbop > 0)
		nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
	uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
	uma_zone_set_maxaction(zone_jumbop, mb_reclaim);

	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
	if (nmbjumbo9 > 0)
		nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
	uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
	uma_zone_set_maxaction(zone_jumbo9, mb_reclaim);

	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
	if (nmbjumbo16 > 0)
		nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
	uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
	uma_zone_set_maxaction(zone_jumbo16, mb_reclaim);

	/*
	 * Hook event handler for low-memory situation, used to
	 * drain protocols and push data back to the caches (UMA
	 * later pushes it back to VM).
	 */
	EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
	    EVENTHANDLER_PRI_FIRST);
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);

#ifdef NETDUMP
/*
 * netdump makes use of a pre-allocated pool of mbufs and clusters.  When
 * netdump is configured, we initialize a set of UMA cache zones which return
 * items from this pool.  At panic-time, the regular UMA zone pointers are
 * overwritten with those of the cache zones so that drivers may allocate and
 * free mbufs and clusters without attempting to allocate physical memory.
 *
 * We keep mbufs and clusters in a pair of mbuf queues.  In particular, for
 * the purpose of caching clusters, we treat them as mbufs.
 */
static struct mbufq nd_mbufq =
    { STAILQ_HEAD_INITIALIZER(nd_mbufq.mq_head), 0, INT_MAX };
static struct mbufq nd_clustq =
    { STAILQ_HEAD_INITIALIZER(nd_clustq.mq_head), 0, INT_MAX };

static int nd_clsize;
static uma_zone_t nd_zone_mbuf;
static uma_zone_t nd_zone_clust;
static uma_zone_t nd_zone_pack;

static int
nd_buf_import(void *arg, void **store, int count, int domain __unused,
    int flags)
{
	struct mbufq *q;
	struct mbuf *m;
	int i;

	q = arg;

	for (i = 0; i < count; i++) {
		m = mbufq_dequeue(q);
		if (m == NULL)
			break;
		trash_init(m, q == &nd_mbufq ? MSIZE : nd_clsize, flags);
		store[i] = m;
	}
	return (i);
}

static void
nd_buf_release(void *arg, void **store, int count)
{
	struct mbufq *q;
	struct mbuf *m;
	int i;

	q = arg;

	for (i = 0; i < count; i++) {
		m = store[i];
		(void)mbufq_enqueue(q, m);
	}
}

static int
nd_pack_import(void *arg __unused, void **store, int count,
    int domain __unused, int flags __unused)
{
	struct mbuf *m;
	void *clust;
	int i;

	for (i = 0; i < count; i++) {
		m = m_get(MT_DATA, M_NOWAIT);
		if (m == NULL)
			break;
		clust = uma_zalloc(nd_zone_clust, M_NOWAIT);
		if (clust == NULL) {
			m_free(m);
			break;
		}
		mb_ctor_clust(clust, nd_clsize, m, 0);
		store[i] = m;
	}
	return (i);
}

static void
nd_pack_release(void *arg __unused, void **store, int count)
{
	struct mbuf *m;
	void *clust;
	int i;

	for (i = 0; i < count; i++) {
		m = store[i];
		clust = m->m_ext.ext_buf;
		uma_zfree(nd_zone_clust, clust);
		uma_zfree(nd_zone_mbuf, m);
	}
}

/*
 * Free the pre-allocated mbufs and clusters reserved for netdump, and destroy
 * the corresponding UMA cache zones.
 */
void
netdump_mbuf_drain(void)
{
	struct mbuf *m;
	void *item;

	if (nd_zone_mbuf != NULL) {
		uma_zdestroy(nd_zone_mbuf);
		nd_zone_mbuf = NULL;
	}
	if (nd_zone_clust != NULL) {
		uma_zdestroy(nd_zone_clust);
		nd_zone_clust = NULL;
	}
	if (nd_zone_pack != NULL) {
		uma_zdestroy(nd_zone_pack);
		nd_zone_pack = NULL;
	}

	while ((m = mbufq_dequeue(&nd_mbufq)) != NULL)
		m_free(m);
	while ((item = mbufq_dequeue(&nd_clustq)) != NULL)
		uma_zfree(m_getzone(nd_clsize), item);
}

/*
 * Callback invoked immediately prior to starting a netdump.
 */
void
netdump_mbuf_dump(void)
{

	/*
	 * All cluster zones return buffers of the size requested by the
	 * drivers.  It's up to the driver to reinitialize the zones if the
	 * MTU of a netdump-enabled interface changes.
	 */
	printf("netdump: overwriting mbuf zone pointers\n");
	zone_mbuf = nd_zone_mbuf;
	zone_clust = nd_zone_clust;
	zone_pack = nd_zone_pack;
	zone_jumbop = nd_zone_clust;
	zone_jumbo9 = nd_zone_clust;
	zone_jumbo16 = nd_zone_clust;
}

/*
 * Reinitialize the netdump mbuf+cluster pool and cache zones.
 */
void
netdump_mbuf_reinit(int nmbuf, int nclust, int clsize)
{
	struct mbuf *m;
	void *item;

	netdump_mbuf_drain();

	nd_clsize = clsize;

	nd_zone_mbuf = uma_zcache_create("netdump_" MBUF_MEM_NAME,
	    MSIZE, mb_ctor_mbuf, mb_dtor_mbuf,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    nd_buf_import, nd_buf_release,
	    &nd_mbufq, UMA_ZONE_NOBUCKET);

	nd_zone_clust = uma_zcache_create("netdump_" MBUF_CLUSTER_MEM_NAME,
	    clsize, mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    nd_buf_import, nd_buf_release,
	    &nd_clustq, UMA_ZONE_NOBUCKET);

	nd_zone_pack = uma_zcache_create("netdump_" MBUF_PACKET_MEM_NAME,
	    MCLBYTES, mb_ctor_pack, mb_dtor_pack, NULL, NULL,
	    nd_pack_import, nd_pack_release,
	    NULL, UMA_ZONE_NOBUCKET);

	while (nmbuf-- > 0) {
		m = m_get(MT_DATA, M_WAITOK);
		uma_zfree(nd_zone_mbuf, m);
	}
	while (nclust-- > 0) {
		item = uma_zalloc(m_getzone(nd_clsize), M_WAITOK);
		uma_zfree(nd_zone_clust, item);
	}
}
#endif /* NETDUMP */

/*
 * UMA backend page allocator for the jumbo frame zones.
 *
 * Allocates kernel virtual memory that is backed by contiguous physical
 * pages.
 */
static void *
mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
    int wait)
{

	/* Inform UMA that this allocator uses kernel_map/object. */
	*flags = UMA_SLAB_KERNEL;
	return ((void *)kmem_alloc_contig_domainset(DOMAINSET_FIXED(domain),
	    bytes, wait, (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0,
	    VM_MEMATTR_DEFAULT));
}

/*
 * Constructor for the Mbuf master zone.
 *
 * The 'arg' pointer points to a mb_args structure which
 * contains call-specific information required to support the
 * mbuf allocation API.  See mbuf.h.
 */
static int
mb_ctor_mbuf(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
	int error;
	int flags;
	short type;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	args = (struct mb_args *)arg;
	type = args->type;

	/*
	 * The mbuf is initialized later.  The caller has the
	 * responsibility to set up any MAC labels too.
	 */
	if (type == MT_NOINIT)
		return (0);

	m = (struct mbuf *)mem;
	flags = args->flags;
	MPASS((flags & M_NOFREE) == 0);

	error = m_init(m, how, type, flags);

	return (error);
}

/*
 * The Mbuf master zone destructor.
 */
static void
mb_dtor_mbuf(void *mem, int size, void *arg)
{
	struct mbuf *m;
	unsigned long flags;

	m = (struct mbuf *)mem;
	flags = (unsigned long)arg;

	KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
	if (!(flags & MB_DTOR_SKIP) && (m->m_flags & M_PKTHDR) &&
	    !SLIST_EMPTY(&m->m_pkthdr.tags))
		m_tag_delete_chain(m, NULL);
#ifdef INVARIANTS
	trash_dtor(mem, size, arg);
#endif
}

/*
 * The Mbuf Packet zone destructor.
 */
static void
mb_dtor_pack(void *mem, int size, void *arg)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
	if ((m->m_flags & M_PKTHDR) != 0)
		m_tag_delete_chain(m, NULL);

	/* Make sure we've got a clean cluster back. */
	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set",
	    __func__));
	KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
	KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
	KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__));
	KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
	KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES",
	    __func__));
	KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET",
	    __func__));
#ifdef INVARIANTS
	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
#endif
	/*
	 * If there are processes blocked on zone_clust, waiting for pages
	 * to be freed up, cause them to be woken up by draining the
	 * packet zone.  We are exposed to a race here (in the check for
	 * the UMA_ZFLAG_FULL) where we might miss the flag set, but that
	 * is deliberate.  We don't want to acquire the zone lock for every
	 * mbuf free.
	 */
	if (uma_zone_exhausted_nolock(zone_clust))
		zone_drain(zone_pack);
}

/*
 * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
 *
 * Here the 'arg' pointer points to the Mbuf which we
 * are configuring cluster storage for.  If 'arg' is
 * empty we allocate just the cluster without setting
 * the mbuf to it.  See mbuf.h.
 */
static int
mb_ctor_clust(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	m = (struct mbuf *)arg;
	if (m != NULL) {
		m->m_ext.ext_buf = (char *)mem;
		m->m_data = m->m_ext.ext_buf;
		m->m_flags |= M_EXT;
		m->m_ext.ext_free = NULL;
		m->m_ext.ext_arg1 = NULL;
		m->m_ext.ext_arg2 = NULL;
		m->m_ext.ext_size = size;
		m->m_ext.ext_type = m_gettype(size);
		m->m_ext.ext_flags = EXT_FLAG_EMBREF;
		m->m_ext.ext_count = 1;
	}

	return (0);
}

/*
 * The Packet secondary zone's init routine, executed on the
 * object's transition from mbuf keg slab to zone cache.
 */
static int
mb_zinit_pack(void *mem, int size, int how)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;		/* m is virgin. */
	if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
	    m->m_ext.ext_buf == NULL)
		return (ENOMEM);
	m->m_ext.ext_type = EXT_PACKET;	/* Override. */
#ifdef INVARIANTS
	trash_init(m->m_ext.ext_buf, MCLBYTES, how);
#endif
	return (0);
}

/*
 * The Packet secondary zone's fini routine, executed on the
 * object's transition from zone cache to keg slab.
 */
static void
mb_zfini_pack(void *mem, int size)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
#ifdef INVARIANTS
	trash_fini(m->m_ext.ext_buf, MCLBYTES);
#endif
	uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
#ifdef INVARIANTS
	trash_dtor(mem, size, NULL);
#endif
}

/*
 * The "packet" keg constructor.
 */
static int
mb_ctor_pack(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
	int error, flags;
	short type;

	m = (struct mbuf *)mem;
	args = (struct mb_args *)arg;
	flags = args->flags;
	type = args->type;
	MPASS((flags & M_NOFREE) == 0);

#ifdef INVARIANTS
	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif

	error = m_init(m, how, type, flags);

	/* m_ext is already initialized. */
	m->m_data = m->m_ext.ext_buf;
	m->m_flags = (flags | M_EXT);

	return (error);
}

/*
 * This is the protocol drain routine.  Called by UMA whenever any of the
 * mbuf zones is close to its limit.
 *
 * No locks should be held when this is called.  The drain routines have to
 * presently acquire some locks which raises the possibility of lock order
 * reversal.
 */
static void
mb_reclaim(uma_zone_t zone __unused, int pending __unused)
{
	struct domain *dp;
	struct protosw *pr;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__);

	for (dp = domains; dp != NULL; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain != NULL)
				(*pr->pr_drain)();
}

/*
 * Clean up after mbufs with M_EXT storage attached to them if the
 * reference count hits 1.
 */
void
mb_free_ext(struct mbuf *m)
{
	volatile u_int *refcnt;
	struct mbuf *mref;
	int freembuf;

	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));

	/* See if this is the mbuf that holds the embedded refcount. */
	if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
		refcnt = &m->m_ext.ext_count;
		mref = m;
	} else {
		KASSERT(m->m_ext.ext_cnt != NULL,
		    ("%s: no refcounting pointer on %p", __func__, m));
		refcnt = m->m_ext.ext_cnt;
		mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
	}

	/*
	 * Check if the header is embedded in the cluster.  It is
	 * important that we can't touch any of the mbuf fields
	 * after we have freed the external storage, since the mbuf
	 * could have been embedded in it.  For now, the mbufs
	 * embedded into the cluster are always of type EXT_EXTREF,
	 * and for this type we won't free the mref.
	 */
	if (m->m_flags & M_NOFREE) {
		freembuf = 0;
		KASSERT(m->m_ext.ext_type == EXT_EXTREF,
		    ("%s: no-free mbuf %p has wrong type", __func__, m));
	} else
		freembuf = 1;

	/* Free attached storage if this mbuf is the only reference to it. */
	if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) {
		switch (m->m_ext.ext_type) {
		case EXT_PACKET:
			/* The packet zone is special. */
			if (*refcnt == 0)
				*refcnt = 1;
			uma_zfree(zone_pack, mref);
			break;
		case EXT_CLUSTER:
			uma_zfree(zone_clust, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_JUMBOP:
			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_JUMBO9:
			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_JUMBO16:
			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_SFBUF:
		case EXT_NET_DRV:
		case EXT_MOD_TYPE:
		case EXT_DISPOSABLE:
			KASSERT(mref->m_ext.ext_free != NULL,
			    ("%s: ext_free not set", __func__));
			mref->m_ext.ext_free(mref);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_EXTREF:
			KASSERT(m->m_ext.ext_free != NULL,
			    ("%s: ext_free not set", __func__));
			m->m_ext.ext_free(m);
			break;
		default:
			KASSERT(m->m_ext.ext_type == 0,
			    ("%s: unknown ext_type", __func__));
		}
	}

	/* If this mbuf is not the refcount holder, free it separately. */
	if (freembuf && m != mref)
		uma_zfree(zone_mbuf, m);
}

/*
 * Official mbuf(9) allocation KPI for stack and drivers:
 *
 * m_get()	- a single mbuf without any attachments, sys/mbuf.h.
 * m_gethdr()	- a single mbuf initialized as M_PKTHDR, sys/mbuf.h.
 * m_getcl()	- an mbuf + 2k cluster, sys/mbuf.h.
 * m_clget()	- attach cluster to already allocated mbuf.
 * m_cljget()	- attach jumbo cluster to already allocated mbuf.
 * m_get2()	- allocate minimum mbuf that would fit size argument.
 * m_getm2()	- allocate a chain of mbufs/clusters.
 * m_extadd()	- attach external cluster to mbuf.
 *
 * m_free()	- free single mbuf with its tags and ext, sys/mbuf.h.
 * m_freem()	- free chain of mbufs.
 */

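/*
 * A minimal sketch of the common KPI path (illustrative only; the
 * payload length "len" and the error handling are hypothetical):
 *
 *	struct mbuf *m;
 *
 *	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	m->m_len = m->m_pkthdr.len = len;
 *	...
 *	m_freem(m);	// releases the mbuf, any tags, and the cluster
 */
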
int
m_clget(struct mbuf *m, int how)
{

	KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
	    __func__, m));
	m->m_ext.ext_buf = (char *)NULL;
	uma_zalloc_arg(zone_clust, m, how);
	/*
	 * On a cluster allocation failure, drain the packet zone and retry;
	 * we might be able to loosen a few clusters up on the drain.
	 */
	if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
		zone_drain(zone_pack);
		uma_zalloc_arg(zone_clust, m, how);
	}
	MBUF_PROBE2(m__clget, m, how);
	return (m->m_flags & M_EXT);
}

/*
 * m_cljget() differs from m_clget() in that it can allocate clusters
 * without attaching them to an mbuf.  In that case the return value is
 * the pointer to the cluster of the requested size.  If an mbuf was
 * specified, it gets the cluster attached to it and the return value
 * can be safely ignored.
 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
 */
void *
m_cljget(struct mbuf *m, int how, int size)
{
	uma_zone_t zone;
	void *retval;

	if (m != NULL) {
		KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
		    __func__, m));
		m->m_ext.ext_buf = NULL;
	}

	zone = m_getzone(size);
	retval = uma_zalloc_arg(zone, m, how);

	MBUF_PROBE4(m__cljget, m, how, size, retval);

	return (retval);
}

/*
 * m_get2() allocates the minimum mbuf that would fit the "size" argument.
 */
struct mbuf *
m_get2(int size, int how, short type, int flags)
{
	struct mb_args args;
	struct mbuf *m, *n;

	args.flags = flags;
	args.type = type;

	if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0))
		return (uma_zalloc_arg(zone_mbuf, &args, how));
	if (size <= MCLBYTES)
		return (uma_zalloc_arg(zone_pack, &args, how));

	if (size > MJUMPAGESIZE)
		return (NULL);

	m = uma_zalloc_arg(zone_mbuf, &args, how);
	if (m == NULL)
		return (NULL);

	n = uma_zalloc_arg(zone_jumbop, m, how);
	if (n == NULL) {
		uma_zfree(zone_mbuf, m);
		return (NULL);
	}

	return (m);
}

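/*
 * For example (illustrative; 1500 is an arbitrary payload size), the
 * following returns a packet header mbuf backed by a 2k cluster from
 * the packet zone, while any size above MJUMPAGESIZE simply yields
 * NULL, as the code above shows:
 *
 *	struct mbuf *m;
 *
 *	m = m_get2(1500, M_NOWAIT, MT_DATA, M_PKTHDR);
 */
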
/*
 * m_getjcl() returns an mbuf with a cluster of the specified size attached.
 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
 */
struct mbuf *
m_getjcl(int how, short type, int flags, int size)
{
	struct mb_args args;
	struct mbuf *m, *n;
	uma_zone_t zone;

	if (size == MCLBYTES)
		return (m_getcl(how, type, flags));

	args.flags = flags;
	args.type = type;

	m = uma_zalloc_arg(zone_mbuf, &args, how);
	if (m == NULL)
		return (NULL);

	zone = m_getzone(size);
	n = uma_zalloc_arg(zone, m, how);
	if (n == NULL) {
		uma_zfree(zone_mbuf, m);
		return (NULL);
	}
	MBUF_PROBE5(m__getjcl, how, type, flags, size, m);
	return (m);
}

/*
 * Allocate a given length worth of mbufs and/or clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain.  If an
 * existing mbuf chain is provided, then we will append the new chain
 * to the existing one but still return the top of the newly allocated
 * chain.
 */
struct mbuf *
m_getm2(struct mbuf *m, int len, int how, short type, int flags)
{
	struct mbuf *mb, *nm = NULL, *mtail = NULL;

	KASSERT(len >= 0, ("%s: len is < 0", __func__));

	/* Validate flags. */
	flags &= (M_PKTHDR | M_EOR);

	/* Packet header mbuf must be first in chain. */
	if ((flags & M_PKTHDR) && m != NULL)
		flags &= ~M_PKTHDR;

	/* Loop and append maximum sized mbufs to the chain tail. */
	while (len > 0) {
		if (len > MCLBYTES)
			mb = m_getjcl(how, type, (flags & M_PKTHDR),
			    MJUMPAGESIZE);
		else if (len >= MINCLSIZE)
			mb = m_getcl(how, type, (flags & M_PKTHDR));
		else if (flags & M_PKTHDR)
			mb = m_gethdr(how, type);
		else
			mb = m_get(how, type);

		/* Fail the whole operation if one mbuf can't be allocated. */
		if (mb == NULL) {
			if (nm != NULL)
				m_freem(nm);
			return (NULL);
		}

		/* Book keeping. */
		len -= M_SIZE(mb);
		if (mtail != NULL)
			mtail->m_next = mb;
		else
			nm = mb;
		mtail = mb;
		flags &= ~M_PKTHDR;	/* Only valid on the first mbuf. */
	}
	if (flags & M_EOR)
		mtail->m_flags |= M_EOR;	/* Only valid on the last mbuf. */

	/* If mbuf was supplied, append new chain to the end of it. */
	if (m != NULL) {
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next)
			;
		mtail->m_next = nm;
		mtail->m_flags &= ~M_EOR;
	} else
		m = nm;

	return (m);
}

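/*
 * For example (illustrative), building a fresh 64k chain of
 * page-size-cluster mbufs in one call; with M_WAITOK the allocation
 * cannot fail:
 *
 *	struct mbuf *top;
 *
 *	top = m_getm2(NULL, 65536, M_WAITOK, MT_DATA, M_PKTHDR);
 */
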
/*-
 * Configure a provided mbuf to refer to the provided external storage
 * buffer and set up a reference count for said buffer.
 *
 * Arguments:
 *    mb     The existing mbuf to which to attach the provided buffer.
 *    buf    The address of the provided external storage buffer.
 *    size   The size of the provided buffer.
 *    freef  A pointer to a routine that is responsible for freeing the
 *           provided external storage buffer.
 *    arg1, arg2
 *           Opaque pointers (of any type) stored in m_ext and available
 *           to the provided freef routine (either may be NULL).
 *    flags  Any other flags to be passed to the provided mbuf.
 *    type   The type that the external storage buffer should be
 *           labeled with.
 *
 * Returns:
 *    Nothing.
 */
void
m_extadd(struct mbuf *mb, char *buf, u_int size, m_ext_free_t freef,
    void *arg1, void *arg2, int flags, int type)
{

	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed",
	    __func__));

	mb->m_flags |= (M_EXT | flags);
	mb->m_ext.ext_buf = buf;
	mb->m_data = mb->m_ext.ext_buf;
	mb->m_ext.ext_size = size;
	mb->m_ext.ext_free = freef;
	mb->m_ext.ext_arg1 = arg1;
	mb->m_ext.ext_arg2 = arg2;
	mb->m_ext.ext_type = type;

	if (type != EXT_EXTREF) {
		mb->m_ext.ext_count = 1;
		mb->m_ext.ext_flags = EXT_FLAG_EMBREF;
	} else
		mb->m_ext.ext_flags = 0;
}

/*
 * Free an entire chain of mbufs and associated external buffers, if
 * applicable.
 */
void
m_freem(struct mbuf *mb)
{

	MBUF_PROBE1(m__freem, mb);
	while (mb != NULL)
		mb = m_free(mb);
}

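/*
 * A sketch of attaching driver-owned storage via m_extadd() (all names
 * here are hypothetical; "my_buf_free" must match m_ext_free_t and is
 * invoked from mb_free_ext() above when the last reference goes away):
 *
 *	static void
 *	my_buf_free(struct mbuf *m)
 *	{
 *
 *		free(m->m_ext.ext_arg1, M_DEVBUF);
 *	}
 *
 *	...
 *	m_extadd(m, buf, bufsize, my_buf_free, buf, NULL, 0, EXT_NET_DRV);
 */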