kern_mbuf.c revision 253207
1129906Sbmilekic/*- 2141991Sbmilekic * Copyright (c) 2004, 2005, 3243995Spjd * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved. 4129906Sbmilekic * 5129906Sbmilekic * Redistribution and use in source and binary forms, with or without 6129906Sbmilekic * modification, are permitted provided that the following conditions 7129906Sbmilekic * are met: 8129906Sbmilekic * 1. Redistributions of source code must retain the above copyright 9129906Sbmilekic * notice unmodified, this list of conditions and the following 10129906Sbmilekic * disclaimer. 11129906Sbmilekic * 2. Redistributions in binary form must reproduce the above copyright 12129906Sbmilekic * notice, this list of conditions and the following disclaimer in the 13129906Sbmilekic * documentation and/or other materials provided with the distribution. 14129906Sbmilekic * 15129906Sbmilekic * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16129906Sbmilekic * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17129906Sbmilekic * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18129906Sbmilekic * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19129906Sbmilekic * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20129906Sbmilekic * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21129906Sbmilekic * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22129906Sbmilekic * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23129906Sbmilekic * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24129906Sbmilekic * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25129906Sbmilekic * SUCH DAMAGE. 26129906Sbmilekic */ 27129906Sbmilekic 28129906Sbmilekic#include <sys/cdefs.h> 29129906Sbmilekic__FBSDID("$FreeBSD: head/sys/kern/kern_mbuf.c 253207 2013-07-11 12:53:13Z andre $"); 30129906Sbmilekic 31129906Sbmilekic#include "opt_param.h" 32129906Sbmilekic 33129906Sbmilekic#include <sys/param.h> 34129906Sbmilekic#include <sys/malloc.h> 35129906Sbmilekic#include <sys/systm.h> 36129906Sbmilekic#include <sys/mbuf.h> 37129906Sbmilekic#include <sys/domain.h> 38129906Sbmilekic#include <sys/eventhandler.h> 39129906Sbmilekic#include <sys/kernel.h> 40129906Sbmilekic#include <sys/protosw.h> 41129906Sbmilekic#include <sys/smp.h> 42129906Sbmilekic#include <sys/sysctl.h> 43129906Sbmilekic 44163606Srwatson#include <security/mac/mac_framework.h> 45163606Srwatson 46129906Sbmilekic#include <vm/vm.h> 47194454Salc#include <vm/vm_extern.h> 48194454Salc#include <vm/vm_kern.h> 49129906Sbmilekic#include <vm/vm_page.h> 50245575Sandre#include <vm/vm_map.h> 51129906Sbmilekic#include <vm/uma.h> 52147537Ssilby#include <vm/uma_int.h> 53147537Ssilby#include <vm/uma_dbg.h> 54129906Sbmilekic 55129906Sbmilekic/* 56129906Sbmilekic * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA 57129906Sbmilekic * Zones. 58129906Sbmilekic * 59129906Sbmilekic * Mbuf Clusters (2K, contiguous) are allocated from the Cluster 60129906Sbmilekic * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the 61129906Sbmilekic * administrator so desires. 62129906Sbmilekic * 63129906Sbmilekic * Mbufs are allocated from a UMA Master Zone called the Mbuf 64129906Sbmilekic * Zone. 65129906Sbmilekic * 66129906Sbmilekic * Additionally, FreeBSD provides a Packet Zone, which it 67129906Sbmilekic * configures as a Secondary Zone to the Mbuf Master Zone, 68129906Sbmilekic * thus sharing backend Slab kegs with the Mbuf Master Zone. 69129906Sbmilekic * 70129906Sbmilekic * Thus common-case allocations and locking are simplified: 71129906Sbmilekic * 72129906Sbmilekic * m_clget() m_getcl() 73129906Sbmilekic * | | 74129906Sbmilekic * | .------------>[(Packet Cache)] m_get(), m_gethdr() 75129906Sbmilekic * | | [ Packet ] | 76129906Sbmilekic * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ] 77129906Sbmilekic * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] 78129906Sbmilekic * | \________ | 79129906Sbmilekic * [ Cluster Keg ] \ / 80243995Spjd * | [ Mbuf Keg ] 81129906Sbmilekic * [ Cluster Slabs ] | 82129906Sbmilekic * | [ Mbuf Slabs ] 83129906Sbmilekic * \____________(VM)_________________/ 84151976Sandre * 85151976Sandre * 86156023Sglebius * Whenever an object is allocated with uma_zalloc() out of 87151976Sandre * one of the Zones its _ctor_ function is executed. The same 88156023Sglebius * for any deallocation through uma_zfree() the _dtor_ function 89151976Sandre * is executed. 90156023Sglebius * 91151976Sandre * Caches are per-CPU and are filled from the Master Zone. 92151976Sandre * 93156023Sglebius * Whenever an object is allocated from the underlying global 94151976Sandre * memory pool it gets pre-initialized with the _zinit_ functions. 95151976Sandre * When the Keg's are overfull objects get decomissioned with 96151976Sandre * _zfini_ functions and free'd back to the global memory pool. 97151976Sandre * 98129906Sbmilekic */ 99129906Sbmilekic 100243631Sandreint nmbufs; /* limits number of mbufs */ 101151976Sandreint nmbclusters; /* limits number of mbuf clusters */ 102155780Sandreint nmbjumbop; /* limits number of page size jumbo clusters */ 103151976Sandreint nmbjumbo9; /* limits number of 9k jumbo clusters */ 104151976Sandreint nmbjumbo16; /* limits number of 16k jumbo clusters */ 105129906Sbmilekicstruct mbstat mbstat; 106129906Sbmilekic 107253204Sandrestatic quad_t maxmbufmem; /* overall real memory limit for all mbufs */ 108253204Sandre 109253204SandreSYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, &maxmbufmem, 0, 110253204Sandre "Maximum real memory allocateable to various mbuf types"); 111253204Sandre 112185893Sbz/* 113245575Sandre * tunable_mbinit() has to be run before any mbuf allocations are done. 114185893Sbz */ 115129906Sbmilekicstatic void 116129906Sbmilekictunable_mbinit(void *dummy) 117129906Sbmilekic{ 118253204Sandre quad_t realmem; 119129906Sbmilekic 120245575Sandre /* 121245575Sandre * The default limit for all mbuf related memory is 1/2 of all 122245575Sandre * available kernel memory (physical or kmem). 123245575Sandre * At most it can be 3/4 of available kernel memory. 124245575Sandre */ 125245575Sandre realmem = qmin((quad_t)physmem * PAGE_SIZE, 126249843Sandre vm_map_max(kmem_map) - vm_map_min(kmem_map)); 127245575Sandre maxmbufmem = realmem / 2; 128253204Sandre TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem); 129245575Sandre if (maxmbufmem > realmem / 4 * 3) 130245575Sandre maxmbufmem = realmem / 4 * 3; 131245575Sandre 132239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 133243639Sandre if (nmbclusters == 0) 134243639Sandre nmbclusters = maxmbufmem / MCLBYTES / 4; 135239624Snp 136239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); 137239624Snp if (nmbjumbop == 0) 138243639Sandre nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4; 139239624Snp 140239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9); 141239624Snp if (nmbjumbo9 == 0) 142243639Sandre nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6; 143239624Snp 144239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16); 145239624Snp if (nmbjumbo16 == 0) 146243639Sandre nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6; 147243639Sandre 148243639Sandre /* 149243639Sandre * We need at least as many mbufs as we have clusters of 150243639Sandre * the various types added together. 151243639Sandre */ 152243639Sandre TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); 153243639Sandre if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 154243639Sandre nmbufs = lmax(maxmbufmem / MSIZE / 5, 155243996Spjd nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16); 156129906Sbmilekic} 157245575SandreSYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL); 158129906Sbmilekic 159157927Spsstatic int 160157927Spssysctl_nmbclusters(SYSCTL_HANDLER_ARGS) 161157927Sps{ 162157927Sps int error, newnmbclusters; 163157927Sps 164157927Sps newnmbclusters = nmbclusters; 165243995Spjd error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); 166157927Sps if (error == 0 && req->newptr) { 167243631Sandre if (newnmbclusters > nmbclusters && 168243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 169157927Sps nmbclusters = newnmbclusters; 170253207Sandre nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); 171157927Sps EVENTHANDLER_INVOKE(nmbclusters_change); 172157927Sps } else 173157927Sps error = EINVAL; 174157927Sps } 175157927Sps return (error); 176157927Sps} 177157927SpsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW, 178157927Sps&nmbclusters, 0, sysctl_nmbclusters, "IU", 179129906Sbmilekic "Maximum number of mbuf clusters allowed"); 180174292Srrs 181174292Srrsstatic int 182174292Srrssysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) 183174292Srrs{ 184174292Srrs int error, newnmbjumbop; 185174292Srrs 186174292Srrs newnmbjumbop = nmbjumbop; 187243995Spjd error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); 188174292Srrs if (error == 0 && req->newptr) { 189243631Sandre if (newnmbjumbop > nmbjumbop && 190243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 191174292Srrs nmbjumbop = newnmbjumbop; 192253207Sandre nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); 193174292Srrs } else 194174292Srrs error = EINVAL; 195174292Srrs } 196174292Srrs return (error); 197174292Srrs} 198174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW, 199174292Srrs&nmbjumbop, 0, sysctl_nmbjumbop, "IU", 200243996Spjd "Maximum number of mbuf page size jumbo clusters allowed"); 201174292Srrs 202174292Srrsstatic int 203174292Srrssysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) 204174292Srrs{ 205174292Srrs int error, newnmbjumbo9; 206174292Srrs 207174292Srrs newnmbjumbo9 = nmbjumbo9; 208243995Spjd error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); 209174292Srrs if (error == 0 && req->newptr) { 210253204Sandre if (newnmbjumbo9 > nmbjumbo9 && 211243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 212174292Srrs nmbjumbo9 = newnmbjumbo9; 213253207Sandre nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); 214174292Srrs } else 215174292Srrs error = EINVAL; 216174292Srrs } 217174292Srrs return (error); 218174292Srrs} 219174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, 220174292Srrs&nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", 221243995Spjd "Maximum number of mbuf 9k jumbo clusters allowed"); 222174292Srrs 223174292Srrsstatic int 224174292Srrssysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) 225174292Srrs{ 226174292Srrs int error, newnmbjumbo16; 227174292Srrs 228174292Srrs newnmbjumbo16 = nmbjumbo16; 229243995Spjd error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); 230174292Srrs if (error == 0 && req->newptr) { 231243631Sandre if (newnmbjumbo16 > nmbjumbo16 && 232243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 233174292Srrs nmbjumbo16 = newnmbjumbo16; 234253207Sandre nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); 235174292Srrs } else 236174292Srrs error = EINVAL; 237174292Srrs } 238174292Srrs return (error); 239174292Srrs} 240174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, 241174292Srrs&nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", 242151976Sandre "Maximum number of mbuf 16k jumbo clusters allowed"); 243174292Srrs 244243631Sandrestatic int 245243631Sandresysctl_nmbufs(SYSCTL_HANDLER_ARGS) 246243631Sandre{ 247243631Sandre int error, newnmbufs; 248174292Srrs 249243631Sandre newnmbufs = nmbufs; 250243995Spjd error = sysctl_handle_int(oidp, &newnmbufs, 0, req); 251243631Sandre if (error == 0 && req->newptr) { 252243631Sandre if (newnmbufs > nmbufs) { 253243631Sandre nmbufs = newnmbufs; 254253207Sandre nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); 255243631Sandre EVENTHANDLER_INVOKE(nmbufs_change); 256243631Sandre } else 257243631Sandre error = EINVAL; 258243631Sandre } 259243631Sandre return (error); 260243631Sandre} 261253204SandreSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW, 262243631Sandre&nmbufs, 0, sysctl_nmbufs, "IU", 263243631Sandre "Maximum number of mbufs allowed"); 264174292Srrs 265129906SbmilekicSYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, 266129906Sbmilekic "Mbuf general information and statistics"); 267129906Sbmilekic 268129906Sbmilekic/* 269129906Sbmilekic * Zones from which we allocate. 270129906Sbmilekic */ 271129906Sbmilekicuma_zone_t zone_mbuf; 272129906Sbmilekicuma_zone_t zone_clust; 273129906Sbmilekicuma_zone_t zone_pack; 274155780Sandreuma_zone_t zone_jumbop; 275151976Sandreuma_zone_t zone_jumbo9; 276151976Sandreuma_zone_t zone_jumbo16; 277151976Sandreuma_zone_t zone_ext_refcnt; 278129906Sbmilekic 279129906Sbmilekic/* 280129906Sbmilekic * Local prototypes. 281129906Sbmilekic */ 282132987Sgreenstatic int mb_ctor_mbuf(void *, int, void *, int); 283132987Sgreenstatic int mb_ctor_clust(void *, int, void *, int); 284132987Sgreenstatic int mb_ctor_pack(void *, int, void *, int); 285129906Sbmilekicstatic void mb_dtor_mbuf(void *, int, void *); 286151976Sandrestatic void mb_dtor_clust(void *, int, void *); 287151976Sandrestatic void mb_dtor_pack(void *, int, void *); 288151976Sandrestatic int mb_zinit_pack(void *, int, int); 289151976Sandrestatic void mb_zfini_pack(void *, int); 290129906Sbmilekic 291129906Sbmilekicstatic void mb_reclaim(void *); 292209390Sedstatic void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int); 293129906Sbmilekic 294245575Sandre/* Ensure that MSIZE is a power of 2. */ 295135510SbrianCTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); 296135510Sbrian 297129906Sbmilekic/* 298129906Sbmilekic * Initialize FreeBSD Network buffer allocation. 299129906Sbmilekic */ 300129906Sbmilekicstatic void 301129906Sbmilekicmbuf_init(void *dummy) 302129906Sbmilekic{ 303129906Sbmilekic 304129906Sbmilekic /* 305129906Sbmilekic * Configure UMA zones for Mbufs, Clusters, and Packets. 306129906Sbmilekic */ 307151976Sandre zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, 308151976Sandre mb_ctor_mbuf, mb_dtor_mbuf, 309147537Ssilby#ifdef INVARIANTS 310151976Sandre trash_init, trash_fini, 311147537Ssilby#else 312151976Sandre NULL, NULL, 313147537Ssilby#endif 314151976Sandre MSIZE - 1, UMA_ZONE_MAXBUCKET); 315243997Spjd if (nmbufs > 0) 316243997Spjd nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); 317243999Spjd uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached"); 318151976Sandre 319148095Srwatson zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 320151976Sandre mb_ctor_clust, mb_dtor_clust, 321147537Ssilby#ifdef INVARIANTS 322151976Sandre trash_init, trash_fini, 323147537Ssilby#else 324151976Sandre NULL, NULL, 325147537Ssilby#endif 326151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 327243997Spjd if (nmbclusters > 0) 328243997Spjd nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); 329243999Spjd uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached"); 330151976Sandre 331148095Srwatson zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, 332151976Sandre mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); 333129906Sbmilekic 334156023Sglebius /* Make jumbo frame zone too. Page size, 9k and 16k. */ 335155780Sandre zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, 336153232Sandre mb_ctor_clust, mb_dtor_clust, 337153232Sandre#ifdef INVARIANTS 338153232Sandre trash_init, trash_fini, 339153232Sandre#else 340153232Sandre NULL, NULL, 341153232Sandre#endif 342153232Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 343243997Spjd if (nmbjumbop > 0) 344243997Spjd nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); 345243999Spjd uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached"); 346153232Sandre 347151976Sandre zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, 348151976Sandre mb_ctor_clust, mb_dtor_clust, 349151976Sandre#ifdef INVARIANTS 350151976Sandre trash_init, trash_fini, 351151976Sandre#else 352151976Sandre NULL, NULL, 353151976Sandre#endif 354151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 355243631Sandre uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); 356243997Spjd if (nmbjumbo9 > 0) 357243997Spjd nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); 358243999Spjd uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached"); 359129906Sbmilekic 360151976Sandre zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, 361151976Sandre mb_ctor_clust, mb_dtor_clust, 362151976Sandre#ifdef INVARIANTS 363151976Sandre trash_init, trash_fini, 364151976Sandre#else 365151976Sandre NULL, NULL, 366151976Sandre#endif 367151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 368243631Sandre uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); 369243997Spjd if (nmbjumbo16 > 0) 370243997Spjd nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); 371243999Spjd uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); 372151976Sandre 373151976Sandre zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), 374151976Sandre NULL, NULL, 375151976Sandre NULL, NULL, 376151976Sandre UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 377151976Sandre 378151976Sandre /* uma_prealloc() goes here... */ 379151976Sandre 380129906Sbmilekic /* 381129906Sbmilekic * Hook event handler for low-memory situation, used to 382129906Sbmilekic * drain protocols and push data back to the caches (UMA 383129906Sbmilekic * later pushes it back to VM). 384129906Sbmilekic */ 385129906Sbmilekic EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, 386129906Sbmilekic EVENTHANDLER_PRI_FIRST); 387129906Sbmilekic 388129906Sbmilekic /* 389129906Sbmilekic * [Re]set counters and local statistics knobs. 390129906Sbmilekic * XXX Some of these should go and be replaced, but UMA stat 391129906Sbmilekic * gathering needs to be revised. 392129906Sbmilekic */ 393129906Sbmilekic mbstat.m_mbufs = 0; 394129906Sbmilekic mbstat.m_mclusts = 0; 395129906Sbmilekic mbstat.m_drain = 0; 396129906Sbmilekic mbstat.m_msize = MSIZE; 397129906Sbmilekic mbstat.m_mclbytes = MCLBYTES; 398129906Sbmilekic mbstat.m_minclsize = MINCLSIZE; 399129906Sbmilekic mbstat.m_mlen = MLEN; 400129906Sbmilekic mbstat.m_mhlen = MHLEN; 401129906Sbmilekic mbstat.m_numtypes = MT_NTYPES; 402129906Sbmilekic 403129906Sbmilekic mbstat.m_mcfail = mbstat.m_mpfail = 0; 404129906Sbmilekic mbstat.sf_iocnt = 0; 405129906Sbmilekic mbstat.sf_allocwait = mbstat.sf_allocfail = 0; 406129906Sbmilekic} 407245575SandreSYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); 408129906Sbmilekic 409129906Sbmilekic/* 410174247Salc * UMA backend page allocator for the jumbo frame zones. 411174247Salc * 412174247Salc * Allocates kernel virtual memory that is backed by contiguous physical 413174247Salc * pages. 414174247Salc */ 415174247Salcstatic void * 416209390Sedmbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait) 417174247Salc{ 418174247Salc 419177921Salc /* Inform UMA that this allocator uses kernel_map/object. */ 420177921Salc *flags = UMA_SLAB_KERNEL; 421194454Salc return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 422195649Salc (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT)); 423174247Salc} 424174247Salc 425174247Salc/* 426129906Sbmilekic * Constructor for Mbuf master zone. 427129906Sbmilekic * 428129906Sbmilekic * The 'arg' pointer points to a mb_args structure which 429129906Sbmilekic * contains call-specific information required to support the 430151976Sandre * mbuf allocation API. See mbuf.h. 431129906Sbmilekic */ 432132987Sgreenstatic int 433132987Sgreenmb_ctor_mbuf(void *mem, int size, void *arg, int how) 434129906Sbmilekic{ 435129906Sbmilekic struct mbuf *m; 436129906Sbmilekic struct mb_args *args; 437132987Sgreen#ifdef MAC 438132987Sgreen int error; 439132987Sgreen#endif 440129906Sbmilekic int flags; 441129906Sbmilekic short type; 442129906Sbmilekic 443147537Ssilby#ifdef INVARIANTS 444147537Ssilby trash_ctor(mem, size, arg, how); 445147537Ssilby#endif 446129906Sbmilekic m = (struct mbuf *)mem; 447129906Sbmilekic args = (struct mb_args *)arg; 448129906Sbmilekic flags = args->flags; 449129906Sbmilekic type = args->type; 450129906Sbmilekic 451151976Sandre /* 452151976Sandre * The mbuf is initialized later. The caller has the 453156059Sglebius * responsibility to set up any MAC labels too. 454151976Sandre */ 455151976Sandre if (type == MT_NOINIT) 456151976Sandre return (0); 457151976Sandre 458129906Sbmilekic m->m_next = NULL; 459129906Sbmilekic m->m_nextpkt = NULL; 460151976Sandre m->m_len = 0; 461129947Sbmilekic m->m_flags = flags; 462151976Sandre m->m_type = type; 463129906Sbmilekic if (flags & M_PKTHDR) { 464129906Sbmilekic m->m_data = m->m_pktdat; 465129906Sbmilekic m->m_pkthdr.rcvif = NULL; 466184778Skmacy m->m_pkthdr.header = NULL; 467151976Sandre m->m_pkthdr.len = 0; 468129906Sbmilekic m->m_pkthdr.csum_flags = 0; 469151976Sandre m->m_pkthdr.csum_data = 0; 470162377Sandre m->m_pkthdr.tso_segsz = 0; 471162377Sandre m->m_pkthdr.ether_vtag = 0; 472186683Srwatson m->m_pkthdr.flowid = 0; 473250952Sjulian m->m_pkthdr.fibnum = 0; 474129906Sbmilekic SLIST_INIT(&m->m_pkthdr.tags); 475129906Sbmilekic#ifdef MAC 476129906Sbmilekic /* If the label init fails, fail the alloc */ 477172930Srwatson error = mac_mbuf_init(m, how); 478132987Sgreen if (error) 479132987Sgreen return (error); 480129906Sbmilekic#endif 481129947Sbmilekic } else 482129906Sbmilekic m->m_data = m->m_dat; 483132987Sgreen return (0); 484129906Sbmilekic} 485129906Sbmilekic 486129906Sbmilekic/* 487151976Sandre * The Mbuf master zone destructor. 488129906Sbmilekic */ 489129906Sbmilekicstatic void 490129906Sbmilekicmb_dtor_mbuf(void *mem, int size, void *arg) 491129906Sbmilekic{ 492129906Sbmilekic struct mbuf *m; 493243995Spjd unsigned long flags; 494129906Sbmilekic 495129906Sbmilekic m = (struct mbuf *)mem; 496172462Skmacy flags = (unsigned long)arg; 497173029Sobrien 498172462Skmacy if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0) 499129906Sbmilekic m_tag_delete_chain(m, NULL); 500151976Sandre KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); 501173029Sobrien KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); 502147537Ssilby#ifdef INVARIANTS 503147537Ssilby trash_dtor(mem, size, arg); 504147537Ssilby#endif 505129906Sbmilekic} 506129906Sbmilekic 507151976Sandre/* 508151976Sandre * The Mbuf Packet zone destructor. 509151976Sandre */ 510129906Sbmilekicstatic void 511129906Sbmilekicmb_dtor_pack(void *mem, int size, void *arg) 512129906Sbmilekic{ 513129906Sbmilekic struct mbuf *m; 514129906Sbmilekic 515129906Sbmilekic m = (struct mbuf *)mem; 516129906Sbmilekic if ((m->m_flags & M_PKTHDR) != 0) 517129906Sbmilekic m_tag_delete_chain(m, NULL); 518151976Sandre 519151976Sandre /* Make sure we've got a clean cluster back. */ 520151976Sandre KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); 521151976Sandre KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); 522151976Sandre KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); 523175872Sphk KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); 524175872Sphk KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); 525151976Sandre KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); 526152130Sglebius KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); 527151976Sandre KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__)); 528147537Ssilby#ifdef INVARIANTS 529147537Ssilby trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); 530147537Ssilby#endif 531166213Smohans /* 532173029Sobrien * If there are processes blocked on zone_clust, waiting for pages 533173029Sobrien * to be freed up, * cause them to be woken up by draining the 534173029Sobrien * packet zone. We are exposed to a race here * (in the check for 535173029Sobrien * the UMA_ZFLAG_FULL) where we might miss the flag set, but that 536173029Sobrien * is deliberate. We don't want to acquire the zone lock for every 537173029Sobrien * mbuf free. 538166213Smohans */ 539173029Sobrien if (uma_zone_exhausted_nolock(zone_clust)) 540173029Sobrien zone_drain(zone_pack); 541129906Sbmilekic} 542129906Sbmilekic 543129906Sbmilekic/* 544155780Sandre * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. 545129906Sbmilekic * 546129906Sbmilekic * Here the 'arg' pointer points to the Mbuf which we 547151976Sandre * are configuring cluster storage for. If 'arg' is 548151976Sandre * empty we allocate just the cluster without setting 549151976Sandre * the mbuf to it. See mbuf.h. 550129906Sbmilekic */ 551132987Sgreenstatic int 552132987Sgreenmb_ctor_clust(void *mem, int size, void *arg, int how) 553129906Sbmilekic{ 554129906Sbmilekic struct mbuf *m; 555151976Sandre u_int *refcnt; 556168374Skmacy int type; 557168374Skmacy uma_zone_t zone; 558173029Sobrien 559147537Ssilby#ifdef INVARIANTS 560147537Ssilby trash_ctor(mem, size, arg, how); 561147537Ssilby#endif 562168374Skmacy switch (size) { 563168374Skmacy case MCLBYTES: 564168374Skmacy type = EXT_CLUSTER; 565168374Skmacy zone = zone_clust; 566168374Skmacy break; 567168374Skmacy#if MJUMPAGESIZE != MCLBYTES 568168374Skmacy case MJUMPAGESIZE: 569168374Skmacy type = EXT_JUMBOP; 570168374Skmacy zone = zone_jumbop; 571168374Skmacy break; 572168374Skmacy#endif 573168374Skmacy case MJUM9BYTES: 574168374Skmacy type = EXT_JUMBO9; 575168374Skmacy zone = zone_jumbo9; 576168374Skmacy break; 577168374Skmacy case MJUM16BYTES: 578168374Skmacy type = EXT_JUMBO16; 579168374Skmacy zone = zone_jumbo16; 580168374Skmacy break; 581168374Skmacy default: 582168374Skmacy panic("unknown cluster size"); 583168374Skmacy break; 584168374Skmacy } 585168374Skmacy 586129906Sbmilekic m = (struct mbuf *)arg; 587168374Skmacy refcnt = uma_find_refcnt(zone, mem); 588173029Sobrien *refcnt = 1; 589151976Sandre if (m != NULL) { 590151976Sandre m->m_ext.ext_buf = (caddr_t)mem; 591151976Sandre m->m_data = m->m_ext.ext_buf; 592151976Sandre m->m_flags |= M_EXT; 593151976Sandre m->m_ext.ext_free = NULL; 594175872Sphk m->m_ext.ext_arg1 = NULL; 595175872Sphk m->m_ext.ext_arg2 = NULL; 596151976Sandre m->m_ext.ext_size = size; 597151976Sandre m->m_ext.ext_type = type; 598168374Skmacy m->m_ext.ref_cnt = refcnt; 599151976Sandre } 600168374Skmacy 601132987Sgreen return (0); 602129906Sbmilekic} 603129906Sbmilekic 604151976Sandre/* 605151976Sandre * The Mbuf Cluster zone destructor. 606151976Sandre */ 607129906Sbmilekicstatic void 608129906Sbmilekicmb_dtor_clust(void *mem, int size, void *arg) 609129906Sbmilekic{ 610168374Skmacy#ifdef INVARIANTS 611168374Skmacy uma_zone_t zone; 612151976Sandre 613168374Skmacy zone = m_getzone(size); 614168374Skmacy KASSERT(*(uma_find_refcnt(zone, mem)) <= 1, 615152035Sandre ("%s: refcnt incorrect %u", __func__, 616168374Skmacy *(uma_find_refcnt(zone, mem))) ); 617168374Skmacy 618147537Ssilby trash_dtor(mem, size, arg); 619147537Ssilby#endif 620129906Sbmilekic} 621129906Sbmilekic 622129906Sbmilekic/* 623129906Sbmilekic * The Packet secondary zone's init routine, executed on the 624151976Sandre * object's transition from mbuf keg slab to zone cache. 625129906Sbmilekic */ 626132987Sgreenstatic int 627151976Sandremb_zinit_pack(void *mem, int size, int how) 628129906Sbmilekic{ 629129906Sbmilekic struct mbuf *m; 630129906Sbmilekic 631151976Sandre m = (struct mbuf *)mem; /* m is virgin. */ 632156428Sandre if (uma_zalloc_arg(zone_clust, m, how) == NULL || 633156428Sandre m->m_ext.ext_buf == NULL) 634132987Sgreen return (ENOMEM); 635152101Sandre m->m_ext.ext_type = EXT_PACKET; /* Override. */ 636147537Ssilby#ifdef INVARIANTS 637147537Ssilby trash_init(m->m_ext.ext_buf, MCLBYTES, how); 638147537Ssilby#endif 639132987Sgreen return (0); 640129906Sbmilekic} 641129906Sbmilekic 642129906Sbmilekic/* 643129906Sbmilekic * The Packet secondary zone's fini routine, executed on the 644129906Sbmilekic * object's transition from zone cache to keg slab. 645129906Sbmilekic */ 646129906Sbmilekicstatic void 647151976Sandremb_zfini_pack(void *mem, int size) 648129906Sbmilekic{ 649129906Sbmilekic struct mbuf *m; 650129906Sbmilekic 651129906Sbmilekic m = (struct mbuf *)mem; 652147537Ssilby#ifdef INVARIANTS 653147537Ssilby trash_fini(m->m_ext.ext_buf, MCLBYTES); 654147537Ssilby#endif 655129906Sbmilekic uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); 656147652Ssilby#ifdef INVARIANTS 657147652Ssilby trash_dtor(mem, size, NULL); 658147652Ssilby#endif 659129906Sbmilekic} 660129906Sbmilekic 661129906Sbmilekic/* 662129906Sbmilekic * The "packet" keg constructor. 663129906Sbmilekic */ 664132987Sgreenstatic int 665132987Sgreenmb_ctor_pack(void *mem, int size, void *arg, int how) 666129906Sbmilekic{ 667129906Sbmilekic struct mbuf *m; 668129906Sbmilekic struct mb_args *args; 669132987Sgreen#ifdef MAC 670132987Sgreen int error; 671132987Sgreen#endif 672132987Sgreen int flags; 673129906Sbmilekic short type; 674129906Sbmilekic 675129906Sbmilekic m = (struct mbuf *)mem; 676129906Sbmilekic args = (struct mb_args *)arg; 677129906Sbmilekic flags = args->flags; 678129906Sbmilekic type = args->type; 679129906Sbmilekic 680147537Ssilby#ifdef INVARIANTS 681147537Ssilby trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); 682147537Ssilby#endif 683129906Sbmilekic m->m_next = NULL; 684129947Sbmilekic m->m_nextpkt = NULL; 685129906Sbmilekic m->m_data = m->m_ext.ext_buf; 686151976Sandre m->m_len = 0; 687151976Sandre m->m_flags = (flags | M_EXT); 688151976Sandre m->m_type = type; 689173029Sobrien 690129906Sbmilekic if (flags & M_PKTHDR) { 691129906Sbmilekic m->m_pkthdr.rcvif = NULL; 692151976Sandre m->m_pkthdr.len = 0; 693151976Sandre m->m_pkthdr.header = NULL; 694129906Sbmilekic m->m_pkthdr.csum_flags = 0; 695151976Sandre m->m_pkthdr.csum_data = 0; 696162377Sandre m->m_pkthdr.tso_segsz = 0; 697162377Sandre m->m_pkthdr.ether_vtag = 0; 698186683Srwatson m->m_pkthdr.flowid = 0; 699250952Sjulian m->m_pkthdr.fibnum = 0; 700129906Sbmilekic SLIST_INIT(&m->m_pkthdr.tags); 701129906Sbmilekic#ifdef MAC 702129906Sbmilekic /* If the label init fails, fail the alloc */ 703172930Srwatson error = mac_mbuf_init(m, how); 704132987Sgreen if (error) 705132987Sgreen return (error); 706129906Sbmilekic#endif 707129906Sbmilekic } 708151976Sandre /* m_ext is already initialized. */ 709151976Sandre 710132987Sgreen return (0); 711129906Sbmilekic} 712129906Sbmilekic 713194515Skmacyint 714194515Skmacym_pkthdr_init(struct mbuf *m, int how) 715194515Skmacy{ 716194515Skmacy#ifdef MAC 717194515Skmacy int error; 718194515Skmacy#endif 719194515Skmacy m->m_data = m->m_pktdat; 720194515Skmacy SLIST_INIT(&m->m_pkthdr.tags); 721194515Skmacy m->m_pkthdr.rcvif = NULL; 722194515Skmacy m->m_pkthdr.header = NULL; 723194515Skmacy m->m_pkthdr.len = 0; 724194515Skmacy m->m_pkthdr.flowid = 0; 725250952Sjulian m->m_pkthdr.fibnum = 0; 726194515Skmacy m->m_pkthdr.csum_flags = 0; 727194515Skmacy m->m_pkthdr.csum_data = 0; 728194515Skmacy m->m_pkthdr.tso_segsz = 0; 729194515Skmacy m->m_pkthdr.ether_vtag = 0; 730194515Skmacy#ifdef MAC 731194515Skmacy /* If the label init fails, fail the alloc */ 732194515Skmacy error = mac_mbuf_init(m, how); 733194515Skmacy if (error) 734194515Skmacy return (error); 735194515Skmacy#endif 736194515Skmacy 737194515Skmacy return (0); 738194515Skmacy} 739194515Skmacy 740129906Sbmilekic/* 741129906Sbmilekic * This is the protocol drain routine. 742129906Sbmilekic * 743129906Sbmilekic * No locks should be held when this is called. The drain routines have to 744129906Sbmilekic * presently acquire some locks which raises the possibility of lock order 745129906Sbmilekic * reversal. 746129906Sbmilekic */ 747129906Sbmilekicstatic void 748129906Sbmilekicmb_reclaim(void *junk) 749129906Sbmilekic{ 750129906Sbmilekic struct domain *dp; 751129906Sbmilekic struct protosw *pr; 752129906Sbmilekic 753129906Sbmilekic WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, 754129906Sbmilekic "mb_reclaim()"); 755129906Sbmilekic 756129906Sbmilekic for (dp = domains; dp != NULL; dp = dp->dom_next) 757129906Sbmilekic for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 758129906Sbmilekic if (pr->pr_drain != NULL) 759129906Sbmilekic (*pr->pr_drain)(); 760129906Sbmilekic} 761