kern_mbuf.c revision 243999
1129906Sbmilekic/*- 2141991Sbmilekic * Copyright (c) 2004, 2005, 3243995Spjd * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved. 4129906Sbmilekic * 5129906Sbmilekic * Redistribution and use in source and binary forms, with or without 6129906Sbmilekic * modification, are permitted provided that the following conditions 7129906Sbmilekic * are met: 8129906Sbmilekic * 1. Redistributions of source code must retain the above copyright 9129906Sbmilekic * notice unmodified, this list of conditions and the following 10129906Sbmilekic * disclaimer. 11129906Sbmilekic * 2. Redistributions in binary form must reproduce the above copyright 12129906Sbmilekic * notice, this list of conditions and the following disclaimer in the 13129906Sbmilekic * documentation and/or other materials provided with the distribution. 14129906Sbmilekic * 15129906Sbmilekic * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16129906Sbmilekic * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17129906Sbmilekic * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18129906Sbmilekic * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19129906Sbmilekic * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20129906Sbmilekic * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21129906Sbmilekic * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22129906Sbmilekic * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23129906Sbmilekic * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24129906Sbmilekic * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25129906Sbmilekic * SUCH DAMAGE. 
26129906Sbmilekic */ 27129906Sbmilekic 28129906Sbmilekic#include <sys/cdefs.h> 29129906Sbmilekic__FBSDID("$FreeBSD: head/sys/kern/kern_mbuf.c 243999 2012-12-07 22:30:30Z pjd $"); 30129906Sbmilekic 31129906Sbmilekic#include "opt_param.h" 32129906Sbmilekic 33129906Sbmilekic#include <sys/param.h> 34129906Sbmilekic#include <sys/malloc.h> 35129906Sbmilekic#include <sys/systm.h> 36129906Sbmilekic#include <sys/mbuf.h> 37129906Sbmilekic#include <sys/domain.h> 38129906Sbmilekic#include <sys/eventhandler.h> 39129906Sbmilekic#include <sys/kernel.h> 40129906Sbmilekic#include <sys/protosw.h> 41129906Sbmilekic#include <sys/smp.h> 42129906Sbmilekic#include <sys/sysctl.h> 43129906Sbmilekic 44163606Srwatson#include <security/mac/mac_framework.h> 45163606Srwatson 46129906Sbmilekic#include <vm/vm.h> 47194454Salc#include <vm/vm_extern.h> 48194454Salc#include <vm/vm_kern.h> 49129906Sbmilekic#include <vm/vm_page.h> 50129906Sbmilekic#include <vm/uma.h> 51147537Ssilby#include <vm/uma_int.h> 52147537Ssilby#include <vm/uma_dbg.h> 53129906Sbmilekic 54129906Sbmilekic/* 55129906Sbmilekic * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA 56129906Sbmilekic * Zones. 57129906Sbmilekic * 58129906Sbmilekic * Mbuf Clusters (2K, contiguous) are allocated from the Cluster 59129906Sbmilekic * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the 60129906Sbmilekic * administrator so desires. 61129906Sbmilekic * 62129906Sbmilekic * Mbufs are allocated from a UMA Master Zone called the Mbuf 63129906Sbmilekic * Zone. 64129906Sbmilekic * 65129906Sbmilekic * Additionally, FreeBSD provides a Packet Zone, which it 66129906Sbmilekic * configures as a Secondary Zone to the Mbuf Master Zone, 67129906Sbmilekic * thus sharing backend Slab kegs with the Mbuf Master Zone. 
 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *    |                         |
 *    |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *    |   |             [     Packet   ]            |
 *  [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
 *  [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Master Zone ]
 *        |                       \________         |
 *  [ Cluster Keg   ]                      \       /
 *        |                              [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                         |
 *        |                              [ Mbuf Slabs ]
 *         \____________(VM)_________________/
 *
 *
 * Whenever an object is allocated with uma_zalloc() out of
 * one of the Zones, its _ctor_ function is executed.  Likewise,
 * on any deallocation through uma_zfree() the _dtor_ function
 * is executed.
 *
 * Caches are per-CPU and are filled from the Master Zone.
 *
 * Whenever an object is allocated from the underlying global
 * memory pool it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull, objects get decommissioned with
 * _zfini_ functions and freed back to the global memory pool.
96151976Sandre * 97129906Sbmilekic */ 98129906Sbmilekic 99243631Sandreint nmbufs; /* limits number of mbufs */ 100151976Sandreint nmbclusters; /* limits number of mbuf clusters */ 101155780Sandreint nmbjumbop; /* limits number of page size jumbo clusters */ 102151976Sandreint nmbjumbo9; /* limits number of 9k jumbo clusters */ 103151976Sandreint nmbjumbo16; /* limits number of 16k jumbo clusters */ 104129906Sbmilekicstruct mbstat mbstat; 105129906Sbmilekic 106185893Sbz/* 107185893Sbz * tunable_mbinit() has to be run before init_maxsockets() thus 108185893Sbz * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets() 109185893Sbz * runs at SI_ORDER_ANY. 110243639Sandre * 111243639Sandre * NB: This has to be done before VM init. 112185893Sbz */ 113129906Sbmilekicstatic void 114129906Sbmilekictunable_mbinit(void *dummy) 115129906Sbmilekic{ 116129906Sbmilekic 117239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 118243639Sandre if (nmbclusters == 0) 119243639Sandre nmbclusters = maxmbufmem / MCLBYTES / 4; 120239624Snp 121239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); 122239624Snp if (nmbjumbop == 0) 123243639Sandre nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4; 124239624Snp 125239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9); 126239624Snp if (nmbjumbo9 == 0) 127243639Sandre nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6; 128239624Snp 129239624Snp TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16); 130239624Snp if (nmbjumbo16 == 0) 131243639Sandre nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6; 132243639Sandre 133243639Sandre /* 134243639Sandre * We need at least as many mbufs as we have clusters of 135243639Sandre * the various types added together. 
136243639Sandre */ 137243639Sandre TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); 138243639Sandre if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 139243639Sandre nmbufs = lmax(maxmbufmem / MSIZE / 5, 140243996Spjd nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16); 141129906Sbmilekic} 142185893SbzSYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL); 143129906Sbmilekic 144157927Spsstatic int 145157927Spssysctl_nmbclusters(SYSCTL_HANDLER_ARGS) 146157927Sps{ 147157927Sps int error, newnmbclusters; 148157927Sps 149157927Sps newnmbclusters = nmbclusters; 150243995Spjd error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); 151157927Sps if (error == 0 && req->newptr) { 152243631Sandre if (newnmbclusters > nmbclusters && 153243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 154157927Sps nmbclusters = newnmbclusters; 155157927Sps uma_zone_set_max(zone_clust, nmbclusters); 156243631Sandre nmbclusters = uma_zone_get_max(zone_clust); 157157927Sps EVENTHANDLER_INVOKE(nmbclusters_change); 158157927Sps } else 159157927Sps error = EINVAL; 160157927Sps } 161157927Sps return (error); 162157927Sps} 163157927SpsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW, 164157927Sps&nmbclusters, 0, sysctl_nmbclusters, "IU", 165129906Sbmilekic "Maximum number of mbuf clusters allowed"); 166174292Srrs 167174292Srrsstatic int 168174292Srrssysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) 169174292Srrs{ 170174292Srrs int error, newnmbjumbop; 171174292Srrs 172174292Srrs newnmbjumbop = nmbjumbop; 173243995Spjd error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); 174174292Srrs if (error == 0 && req->newptr) { 175243631Sandre if (newnmbjumbop > nmbjumbop && 176243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 177174292Srrs nmbjumbop = newnmbjumbop; 178174292Srrs uma_zone_set_max(zone_jumbop, nmbjumbop); 179243631Sandre nmbjumbop = uma_zone_get_max(zone_jumbop); 180174292Srrs } else 181174292Srrs 
error = EINVAL; 182174292Srrs } 183174292Srrs return (error); 184174292Srrs} 185174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW, 186174292Srrs&nmbjumbop, 0, sysctl_nmbjumbop, "IU", 187243996Spjd "Maximum number of mbuf page size jumbo clusters allowed"); 188174292Srrs 189174292Srrsstatic int 190174292Srrssysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) 191174292Srrs{ 192174292Srrs int error, newnmbjumbo9; 193174292Srrs 194174292Srrs newnmbjumbo9 = nmbjumbo9; 195243995Spjd error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); 196174292Srrs if (error == 0 && req->newptr) { 197243631Sandre if (newnmbjumbo9 > nmbjumbo9&& 198243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 199174292Srrs nmbjumbo9 = newnmbjumbo9; 200174292Srrs uma_zone_set_max(zone_jumbo9, nmbjumbo9); 201243631Sandre nmbjumbo9 = uma_zone_get_max(zone_jumbo9); 202174292Srrs } else 203174292Srrs error = EINVAL; 204174292Srrs } 205174292Srrs return (error); 206174292Srrs} 207174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, 208174292Srrs&nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", 209243995Spjd "Maximum number of mbuf 9k jumbo clusters allowed"); 210174292Srrs 211174292Srrsstatic int 212174292Srrssysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) 213174292Srrs{ 214174292Srrs int error, newnmbjumbo16; 215174292Srrs 216174292Srrs newnmbjumbo16 = nmbjumbo16; 217243995Spjd error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); 218174292Srrs if (error == 0 && req->newptr) { 219243631Sandre if (newnmbjumbo16 > nmbjumbo16 && 220243631Sandre nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 221174292Srrs nmbjumbo16 = newnmbjumbo16; 222174292Srrs uma_zone_set_max(zone_jumbo16, nmbjumbo16); 223243631Sandre nmbjumbo16 = uma_zone_get_max(zone_jumbo16); 224174292Srrs } else 225174292Srrs error = EINVAL; 226174292Srrs } 227174292Srrs return (error); 228174292Srrs} 229174292SrrsSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, 
230174292Srrs&nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", 231151976Sandre "Maximum number of mbuf 16k jumbo clusters allowed"); 232174292Srrs 233243631Sandrestatic int 234243631Sandresysctl_nmbufs(SYSCTL_HANDLER_ARGS) 235243631Sandre{ 236243631Sandre int error, newnmbufs; 237174292Srrs 238243631Sandre newnmbufs = nmbufs; 239243995Spjd error = sysctl_handle_int(oidp, &newnmbufs, 0, req); 240243631Sandre if (error == 0 && req->newptr) { 241243631Sandre if (newnmbufs > nmbufs) { 242243631Sandre nmbufs = newnmbufs; 243243631Sandre uma_zone_set_max(zone_mbuf, nmbufs); 244243639Sandre nmbufs = uma_zone_get_max(zone_mbuf); 245243631Sandre EVENTHANDLER_INVOKE(nmbufs_change); 246243631Sandre } else 247243631Sandre error = EINVAL; 248243631Sandre } 249243631Sandre return (error); 250243631Sandre} 251243631SandreSYSCTL_PROC(_kern_ipc, OID_AUTO, nmbuf, CTLTYPE_INT|CTLFLAG_RW, 252243631Sandre&nmbufs, 0, sysctl_nmbufs, "IU", 253243631Sandre "Maximum number of mbufs allowed"); 254174292Srrs 255129906SbmilekicSYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, 256129906Sbmilekic "Mbuf general information and statistics"); 257129906Sbmilekic 258129906Sbmilekic/* 259129906Sbmilekic * Zones from which we allocate. 260129906Sbmilekic */ 261129906Sbmilekicuma_zone_t zone_mbuf; 262129906Sbmilekicuma_zone_t zone_clust; 263129906Sbmilekicuma_zone_t zone_pack; 264155780Sandreuma_zone_t zone_jumbop; 265151976Sandreuma_zone_t zone_jumbo9; 266151976Sandreuma_zone_t zone_jumbo16; 267151976Sandreuma_zone_t zone_ext_refcnt; 268129906Sbmilekic 269129906Sbmilekic/* 270129906Sbmilekic * Local prototypes. 
271129906Sbmilekic */ 272132987Sgreenstatic int mb_ctor_mbuf(void *, int, void *, int); 273132987Sgreenstatic int mb_ctor_clust(void *, int, void *, int); 274132987Sgreenstatic int mb_ctor_pack(void *, int, void *, int); 275129906Sbmilekicstatic void mb_dtor_mbuf(void *, int, void *); 276151976Sandrestatic void mb_dtor_clust(void *, int, void *); 277151976Sandrestatic void mb_dtor_pack(void *, int, void *); 278151976Sandrestatic int mb_zinit_pack(void *, int, int); 279151976Sandrestatic void mb_zfini_pack(void *, int); 280129906Sbmilekic 281129906Sbmilekicstatic void mb_reclaim(void *); 282129906Sbmilekicstatic void mbuf_init(void *); 283209390Sedstatic void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int); 284129906Sbmilekic 285242304Skevlo/* Ensure that MSIZE must be a power of 2. */ 286135510SbrianCTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); 287135510Sbrian 288129906Sbmilekic/* 289129906Sbmilekic * Initialize FreeBSD Network buffer allocation. 290129906Sbmilekic */ 291177253SrwatsonSYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); 292129906Sbmilekicstatic void 293129906Sbmilekicmbuf_init(void *dummy) 294129906Sbmilekic{ 295129906Sbmilekic 296129906Sbmilekic /* 297129906Sbmilekic * Configure UMA zones for Mbufs, Clusters, and Packets. 
298129906Sbmilekic */ 299151976Sandre zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, 300151976Sandre mb_ctor_mbuf, mb_dtor_mbuf, 301147537Ssilby#ifdef INVARIANTS 302151976Sandre trash_init, trash_fini, 303147537Ssilby#else 304151976Sandre NULL, NULL, 305147537Ssilby#endif 306151976Sandre MSIZE - 1, UMA_ZONE_MAXBUCKET); 307243997Spjd if (nmbufs > 0) 308243997Spjd nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); 309243999Spjd uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached"); 310151976Sandre 311148095Srwatson zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 312151976Sandre mb_ctor_clust, mb_dtor_clust, 313147537Ssilby#ifdef INVARIANTS 314151976Sandre trash_init, trash_fini, 315147537Ssilby#else 316151976Sandre NULL, NULL, 317147537Ssilby#endif 318151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 319243997Spjd if (nmbclusters > 0) 320243997Spjd nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); 321243999Spjd uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached"); 322151976Sandre 323148095Srwatson zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, 324151976Sandre mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); 325129906Sbmilekic 326156023Sglebius /* Make jumbo frame zone too. Page size, 9k and 16k. 
*/ 327155780Sandre zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, 328153232Sandre mb_ctor_clust, mb_dtor_clust, 329153232Sandre#ifdef INVARIANTS 330153232Sandre trash_init, trash_fini, 331153232Sandre#else 332153232Sandre NULL, NULL, 333153232Sandre#endif 334153232Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 335243997Spjd if (nmbjumbop > 0) 336243997Spjd nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); 337243999Spjd uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached"); 338153232Sandre 339151976Sandre zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, 340151976Sandre mb_ctor_clust, mb_dtor_clust, 341151976Sandre#ifdef INVARIANTS 342151976Sandre trash_init, trash_fini, 343151976Sandre#else 344151976Sandre NULL, NULL, 345151976Sandre#endif 346151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 347243631Sandre uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); 348243997Spjd if (nmbjumbo9 > 0) 349243997Spjd nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); 350243999Spjd uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached"); 351129906Sbmilekic 352151976Sandre zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, 353151976Sandre mb_ctor_clust, mb_dtor_clust, 354151976Sandre#ifdef INVARIANTS 355151976Sandre trash_init, trash_fini, 356151976Sandre#else 357151976Sandre NULL, NULL, 358151976Sandre#endif 359151976Sandre UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 360243631Sandre uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); 361243997Spjd if (nmbjumbo16 > 0) 362243997Spjd nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); 363243999Spjd uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); 364151976Sandre 365151976Sandre zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), 366151976Sandre NULL, NULL, 367151976Sandre NULL, NULL, 368151976Sandre UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 369151976Sandre 370151976Sandre /* uma_prealloc() goes here... 
*/ 371151976Sandre 372129906Sbmilekic /* 373129906Sbmilekic * Hook event handler for low-memory situation, used to 374129906Sbmilekic * drain protocols and push data back to the caches (UMA 375129906Sbmilekic * later pushes it back to VM). 376129906Sbmilekic */ 377129906Sbmilekic EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, 378129906Sbmilekic EVENTHANDLER_PRI_FIRST); 379129906Sbmilekic 380129906Sbmilekic /* 381129906Sbmilekic * [Re]set counters and local statistics knobs. 382129906Sbmilekic * XXX Some of these should go and be replaced, but UMA stat 383129906Sbmilekic * gathering needs to be revised. 384129906Sbmilekic */ 385129906Sbmilekic mbstat.m_mbufs = 0; 386129906Sbmilekic mbstat.m_mclusts = 0; 387129906Sbmilekic mbstat.m_drain = 0; 388129906Sbmilekic mbstat.m_msize = MSIZE; 389129906Sbmilekic mbstat.m_mclbytes = MCLBYTES; 390129906Sbmilekic mbstat.m_minclsize = MINCLSIZE; 391129906Sbmilekic mbstat.m_mlen = MLEN; 392129906Sbmilekic mbstat.m_mhlen = MHLEN; 393129906Sbmilekic mbstat.m_numtypes = MT_NTYPES; 394129906Sbmilekic 395129906Sbmilekic mbstat.m_mcfail = mbstat.m_mpfail = 0; 396129906Sbmilekic mbstat.sf_iocnt = 0; 397129906Sbmilekic mbstat.sf_allocwait = mbstat.sf_allocfail = 0; 398129906Sbmilekic} 399129906Sbmilekic 400129906Sbmilekic/* 401174247Salc * UMA backend page allocator for the jumbo frame zones. 402174247Salc * 403174247Salc * Allocates kernel virtual memory that is backed by contiguous physical 404174247Salc * pages. 405174247Salc */ 406174247Salcstatic void * 407209390Sedmbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait) 408174247Salc{ 409174247Salc 410177921Salc /* Inform UMA that this allocator uses kernel_map/object. */ 411177921Salc *flags = UMA_SLAB_KERNEL; 412194454Salc return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 413195649Salc (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT)); 414174247Salc} 415174247Salc 416174247Salc/* 417129906Sbmilekic * Constructor for Mbuf master zone. 
418129906Sbmilekic * 419129906Sbmilekic * The 'arg' pointer points to a mb_args structure which 420129906Sbmilekic * contains call-specific information required to support the 421151976Sandre * mbuf allocation API. See mbuf.h. 422129906Sbmilekic */ 423132987Sgreenstatic int 424132987Sgreenmb_ctor_mbuf(void *mem, int size, void *arg, int how) 425129906Sbmilekic{ 426129906Sbmilekic struct mbuf *m; 427129906Sbmilekic struct mb_args *args; 428132987Sgreen#ifdef MAC 429132987Sgreen int error; 430132987Sgreen#endif 431129906Sbmilekic int flags; 432129906Sbmilekic short type; 433129906Sbmilekic 434147537Ssilby#ifdef INVARIANTS 435147537Ssilby trash_ctor(mem, size, arg, how); 436147537Ssilby#endif 437129906Sbmilekic m = (struct mbuf *)mem; 438129906Sbmilekic args = (struct mb_args *)arg; 439129906Sbmilekic flags = args->flags; 440129906Sbmilekic type = args->type; 441129906Sbmilekic 442151976Sandre /* 443151976Sandre * The mbuf is initialized later. The caller has the 444156059Sglebius * responsibility to set up any MAC labels too. 
445151976Sandre */ 446151976Sandre if (type == MT_NOINIT) 447151976Sandre return (0); 448151976Sandre 449129906Sbmilekic m->m_next = NULL; 450129906Sbmilekic m->m_nextpkt = NULL; 451151976Sandre m->m_len = 0; 452129947Sbmilekic m->m_flags = flags; 453151976Sandre m->m_type = type; 454129906Sbmilekic if (flags & M_PKTHDR) { 455129906Sbmilekic m->m_data = m->m_pktdat; 456129906Sbmilekic m->m_pkthdr.rcvif = NULL; 457184778Skmacy m->m_pkthdr.header = NULL; 458151976Sandre m->m_pkthdr.len = 0; 459129906Sbmilekic m->m_pkthdr.csum_flags = 0; 460151976Sandre m->m_pkthdr.csum_data = 0; 461162377Sandre m->m_pkthdr.tso_segsz = 0; 462162377Sandre m->m_pkthdr.ether_vtag = 0; 463186683Srwatson m->m_pkthdr.flowid = 0; 464129906Sbmilekic SLIST_INIT(&m->m_pkthdr.tags); 465129906Sbmilekic#ifdef MAC 466129906Sbmilekic /* If the label init fails, fail the alloc */ 467172930Srwatson error = mac_mbuf_init(m, how); 468132987Sgreen if (error) 469132987Sgreen return (error); 470129906Sbmilekic#endif 471129947Sbmilekic } else 472129906Sbmilekic m->m_data = m->m_dat; 473132987Sgreen return (0); 474129906Sbmilekic} 475129906Sbmilekic 476129906Sbmilekic/* 477151976Sandre * The Mbuf master zone destructor. 
478129906Sbmilekic */ 479129906Sbmilekicstatic void 480129906Sbmilekicmb_dtor_mbuf(void *mem, int size, void *arg) 481129906Sbmilekic{ 482129906Sbmilekic struct mbuf *m; 483243995Spjd unsigned long flags; 484129906Sbmilekic 485129906Sbmilekic m = (struct mbuf *)mem; 486172462Skmacy flags = (unsigned long)arg; 487173029Sobrien 488172462Skmacy if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0) 489129906Sbmilekic m_tag_delete_chain(m, NULL); 490151976Sandre KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); 491173029Sobrien KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); 492147537Ssilby#ifdef INVARIANTS 493147537Ssilby trash_dtor(mem, size, arg); 494147537Ssilby#endif 495129906Sbmilekic} 496129906Sbmilekic 497151976Sandre/* 498151976Sandre * The Mbuf Packet zone destructor. 499151976Sandre */ 500129906Sbmilekicstatic void 501129906Sbmilekicmb_dtor_pack(void *mem, int size, void *arg) 502129906Sbmilekic{ 503129906Sbmilekic struct mbuf *m; 504129906Sbmilekic 505129906Sbmilekic m = (struct mbuf *)mem; 506129906Sbmilekic if ((m->m_flags & M_PKTHDR) != 0) 507129906Sbmilekic m_tag_delete_chain(m, NULL); 508151976Sandre 509151976Sandre /* Make sure we've got a clean cluster back. 
*/ 510151976Sandre KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); 511151976Sandre KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); 512151976Sandre KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); 513175872Sphk KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); 514175872Sphk KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); 515151976Sandre KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); 516152130Sglebius KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); 517151976Sandre KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__)); 518147537Ssilby#ifdef INVARIANTS 519147537Ssilby trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); 520147537Ssilby#endif 521166213Smohans /* 522173029Sobrien * If there are processes blocked on zone_clust, waiting for pages 523173029Sobrien * to be freed up, * cause them to be woken up by draining the 524173029Sobrien * packet zone. We are exposed to a race here * (in the check for 525173029Sobrien * the UMA_ZFLAG_FULL) where we might miss the flag set, but that 526173029Sobrien * is deliberate. We don't want to acquire the zone lock for every 527173029Sobrien * mbuf free. 528166213Smohans */ 529173029Sobrien if (uma_zone_exhausted_nolock(zone_clust)) 530173029Sobrien zone_drain(zone_pack); 531129906Sbmilekic} 532129906Sbmilekic 533129906Sbmilekic/* 534155780Sandre * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. 535129906Sbmilekic * 536129906Sbmilekic * Here the 'arg' pointer points to the Mbuf which we 537151976Sandre * are configuring cluster storage for. If 'arg' is 538151976Sandre * empty we allocate just the cluster without setting 539151976Sandre * the mbuf to it. See mbuf.h. 
540129906Sbmilekic */ 541132987Sgreenstatic int 542132987Sgreenmb_ctor_clust(void *mem, int size, void *arg, int how) 543129906Sbmilekic{ 544129906Sbmilekic struct mbuf *m; 545151976Sandre u_int *refcnt; 546168374Skmacy int type; 547168374Skmacy uma_zone_t zone; 548173029Sobrien 549147537Ssilby#ifdef INVARIANTS 550147537Ssilby trash_ctor(mem, size, arg, how); 551147537Ssilby#endif 552168374Skmacy switch (size) { 553168374Skmacy case MCLBYTES: 554168374Skmacy type = EXT_CLUSTER; 555168374Skmacy zone = zone_clust; 556168374Skmacy break; 557168374Skmacy#if MJUMPAGESIZE != MCLBYTES 558168374Skmacy case MJUMPAGESIZE: 559168374Skmacy type = EXT_JUMBOP; 560168374Skmacy zone = zone_jumbop; 561168374Skmacy break; 562168374Skmacy#endif 563168374Skmacy case MJUM9BYTES: 564168374Skmacy type = EXT_JUMBO9; 565168374Skmacy zone = zone_jumbo9; 566168374Skmacy break; 567168374Skmacy case MJUM16BYTES: 568168374Skmacy type = EXT_JUMBO16; 569168374Skmacy zone = zone_jumbo16; 570168374Skmacy break; 571168374Skmacy default: 572168374Skmacy panic("unknown cluster size"); 573168374Skmacy break; 574168374Skmacy } 575168374Skmacy 576129906Sbmilekic m = (struct mbuf *)arg; 577168374Skmacy refcnt = uma_find_refcnt(zone, mem); 578173029Sobrien *refcnt = 1; 579151976Sandre if (m != NULL) { 580151976Sandre m->m_ext.ext_buf = (caddr_t)mem; 581151976Sandre m->m_data = m->m_ext.ext_buf; 582151976Sandre m->m_flags |= M_EXT; 583151976Sandre m->m_ext.ext_free = NULL; 584175872Sphk m->m_ext.ext_arg1 = NULL; 585175872Sphk m->m_ext.ext_arg2 = NULL; 586151976Sandre m->m_ext.ext_size = size; 587151976Sandre m->m_ext.ext_type = type; 588168374Skmacy m->m_ext.ref_cnt = refcnt; 589151976Sandre } 590168374Skmacy 591132987Sgreen return (0); 592129906Sbmilekic} 593129906Sbmilekic 594151976Sandre/* 595151976Sandre * The Mbuf Cluster zone destructor. 
596151976Sandre */ 597129906Sbmilekicstatic void 598129906Sbmilekicmb_dtor_clust(void *mem, int size, void *arg) 599129906Sbmilekic{ 600168374Skmacy#ifdef INVARIANTS 601168374Skmacy uma_zone_t zone; 602151976Sandre 603168374Skmacy zone = m_getzone(size); 604168374Skmacy KASSERT(*(uma_find_refcnt(zone, mem)) <= 1, 605152035Sandre ("%s: refcnt incorrect %u", __func__, 606168374Skmacy *(uma_find_refcnt(zone, mem))) ); 607168374Skmacy 608147537Ssilby trash_dtor(mem, size, arg); 609147537Ssilby#endif 610129906Sbmilekic} 611129906Sbmilekic 612129906Sbmilekic/* 613129906Sbmilekic * The Packet secondary zone's init routine, executed on the 614151976Sandre * object's transition from mbuf keg slab to zone cache. 615129906Sbmilekic */ 616132987Sgreenstatic int 617151976Sandremb_zinit_pack(void *mem, int size, int how) 618129906Sbmilekic{ 619129906Sbmilekic struct mbuf *m; 620129906Sbmilekic 621151976Sandre m = (struct mbuf *)mem; /* m is virgin. */ 622156428Sandre if (uma_zalloc_arg(zone_clust, m, how) == NULL || 623156428Sandre m->m_ext.ext_buf == NULL) 624132987Sgreen return (ENOMEM); 625152101Sandre m->m_ext.ext_type = EXT_PACKET; /* Override. */ 626147537Ssilby#ifdef INVARIANTS 627147537Ssilby trash_init(m->m_ext.ext_buf, MCLBYTES, how); 628147537Ssilby#endif 629132987Sgreen return (0); 630129906Sbmilekic} 631129906Sbmilekic 632129906Sbmilekic/* 633129906Sbmilekic * The Packet secondary zone's fini routine, executed on the 634129906Sbmilekic * object's transition from zone cache to keg slab. 
635129906Sbmilekic */ 636129906Sbmilekicstatic void 637151976Sandremb_zfini_pack(void *mem, int size) 638129906Sbmilekic{ 639129906Sbmilekic struct mbuf *m; 640129906Sbmilekic 641129906Sbmilekic m = (struct mbuf *)mem; 642147537Ssilby#ifdef INVARIANTS 643147537Ssilby trash_fini(m->m_ext.ext_buf, MCLBYTES); 644147537Ssilby#endif 645129906Sbmilekic uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); 646147652Ssilby#ifdef INVARIANTS 647147652Ssilby trash_dtor(mem, size, NULL); 648147652Ssilby#endif 649129906Sbmilekic} 650129906Sbmilekic 651129906Sbmilekic/* 652129906Sbmilekic * The "packet" keg constructor. 653129906Sbmilekic */ 654132987Sgreenstatic int 655132987Sgreenmb_ctor_pack(void *mem, int size, void *arg, int how) 656129906Sbmilekic{ 657129906Sbmilekic struct mbuf *m; 658129906Sbmilekic struct mb_args *args; 659132987Sgreen#ifdef MAC 660132987Sgreen int error; 661132987Sgreen#endif 662132987Sgreen int flags; 663129906Sbmilekic short type; 664129906Sbmilekic 665129906Sbmilekic m = (struct mbuf *)mem; 666129906Sbmilekic args = (struct mb_args *)arg; 667129906Sbmilekic flags = args->flags; 668129906Sbmilekic type = args->type; 669129906Sbmilekic 670147537Ssilby#ifdef INVARIANTS 671147537Ssilby trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); 672147537Ssilby#endif 673129906Sbmilekic m->m_next = NULL; 674129947Sbmilekic m->m_nextpkt = NULL; 675129906Sbmilekic m->m_data = m->m_ext.ext_buf; 676151976Sandre m->m_len = 0; 677151976Sandre m->m_flags = (flags | M_EXT); 678151976Sandre m->m_type = type; 679173029Sobrien 680129906Sbmilekic if (flags & M_PKTHDR) { 681129906Sbmilekic m->m_pkthdr.rcvif = NULL; 682151976Sandre m->m_pkthdr.len = 0; 683151976Sandre m->m_pkthdr.header = NULL; 684129906Sbmilekic m->m_pkthdr.csum_flags = 0; 685151976Sandre m->m_pkthdr.csum_data = 0; 686162377Sandre m->m_pkthdr.tso_segsz = 0; 687162377Sandre m->m_pkthdr.ether_vtag = 0; 688186683Srwatson m->m_pkthdr.flowid = 0; 689129906Sbmilekic SLIST_INIT(&m->m_pkthdr.tags); 
690129906Sbmilekic#ifdef MAC 691129906Sbmilekic /* If the label init fails, fail the alloc */ 692172930Srwatson error = mac_mbuf_init(m, how); 693132987Sgreen if (error) 694132987Sgreen return (error); 695129906Sbmilekic#endif 696129906Sbmilekic } 697151976Sandre /* m_ext is already initialized. */ 698151976Sandre 699132987Sgreen return (0); 700129906Sbmilekic} 701129906Sbmilekic 702194515Skmacyint 703194515Skmacym_pkthdr_init(struct mbuf *m, int how) 704194515Skmacy{ 705194515Skmacy#ifdef MAC 706194515Skmacy int error; 707194515Skmacy#endif 708194515Skmacy m->m_data = m->m_pktdat; 709194515Skmacy SLIST_INIT(&m->m_pkthdr.tags); 710194515Skmacy m->m_pkthdr.rcvif = NULL; 711194515Skmacy m->m_pkthdr.header = NULL; 712194515Skmacy m->m_pkthdr.len = 0; 713194515Skmacy m->m_pkthdr.flowid = 0; 714194515Skmacy m->m_pkthdr.csum_flags = 0; 715194515Skmacy m->m_pkthdr.csum_data = 0; 716194515Skmacy m->m_pkthdr.tso_segsz = 0; 717194515Skmacy m->m_pkthdr.ether_vtag = 0; 718194515Skmacy#ifdef MAC 719194515Skmacy /* If the label init fails, fail the alloc */ 720194515Skmacy error = mac_mbuf_init(m, how); 721194515Skmacy if (error) 722194515Skmacy return (error); 723194515Skmacy#endif 724194515Skmacy 725194515Skmacy return (0); 726194515Skmacy} 727194515Skmacy 728129906Sbmilekic/* 729129906Sbmilekic * This is the protocol drain routine. 730129906Sbmilekic * 731129906Sbmilekic * No locks should be held when this is called. The drain routines have to 732129906Sbmilekic * presently acquire some locks which raises the possibility of lock order 733129906Sbmilekic * reversal. 
734129906Sbmilekic */ 735129906Sbmilekicstatic void 736129906Sbmilekicmb_reclaim(void *junk) 737129906Sbmilekic{ 738129906Sbmilekic struct domain *dp; 739129906Sbmilekic struct protosw *pr; 740129906Sbmilekic 741129906Sbmilekic WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, 742129906Sbmilekic "mb_reclaim()"); 743129906Sbmilekic 744129906Sbmilekic for (dp = domains; dp != NULL; dp = dp->dom_next) 745129906Sbmilekic for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 746129906Sbmilekic if (pr->pr_drain != NULL) 747129906Sbmilekic (*pr->pr_drain)(); 748129906Sbmilekic} 749